From 1143f9bfcc52b91f00e1d18a002961bf027b40f5 Mon Sep 17 00:00:00 2001 From: welsberr Date: Fri, 10 Apr 2026 04:44:45 +0000 Subject: [PATCH] Prepare public-safe repo update --- .gitignore | 9 + README.md | 19 +- apps/api/src/ecospecies_api/app.py | 417 +++++- apps/api/src/ecospecies_api/auth.py | 47 +- .../src/ecospecies_api/citation_enrichment.py | 1018 +++++++++++++++ .../src/ecospecies_api/citegeist_bridge.py | 387 ++++++ .../api/src/ecospecies_api/document_format.py | 480 +++++++ .../src/ecospecies_api/document_repository.py | 267 ++++ apps/api/src/ecospecies_api/models.py | 108 ++ apps/api/src/ecospecies_api/parser.py | 119 +- apps/api/src/ecospecies_api/repository.py | 1146 ++++++++++++++++- apps/api/test_auth.py | 21 + apps/api/test_citation_enrichment.py | 21 + apps/api/test_document_format.py | 21 + apps/api/test_parser.py | 21 + apps/api/tests/test_auth.py | 58 + apps/api/tests/test_citation_enrichment.py | 527 ++++++++ apps/api/tests/test_document_format.py | 195 +++ apps/api/tests/test_parser.py | 109 ++ apps/api/tests/test_repository.py | 660 ++++++++++ apps/web/app.js | 1113 +++++++++++++++- apps/web/bibliography.html | 43 + apps/web/bibliography.js | 230 ++++ apps/web/index.html | 206 ++- apps/web/nginx.conf | 31 + apps/web/styles.css | 515 +++++++- docker-compose.yml | 8 +- docs/citegeist-review-notes.md | 110 ++ docs/dc-orig.yml | 89 ++ docs/docker-compose-traefik.env.example | 20 + docs/docker-compose-traefik.yml | 93 ++ docs/postgres-backup.md | 48 + docs/roadmap.md | 115 +- docs/standards-migration-plan.md | 315 +++++ docs/structured-markdown-plan.md | 338 +++++ docs/traefik-deploy.md | 79 ++ scripts/backfill-citations.py | 185 +++ scripts/backup-postgres.sh | 28 + scripts/restore-postgres.sh | 37 + scripts/run-citation-backfill.sh | 21 + 40 files changed, 9099 insertions(+), 175 deletions(-) create mode 100644 apps/api/src/ecospecies_api/citation_enrichment.py create mode 100644 apps/api/src/ecospecies_api/citegeist_bridge.py create mode 100644 apps/api/src/ecospecies_api/document_format.py create mode 100644 apps/api/src/ecospecies_api/document_repository.py create mode 100644 apps/api/test_auth.py create mode 100644 apps/api/test_citation_enrichment.py create mode 100644 apps/api/test_document_format.py create mode 100644 apps/api/test_parser.py create mode 100644 apps/api/tests/test_auth.py create mode 100644 apps/api/tests/test_citation_enrichment.py create mode 100644 apps/api/tests/test_document_format.py create mode 100644 apps/api/tests/test_parser.py create mode 100644 apps/web/bibliography.html create mode 100644 apps/web/bibliography.js create mode 100644 docs/citegeist-review-notes.md create mode 100644 docs/dc-orig.yml create mode 100644 docs/docker-compose-traefik.env.example create mode 100644 docs/docker-compose-traefik.yml create mode 100644 docs/postgres-backup.md create mode 100644 docs/standards-migration-plan.md create mode 100644 docs/structured-markdown-plan.md create mode 100644 docs/traefik-deploy.md create mode 100644 scripts/backfill-citations.py create mode 100644 scripts/backup-postgres.sh create mode 100644 scripts/restore-postgres.sh create mode 100644 scripts/run-citation-backfill.sh diff --git a/.gitignore b/.gitignore index 6401844..a3d8156 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,12 @@ __pycache__/ node_modules/ test-results/ playwright-report/ +*~ +*.env +secrets* +codex* +restart.sh +*lock.json +input-data/ +legacy-data +var/logs/ diff --git a/README.md b/README.md index 1c731e8..180d960 100644 --- a/README.md +++ 
b/README.md @@ -21,7 +21,7 @@ Docker Compose owns all runtime dependencies: - Python services run in `python:3.12-slim` - the Python virtual environment is created in a Docker-managed volume mounted at `/workspace/.docker/venv` - dependencies are installed from `apps/api/requirements.txt` inside that virtual environment -- the legacy corpus is mounted read-only from `../01-legacy-code-and-data` +- the legacy corpus is mounted read-only from a sibling directory, defaulting to `../legacy-corpus` No host Python packages are required for the Compose workflow. @@ -48,6 +48,13 @@ Endpoints: - editor section detail/update: `/api/editor/species/<slug>/sections/<section>` (requires `editor` or `admin`) - editor audit history: `/api/editor/species/<slug>/audit` (requires `editor` or `admin`) +The app can also be served under a URL prefix. A reverse-proxy deployment can publish it at a host and path such as: + +- `ECOSPECIES_HOSTNAME=example.org` +- `ECOSPECIES_BASE_PATH=/apps/ecospecies` + +When the site is served below a path prefix, the frontend derives its API base from the current page URL, and nginx serves both the UI and the proxied API under that same prefix. + If those host ports are already in use, override them when starting Compose, for example: ```bash @@ -87,6 +94,14 @@ Run the browser-level smoke test against the real Compose stack with: ./scripts/check-ui-stack-smoke.sh ``` +Run a bounded citation backfill pass with: + +```bash +./scripts/run-citation-backfill.sh +``` + +The wrapper runs inside `ecospecies-api`, keeps a rotating cursor in `var/citation-backfill.cursor`, and skips a run if another backfill is already active (see the sketch after this README diff). + ## Notes - The importer seeds PostgreSQL from the legacy text corpus before the API starts and now synchronizes by slug instead of truncating the full dataset. @@ -98,6 +113,8 @@ Run the browser-level smoke test against the real Compose stack with: - Initial editor auth uses `ECOSPECIES_AUTH_TOKENS` in the format `token:username:role[,token2:username2:role2]`, where `role` is `viewer`, `editor`, or `admin`. - Editorial workflow state is persisted per species with `draft`, `review`, and `published` statuses. Public endpoints return only `published` records; editor endpoints can inspect and update all records. - Editors can curate top-level metadata and section content from the web UI, and every editorial or section change is recorded in per-species audit history. +- Citation backfill can be scheduled externally, such as with a nightly cron job that runs `./scripts/run-citation-backfill.sh`. Use `ECOSPECIES_BACKFILL_LOG_DIR` if logs should go somewhere other than `var/logs`. +- Citation enrichment that finds no remote metadata match still refreshes the locally parsed BibTeX and normalized citation text, so parser improvements propagate even for unresolved citations. - Summary authoring guidance for future FLELMR-compatible records is in `docs/flelmr-authoring.md`. - Legacy survey and roadmap artifacts are in `docs/`.
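The wrapper's cursor-and-lock behavior described above can be sketched in a few lines of Python. This is a minimal illustration under stated assumptions, not the shipped script: the cursor path matches the README, while the lock-file name, the `run_batch` callable, and the default batch size are hypothetical stand-ins.

```python
import fcntl
import sys
from pathlib import Path

CURSOR_FILE = Path("var/citation-backfill.cursor")
LOCK_FILE = Path("var/citation-backfill.lock")  # hypothetical lock path


def run_batch(slug: str) -> None:
    """Placeholder for the real per-species enrichment call."""
    print(f"backfilling {slug}")


def run_bounded_backfill(slugs: list[str], batch_size: int = 25) -> None:
    """Process the next batch_size species after the stored cursor, wrapping around."""
    if not slugs:
        return
    LOCK_FILE.parent.mkdir(parents=True, exist_ok=True)
    with LOCK_FILE.open("w") as lock:
        try:
            # Skip the run entirely if another backfill already holds the lock.
            fcntl.flock(lock, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except BlockingIOError:
            print("backfill already active; skipping", file=sys.stderr)
            return
        cursor = CURSOR_FILE.read_text().strip() if CURSOR_FILE.exists() else ""
        start = (slugs.index(cursor) + 1) % len(slugs) if cursor in slugs else 0
        for offset in range(min(batch_size, len(slugs))):
            slug = slugs[(start + offset) % len(slugs)]  # rotate through the corpus
            run_batch(slug)
            CURSOR_FILE.write_text(slug)  # persist the rotating cursor after each species
```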
diff --git a/apps/api/src/ecospecies_api/app.py b/apps/api/src/ecospecies_api/app.py index f8c91ed..c75e12e 100644 --- a/apps/api/src/ecospecies_api/app.py +++ b/apps/api/src/ecospecies_api/app.py @@ -15,17 +15,36 @@ from ecospecies_api.auth import ( ) from ecospecies_api.parser import get_default_data_dir, load_species_records from ecospecies_api.repository import ( + add_species_citation_from_candidate, + apply_species_citation_candidate_selection, + create_contributor_species, + get_contributor_species_citations, + get_contributor_species_detail, + get_contributor_species_document, + get_contributor_species_list, + get_species_citation_candidates, + get_editor_species_citations, get_editor_species_detail, + get_species_document, get_editor_species_list, get_editor_species_workflow, + get_minimum_contributor_age, get_species_by_slug, list_species_audit, + list_public_bibliography, get_readiness_status, get_summary_metrics, has_species_data, import_species_payload, list_diagnostics, list_species, + register_contributor, + update_species_citation_enrichment, + backfill_species_citations, + update_species_citations_enrichment_batch, + update_species_citation_review, + update_contributor_species_document_markdown, + update_species_document_markdown, update_species_section, update_species_editorial, ) @@ -99,6 +118,7 @@ class EcoSpeciesHandler(BaseHTTPRequestHandler): { "authenticated": session is not None, "auth_configured": auth_is_configured(), + "minimum_contributor_age": get_minimum_contributor_age(), "user": ( {"username": session.username, "role": session.role} if session is not None @@ -108,6 +128,23 @@ class EcoSpeciesHandler(BaseHTTPRequestHandler): ) return + if path == "/api/contributor/status": + if not self.require_role(session, "contributor"): + return + self.write_json( + { + "status": "ok", + "contributor_access": True, + "user": {"username": session.username, "role": session.role}, + "minimum_age": get_minimum_contributor_age(), + "capabilities": [ + "create_species_draft", + "edit_owned_drafts", + ], + } + ) + return + if path == "/api/editor/status": if not self.require_role(session, "editor"): return @@ -135,10 +172,42 @@ class EcoSpeciesHandler(BaseHTTPRequestHandler): "slug": item["slug"], "title": item["title"], "common_name": item["common_name"], + "scientific_name": item["scientific_name"], + "legacy_identifiers": item["legacy_identifiers"], + "taxon_identifiers": item["taxon_identifiers"], + "primary_taxon_authority": item["primary_taxon_authority"], + "primary_taxon_identifier": item["primary_taxon_identifier"], "publication_status": item["publication_status"], "is_archived": item["is_archived"], "last_modified_by": item["last_modified_by"], "diagnostic_count": len(item["diagnostics"]), + "summary": item["summary"], + } + for item in items + ] + self.write_json({"items": compact, "count": len(compact)}) + return + + if path == "/api/contributor/species": + if not self.require_role(session, "contributor"): + return + search = query.get("search", [""])[0].strip().lower() + items = get_contributor_species_list(session.username, search) + compact = [ + { + "slug": item["slug"], + "title": item["title"], + "common_name": item["common_name"], + "scientific_name": item["scientific_name"], + "legacy_identifiers": item["legacy_identifiers"], + "taxon_identifiers": item["taxon_identifiers"], + "primary_taxon_authority": item["primary_taxon_authority"], + "primary_taxon_identifier": item["primary_taxon_identifier"], + "publication_status": item["publication_status"], + 
"is_archived": item["is_archived"], + "last_modified_by": item["last_modified_by"], + "diagnostic_count": len(item["diagnostics"]), + "summary": item["summary"], } for item in items ] @@ -176,7 +245,68 @@ class EcoSpeciesHandler(BaseHTTPRequestHandler): self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) return - if path.startswith("/api/editor/species/") and not path.endswith("/workflow") and not path.endswith("/editorial") and not path.endswith("/audit"): + if path.startswith("/api/editor/species/") and path.endswith("/document"): + if not self.require_role(session, "editor"): + return + slug = path[len("/api/editor/species/") : -len("/document")].strip("/") + item = get_species_document(slug) + if item is None: + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + return + self.write_json(item) + return + + if path.startswith("/api/editor/species/") and path.endswith("/citations"): + if not self.require_role(session, "editor"): + return + slug = path[len("/api/editor/species/") : -len("/citations")].strip("/") + item = get_editor_species_citations(slug) + if item is None: + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + return + self.write_json(item) + return + + if path.startswith("/api/editor/species/") and "/citations/" in path and path.endswith("/candidates"): + if not self.require_role(session, "editor"): + return + slug, _, tail = path[len("/api/editor/species/") :].partition("/citations/") + citation_tail = tail[: -len("/candidates")].strip("/") + try: + citation_id = int(citation_tail) + except ValueError: + self.write_json({"error": "Invalid citation id"}, status=HTTPStatus.BAD_REQUEST) + return + item = get_species_citation_candidates(slug.strip("/"), citation_id) + if item is None: + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + return + self.write_json(item) + return + + if path.startswith("/api/contributor/species/") and path.endswith("/document"): + if not self.require_role(session, "contributor"): + return + slug = path[len("/api/contributor/species/") : -len("/document")].strip("/") + item = get_contributor_species_document(slug, session.username) + if item is None: + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + return + self.write_json(item) + return + + if path.startswith("/api/contributor/species/") and path.endswith("/citations"): + if not self.require_role(session, "contributor"): + return + slug = path[len("/api/contributor/species/") : -len("/citations")].strip("/") + item = get_contributor_species_citations(slug, session.username) + if item is None: + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + return + self.write_json(item) + return + + if path.startswith("/api/editor/species/") and not path.endswith("/workflow") and not path.endswith("/editorial") and not path.endswith("/audit") and not path.endswith("/document"): if not self.require_role(session, "editor"): return slug = path[len("/api/editor/species/") :].strip("/") @@ -187,6 +317,17 @@ class EcoSpeciesHandler(BaseHTTPRequestHandler): self.write_json(item) return + if path.startswith("/api/contributor/species/") and not path.endswith("/document"): + if not self.require_role(session, "contributor"): + return + slug = path[len("/api/contributor/species/") :].strip("/") + item = get_contributor_species_detail(slug, session.username) + if item is None: + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + return + self.write_json(item) + return + if 
path.startswith("/api/editor/species/") and path.endswith("/workflow"): if not self.require_role(session, "editor"): return @@ -215,6 +356,12 @@ class EcoSpeciesHandler(BaseHTTPRequestHandler): self.write_json({"items": flagged, "count": len(flagged)}) return + if path == "/api/bibliography": + search = query.get("search", [""])[0].strip() + items = list_public_bibliography(search=search) + self.write_json({"items": items, "count": len(items)}) + return + if path == "/api/species": search = query.get("search", [""])[0].strip().lower() species = list_species(search) @@ -225,6 +372,10 @@ class EcoSpeciesHandler(BaseHTTPRequestHandler): "common_name": item["common_name"], "scientific_name": item["scientific_name"], "flelmr_code": item["flelmr_code"], + "legacy_identifiers": item["legacy_identifiers"], + "taxon_identifiers": item["taxon_identifiers"], + "primary_taxon_authority": item["primary_taxon_authority"], + "primary_taxon_identifier": item["primary_taxon_identifier"], "summary": item["summary"], "section_count": item["section_count"], "diagnostic_count": len(item["diagnostics"]), @@ -250,6 +401,47 @@ class EcoSpeciesHandler(BaseHTTPRequestHandler): path = parsed.path session = resolve_auth_session(self.headers) + if path == "/api/contributor/register": + payload = self.read_json_body() + if payload is None: + return + email = payload.get("email") + age_gate_confirmed = payload.get("age_gate_confirmed") + if not isinstance(email, str): + self.write_json({"error": "email must be a string"}, status=HTTPStatus.BAD_REQUEST) + return + if not isinstance(age_gate_confirmed, bool): + self.write_json( + {"error": "age_gate_confirmed must be a boolean"}, + status=HTTPStatus.BAD_REQUEST, + ) + return + try: + result = register_contributor(email=email, age_gate_confirmed=age_gate_confirmed) + except ValueError as exc: + self.write_json({"error": str(exc)}, status=HTTPStatus.BAD_REQUEST) + return + self.write_json({"status": "ok", **result}, status=HTTPStatus.CREATED) + return + + if path == "/api/contributor/species": + if not self.require_role(session, "contributor"): + return + payload = self.read_json_body() + if payload is None: + return + markdown = payload.get("markdown") + if markdown is not None and not isinstance(markdown, str): + self.write_json({"error": "markdown must be a string"}, status=HTTPStatus.BAD_REQUEST) + return + try: + result = create_contributor_species(session.username, markdown) + except ValueError as exc: + self.write_json({"error": str(exc)}, status=HTTPStatus.BAD_REQUEST) + return + self.write_json({"status": "ok", **result}, status=HTTPStatus.CREATED) + return + if path.startswith("/api/editor/species/") and path.endswith("/workflow"): if not self.require_role(session, "editor"): return @@ -341,6 +533,229 @@ class EcoSpeciesHandler(BaseHTTPRequestHandler): self.write_json({"status": "ok", **result}) return + if path.startswith("/api/editor/species/") and path.endswith("/document"): + if not self.require_role(session, "editor"): + return + + payload = self.read_json_body() + if payload is None: + return + + markdown = payload.get("markdown") + if not isinstance(markdown, str): + self.write_json({"error": "markdown must be a string"}, status=HTTPStatus.BAD_REQUEST) + return + + slug = path[len("/api/editor/species/") : -len("/document")].strip("/") + try: + result = update_species_document_markdown( + slug=slug, + markdown=markdown, + username=session.username, + ) + except ValueError as exc: + self.write_json({"error": str(exc)}, status=HTTPStatus.BAD_REQUEST) + return 
+ + if result is None: + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + return + + self.write_json({"status": "ok", **result}) + return + + if ( + path.startswith("/api/editor/species/") + and "/citations/" in path + and not path.endswith("/citations/enrich") + and not path.endswith("/citations/backfill") + ): + if not self.require_role(session, "editor"): + return + + payload = self.read_json_body() + if payload is None: + return + + slug, _, tail = path[len("/api/editor/species/") :].partition("/citations/") + if tail.endswith("/enrich"): + citation_tail = tail[: -len("/enrich")].strip("/") + try: + citation_id = int(citation_tail) + except ValueError: + self.write_json({"error": "Invalid citation id"}, status=HTTPStatus.BAD_REQUEST) + return + + result = update_species_citation_enrichment( + slug=slug.strip("/"), + citation_id=citation_id, + username=session.username, + ) + if result is None: + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + return + + self.write_json({"status": "ok", **result}) + return + + if tail.endswith("/apply-match"): + citation_tail = tail[: -len("/apply-match")].strip("/") + try: + citation_id = int(citation_tail) + except ValueError: + self.write_json({"error": "Invalid citation id"}, status=HTTPStatus.BAD_REQUEST) + return + candidate = payload.get("candidate") + if not isinstance(candidate, dict): + self.write_json({"error": "candidate must be an object"}, status=HTTPStatus.BAD_REQUEST) + return + result = apply_species_citation_candidate_selection( + slug=slug.strip("/"), + citation_id=citation_id, + candidate=candidate, + username=session.username, + ) + if result is None: + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + return + self.write_json({"status": "ok", **result}) + return + + if tail.endswith("/add-match"): + citation_tail = tail[: -len("/add-match")].strip("/") + try: + citation_id = int(citation_tail) + except ValueError: + self.write_json({"error": "Invalid citation id"}, status=HTTPStatus.BAD_REQUEST) + return + candidate = payload.get("candidate") + if not isinstance(candidate, dict): + self.write_json({"error": "candidate must be an object"}, status=HTTPStatus.BAD_REQUEST) + return + result = add_species_citation_from_candidate( + slug=slug.strip("/"), + citation_id=citation_id, + candidate=candidate, + username=session.username, + ) + if result is None: + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + return + self.write_json({"status": "ok", **result}) + return + + try: + citation_id = int(tail.strip("/")) + except ValueError: + self.write_json({"error": "Invalid citation id"}, status=HTTPStatus.BAD_REQUEST) + return + + for field in ("review_status", "normalized_text", "abstract_text", "doi", "citation_key", "entry_type", "draft_bibtex"): + value = payload.get(field) + if value is not None and not isinstance(value, str): + self.write_json( + {"error": f"{field} must be a string"}, + status=HTTPStatus.BAD_REQUEST, + ) + return + + try: + result = update_species_citation_review( + slug=slug.strip("/"), + citation_id=citation_id, + review_status=payload.get("review_status"), + normalized_text=payload.get("normalized_text"), + doi=payload.get("doi"), + citation_key=payload.get("citation_key"), + entry_type=payload.get("entry_type"), + draft_bibtex=payload.get("draft_bibtex"), + abstract_text=payload.get("abstract_text"), + username=session.username, + ) + except ValueError as exc: + self.write_json({"error": str(exc)}, 
status=HTTPStatus.BAD_REQUEST) + return + + if result is None: + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + return + + self.write_json({"status": "ok", **result}) + return + + if path.startswith("/api/editor/species/") and path.endswith("/citations/enrich"): + if not self.require_role(session, "editor"): + return + + payload = self.read_json_body() + if payload is None: + return + + slug = path[len("/api/editor/species/") : -len("/citations/enrich")].strip("/") + result = update_species_citations_enrichment_batch( + slug=slug, + username=session.username, + ) + if result is None: + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + return + + self.write_json({"status": "ok", **result}) + return + + if path.startswith("/api/editor/species/") and path.endswith("/citations/backfill"): + if not self.require_role(session, "editor"): + return + + payload = self.read_json_body() + if payload is None: + return + + slug = path[len("/api/editor/species/") : -len("/citations/backfill")].strip("/") + include_accepted = bool(payload.get("include_accepted", False)) + result = backfill_species_citations( + slug=slug, + username=session.username, + include_accepted=include_accepted, + ) + if result is None: + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + return + + self.write_json({"status": "ok", **result}) + return + + if path.startswith("/api/contributor/species/") and path.endswith("/document"): + if not self.require_role(session, "contributor"): + return + + payload = self.read_json_body() + if payload is None: + return + + markdown = payload.get("markdown") + if not isinstance(markdown, str): + self.write_json({"error": "markdown must be a string"}, status=HTTPStatus.BAD_REQUEST) + return + + slug = path[len("/api/contributor/species/") : -len("/document")].strip("/") + try: + result = update_contributor_species_document_markdown( + slug=slug, + markdown=markdown, + username=session.username, + ) + except ValueError as exc: + self.write_json({"error": str(exc)}, status=HTTPStatus.BAD_REQUEST) + return + + if result is None: + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + return + + self.write_json({"status": "ok", **result}) + return + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) def log_message(self, format: str, *args: object) -> None: diff --git a/apps/api/src/ecospecies_api/auth.py b/apps/api/src/ecospecies_api/auth.py index 9d7f86c..a03fb8e 100644 --- a/apps/api/src/ecospecies_api/auth.py +++ b/apps/api/src/ecospecies_api/auth.py @@ -1,14 +1,21 @@ from __future__ import annotations +import hashlib import os from dataclasses import dataclass from typing import Mapping +from sqlalchemy import select + +from ecospecies_api.db import SessionLocal, create_db_engine +from ecospecies_api.models import Base, ContributorAccount + ROLE_ORDER = { "viewer": 1, - "editor": 2, - "admin": 3, + "contributor": 2, + "editor": 3, + "admin": 4, } @@ -41,17 +48,28 @@ def _parse_token_entry(entry: str) -> tuple[str, AuthSession]: def get_token_registry() -> dict[str, AuthSession]: - configured = os.environ.get("ECOSPECIES_AUTH_TOKENS", "").strip() - if not configured: - return {} - registry: dict[str, AuthSession] = {} - for raw_entry in configured.split(","): - entry = raw_entry.strip() - if not entry: - continue - token, session = _parse_token_entry(entry) - registry[token] = session + configured = os.environ.get("ECOSPECIES_AUTH_TOKENS", "").strip() + registry: dict[str, AuthSession] = {} + if configured: + for raw_entry in
configured.split(","): + entry = raw_entry.strip() + if not entry: + continue + token, session = _parse_token_entry(entry) + registry[token] = session + + engine = create_db_engine() + Base.metadata.create_all(engine) + with SessionLocal() as session: + for account in session.scalars( + select(ContributorAccount).where(ContributorAccount.is_active.is_(True)) + ): + registry[account.token_hash] = AuthSession( + token=account.token_hash, + username=account.email, + role="contributor", + ) return registry @@ -70,7 +87,11 @@ def resolve_auth_session(headers: Mapping[str, str]) -> AuthSession | None: token = get_bearer_token(headers) if not token: return None - return registry.get(token) + direct = registry.get(token) + if direct is not None: + return direct + token_hash = hashlib.sha256(token.encode("utf-8")).hexdigest() + return registry.get(token_hash) def auth_is_configured() -> bool: diff --git a/apps/api/src/ecospecies_api/citation_enrichment.py b/apps/api/src/ecospecies_api/citation_enrichment.py new file mode 100644 index 0000000..9b13156 --- /dev/null +++ b/apps/api/src/ecospecies_api/citation_enrichment.py @@ -0,0 +1,1018 @@ +from __future__ import annotations + +import json +from pathlib import Path +import os +import re +import sys +import urllib.error +import urllib.parse +import urllib.request + +from ecospecies_api.citegeist_bridge import ( + DraftCitation, + HISTORICAL_YEAR_PATTERN, + build_standard_citation_key, + extract_draft_citation, + render_single_bibtex, +) + + +def _load_citegeist_resolution_components(): + citegeist_src = Path(__file__).resolve().parents[5] / "CiteGeist" / "src" + if citegeist_src.exists() and str(citegeist_src) not in sys.path: + sys.path.insert(0, str(citegeist_src)) + try: + from citegeist.bibtex import BibEntry # type: ignore + from citegeist.resolve import MetadataResolver, merge_entries_with_conflicts # type: ignore + from citegeist.sources import SourceClient # type: ignore + except ImportError: + return None, None, None, None + return BibEntry, MetadataResolver, SourceClient, merge_entries_with_conflicts + + +class LocalSourceClient: + def __init__(self, user_agent: str) -> None: + self.user_agent = user_agent + + def get_json(self, url: str) -> dict: + request = urllib.request.Request(url, headers={"User-Agent": self.user_agent}) + with urllib.request.urlopen(request, timeout=15) as response: + return json.loads(response.read().decode("utf-8")) + + +class LocalBibEntry: + def __init__(self, entry_type: str, citation_key: str, fields: dict[str, str]) -> None: + self.entry_type = entry_type + self.citation_key = citation_key + self.fields = fields + + +def _get_cache_dir() -> Path: + configured = os.environ.get("ECOSPECIES_CITEGEIST_CACHE_DIR", "").strip() + if configured: + return Path(configured) + return Path("/tmp/ecospecies-citegeist-cache") + + +def _get_fixtures_dir() -> Path | None: + configured = os.environ.get("ECOSPECIES_CITEGEIST_FIXTURES_DIR", "").strip() + if configured: + return Path(configured) + return None + + +def _normalize_openalex_id(raw_value: str) -> str: + value = raw_value.strip() + if not value: + return "" + if value.startswith("https://openalex.org/"): + return value.rsplit("/", 1)[-1] + return value + + +def _normalize_text(value: str) -> str: + normalized = " ".join(str(value or "").split()) + normalized = re.sub(r"([\(\[\{])\s+", r"\1", normalized) + normalized = re.sub(r"\s+([\)\]\},.;:!?])", r"\1", normalized) + return normalized.strip() + + +def _normalize_abstract_text(value: str) -> str: + normalized = 
_normalize_text(value) + return re.sub(r"^abstract\s*[:.\-]?\s*", "", normalized, flags=re.IGNORECASE) + + +def _normalize_person_display_name(value: str) -> str: + normalized = _normalize_text(value) + if "," not in normalized: + return normalized + + left, right = [part.strip() for part in normalized.split(",", 1)] + if not (_looks_like_initial_block(left) and right): + return normalized + + right_tokens = right.split() + trailing_initials: list[str] = [] + while right_tokens and _looks_like_initial_block(right_tokens[-1]): + trailing_initials.insert(0, right_tokens.pop()) + if not right_tokens: + return normalized + + family = " ".join(right_tokens).strip() + given_parts = [ + _initial_block_to_given_names(" ".join(trailing_initials)), + _initial_block_to_given_names(left), + ] + given = " ".join(part for part in given_parts if part).strip() + return f"{family}, {given}" if given else family + + +def _looks_like_initial_block(value: str) -> bool: + letters = re.sub(r"[^A-Za-z]+", "", value) + return 0 < len(letters) <= 4 and letters.upper() == letters + + +def _initial_block_to_given_names(value: str) -> str: + letters = re.findall(r"[A-Za-z]", value) + return " ".join(f"{letter.upper()}." for letter in letters) + + +def _openalex_abstract_text(inverted_index: dict) -> str: + positions: dict[int, str] = {} + for word, indexes in inverted_index.items(): + for index in indexes: + positions[int(index)] = word + text = _normalize_text(" ".join(word for _, word in sorted(positions.items()))) + return "" if _looks_like_openalex_page_blob(text) else text + + +def _looks_like_openalex_page_blob(text: str) -> bool: + lowered = text.casefold() + blob_markers = ( + "research article|", + "download citation file", + "this content is only available via pdf", + "get citation alerts", + "views icon", + "toolbar search", + "publisher site get access", + "authors info & claims", + "publication history", + "copyright ", + ) + return len(text) > 60 and any(marker in lowered for marker in blob_markers) + + +def _crossref_message_to_entry(message: dict) -> LocalBibEntry: + authors = [] + for author in message.get("author", []): + family = str(author.get("family", "")).strip() + given = str(author.get("given", "")).strip() + full_name = ", ".join(part for part in (family, given) if part) + if full_name: + authors.append(full_name) + title = "" + title_values = message.get("title", []) + if isinstance(title_values, list) and title_values: + title = str(title_values[0]).strip() + year_parts = ( + message.get("issued", {}).get("date-parts", [[None]]) + if isinstance(message.get("issued"), dict) + else [[None]] + ) + year = str(year_parts[0][0] or "").strip() + doi = str(message.get("DOI", "")).strip() + journal = "" + container = message.get("container-title", []) + if isinstance(container, list) and container: + journal = str(container[0]).strip() + abstract = _normalize_abstract_text(str(message.get("abstract", "")).strip()) + fields = { + "author": " and ".join(_normalize_person_display_name(name) for name in authors if name), + "year": year, + "title": _normalize_text(title), + "journal": _normalize_text(journal), + "doi": doi, + "url": str(message.get("URL", "")).strip(), + "volume": str(message.get("volume", "")).strip(), + "number": str(message.get("issue", "")).strip(), + "pages": str(message.get("page", "")).strip(), + "abstract": abstract, + } + citation_key = build_standard_citation_key( + authors=fields.get("author", ""), + year=year, + title=title, + fallback_text=title, + ) + return 
LocalBibEntry("article" if journal else "misc", citation_key, {key: value for key, value in fields.items() if value}) + + +def _datacite_item_to_entry(data: dict) -> LocalBibEntry: + attributes = data.get("attributes", {}) if isinstance(data.get("attributes"), dict) else {} + titles = attributes.get("titles", []) + title = str(titles[0].get("title", "")).strip() if titles else "" + creators = [] + for creator in attributes.get("creators", []): + family = str(creator.get("familyName", "")).strip() + given = str(creator.get("givenName", "")).strip() + name = ", ".join(part for part in (family, given) if part) or str(creator.get("name", "")).strip() + if name: + creators.append(_normalize_person_display_name(name)) + year = str(attributes.get("publicationYear", "")).strip() + doi = str(attributes.get("doi", "")).strip() + publisher = str(attributes.get("publisher", "")).strip() + url = str(attributes.get("url", "")).strip() + container = str(attributes.get("container", "")).strip() + first_page = str(attributes.get("firstPage", "")).strip() + last_page = str(attributes.get("lastPage", "")).strip() + volume = str(attributes.get("volume", "")).strip() + issue = str(attributes.get("issue", "")).strip() + pages = "" + if first_page and last_page: + pages = f"{first_page}-{last_page}" + elif first_page: + pages = first_page + abstract = "" + for description in attributes.get("descriptions", []): + if str(description.get("descriptionType", "")).strip().lower() == "abstract": + abstract = _normalize_abstract_text(str(description.get("description", "")).strip()) + if abstract: + break + fields = { + "author": " and ".join(creators), + "year": year, + "title": _normalize_text(title), + "publisher": _normalize_text(publisher), + "doi": doi, + "url": url, + "journal": _normalize_text(container), + "volume": volume, + "number": issue, + "pages": pages, + "abstract": abstract, + } + citation_key = build_standard_citation_key( + authors=fields.get("author", ""), + year=year, + title=title, + fallback_text=title, + ) + return LocalBibEntry("book" if publisher else "misc", citation_key, {key: value for key, value in fields.items() if value}) + + +def _openalex_work_to_entry(work: dict) -> LocalBibEntry: + authors = [] + for authorship in work.get("authorships", []): + author_name = _normalize_person_display_name(str(authorship.get("author", {}).get("display_name", "")).strip()) + if author_name: + authors.append(author_name) + doi = str(work.get("doi", "")).strip().removeprefix("https://doi.org/") + primary_location = work.get("primary_location", {}) + source = primary_location.get("source", {}) if isinstance(primary_location, dict) else {} + if not isinstance(source, dict): + source = {} + title = str(work.get("display_name", "")).strip() + year = str(work.get("publication_year", "")).strip() + journal = str(source.get("display_name", "")).strip() + openalex_id = _normalize_openalex_id(str(work.get("id", ""))) + biblio = work.get("biblio", {}) if isinstance(work.get("biblio"), dict) else {} + first_page = str(biblio.get("first_page", "")).strip() + last_page = str(biblio.get("last_page", "")).strip() + pages = "" + if first_page and last_page: + pages = f"{first_page}-{last_page}" + elif first_page: + pages = first_page + abstract = "" + if isinstance(work.get("abstract_inverted_index"), dict): + abstract = _openalex_abstract_text(work.get("abstract_inverted_index", {})) + fields = { + "author": " and ".join(authors), + "year": year, + "title": _normalize_text(title), + "journal": _normalize_text(journal), 
+ "doi": doi, + "openalex": openalex_id, + "url": f"https://openalex.org/{openalex_id}" if openalex_id else "", + "volume": str(biblio.get("volume", "")).strip(), + "number": str(biblio.get("issue", "")).strip(), + "pages": pages, + "abstract": abstract, + } + citation_key = build_standard_citation_key( + authors=fields.get("author", ""), + year=year, + title=title, + fallback_text=title or openalex_id, + ) + return LocalBibEntry("article" if journal else "misc", citation_key, {key: value for key, value in fields.items() if value}) + + +def _normalized_title(value: str) -> str: + return re.sub(r"[^a-z0-9]+", " ", value.lower()).strip() + + +def _normalized_tokens(value: str) -> list[str]: + return [token for token in _normalized_title(value).split() if token] + + +def _title_similarity(query_title: str, candidate_title: str) -> float: + query_tokens = _normalized_tokens(query_title) + candidate_tokens = _normalized_tokens(candidate_title) + if not query_tokens or not candidate_tokens: + return 0.0 + overlap = len(set(query_tokens) & set(candidate_tokens)) + longest = max(len(set(query_tokens)), len(set(candidate_tokens)), 1) + return overlap / longest + + +def _select_best_title_match(entries: list[LocalBibEntry], title: str, year: str = "") -> LocalBibEntry | None: + normalized_query = _normalized_title(title) + best_entry: LocalBibEntry | None = None + best_score = 0.0 + for entry in entries: + entry_title = _normalized_title(entry.fields.get("title", "")) + if not entry_title: + continue + if entry_title == normalized_query: + if year and entry.fields.get("year", "") and entry.fields.get("year", "") != year: + continue + return entry + if year and entry.fields.get("year", "") and entry.fields.get("year", "") != year: + continue + score = _title_similarity(title, entry.fields.get("title", "")) + if score > best_score: + best_score = score + best_entry = entry + if best_score >= 0.85: + return best_entry + return None + + +class LocalResolution: + def __init__(self, entry: LocalBibEntry, source_label: str) -> None: + self.entry = entry + self.source_label = source_label + + +class LocalMetadataResolver: + def __init__(self, user_agent: str = "ecospecies/0.1 (citation enrichment)") -> None: + self.source_client = LocalSourceClient(user_agent=user_agent) + + def resolve_entry(self, entry: LocalBibEntry) -> LocalResolution | None: + doi = entry.fields.get("doi", "").strip() + if doi: + resolved = self.resolve_doi(doi) + if resolved is not None: + return resolved + resolved = self.resolve_datacite_doi(doi) + if resolved is not None: + return resolved + + openalex_id = entry.fields.get("openalex", "").strip() + if openalex_id: + resolved = self.resolve_openalex(openalex_id) + if resolved is not None: + return resolved + + title = entry.fields.get("title", "").strip() + if title: + year = entry.fields.get("year", "").strip() + resolved = self.search_crossref_best_match(title, year=year) + if resolved is not None: + return resolved + resolved = self.search_datacite_best_match(title, year=year) + if resolved is not None: + return resolved + resolved = self.search_openalex_best_match(title, year=year) + if resolved is not None: + return resolved + return None + + def resolve_doi(self, doi: str) -> LocalResolution | None: + encoded = urllib.parse.quote(doi, safe="") + payload = self._safe_get_json(f"https://api.crossref.org/works/{encoded}") + if payload is None: + return None + message = payload.get("message", {}) + if not message: + return None + return 
LocalResolution(_crossref_message_to_entry(message), f"crossref:doi:{doi}") + + def resolve_datacite_doi(self, doi: str) -> LocalResolution | None: + encoded = urllib.parse.quote(doi, safe="") + payload = self._safe_get_json(f"https://api.datacite.org/dois/{encoded}") + if payload is None: + return None + data = payload.get("data", {}) + if not data: + return None + return LocalResolution(_datacite_item_to_entry(data), f"datacite:doi:{doi}") + + def resolve_openalex(self, openalex_id: str) -> LocalResolution | None: + normalized = _normalize_openalex_id(openalex_id) + payload = self._safe_get_json(f"https://api.openalex.org/works/{normalized}") + if payload is None or not payload: + return None + return LocalResolution(_openalex_work_to_entry(payload), f"openalex:id:{normalized}") + + def search_crossref_best_match(self, title: str, year: str = "") -> LocalResolution | None: + entries = self.search_crossref_candidates(title) + best = _select_best_title_match([item.entry for item in entries], title=title, year=year) + if best is None: + return None + for candidate in entries: + if candidate.entry is best: + return candidate + return None + + def search_datacite_best_match(self, title: str, year: str = "") -> LocalResolution | None: + entries = self.search_datacite_candidates(title) + best = _select_best_title_match([item.entry for item in entries], title=title, year=year) + if best is None: + return None + for candidate in entries: + if candidate.entry is best: + return candidate + return None + + def search_openalex_best_match(self, title: str, year: str = "") -> LocalResolution | None: + entries = self.search_openalex_candidates(title) + best = _select_best_title_match([item.entry for item in entries], title=title, year=year) + if best is None: + return None + for candidate in entries: + if candidate.entry is best: + return candidate + return None + + def search_crossref_candidates(self, title: str) -> list[LocalResolution]: + query = urllib.parse.urlencode({"query.title": title, "rows": 5}) + payload = self._safe_get_json(f"https://api.crossref.org/works?{query}") + if payload is None: + return [] + results: list[LocalResolution] = [] + for index, item in enumerate(payload.get("message", {}).get("items", []), start=1): + entry = _crossref_message_to_entry(item) + if not _should_keep_candidate_entry(entry): + continue + results.append(LocalResolution(entry, _candidate_source_label("crossref:search", entry, index))) + return results + + def search_datacite_candidates(self, title: str) -> list[LocalResolution]: + query = urllib.parse.urlencode({"query": title, "page[size]": 5}) + payload = self._safe_get_json(f"https://api.datacite.org/dois?{query}") + if payload is None: + return [] + results: list[LocalResolution] = [] + for index, item in enumerate(payload.get("data", []), start=1): + entry = _datacite_item_to_entry(item) + if not _should_keep_candidate_entry(entry): + continue + results.append(LocalResolution(entry, _candidate_source_label("datacite:search", entry, index))) + return results + + def search_openalex_candidates(self, title: str) -> list[LocalResolution]: + query = urllib.parse.urlencode({"search": title, "per-page": 5}) + payload = self._safe_get_json(f"https://api.openalex.org/works?{query}") + if payload is None: + return [] + results: list[LocalResolution] = [] + for index, item in enumerate(payload.get("results", []), start=1): + entry = _openalex_work_to_entry(item) + if not _should_keep_candidate_entry(entry): + continue + results.append(LocalResolution(entry, 
_candidate_source_label("openalex:search", entry, index))) + return results + + def _safe_get_json(self, url: str) -> dict | None: + try: + return self.source_client.get_json(url) + except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError, ValueError): + return None + + +def _candidate_source_label(prefix: str, entry: LocalBibEntry, index: int) -> str: + basis = ( + entry.fields.get("doi", "").strip() + or entry.fields.get("openalex", "").strip() + or entry.citation_key.strip() + or entry.fields.get("title", "").strip() + or str(index) + ) + suffix = re.sub(r"[^a-z0-9]+", "-", basis.lower()).strip("-")[:80] or str(index) + return f"{prefix}:{index}:{suffix}" + + +def _should_keep_candidate_entry(entry: LocalBibEntry) -> bool: + title = _normalize_text(entry.fields.get("title", "")) + if not title: + return False + lowered = title.lower() + if lowered.startswith("referenced work ") or lowered.startswith("untitled"): + return False + if entry.entry_type == "misc" and not ( + entry.fields.get("doi", "").strip() + or entry.fields.get("openalex", "").strip() + or entry.fields.get("journal", "").strip() + or entry.fields.get("booktitle", "").strip() + ): + return False + return True + + +def _merge_entries(base_entry, resolved_entry) -> tuple[LocalBibEntry, list[dict[str, str]]]: + merged_fields = dict(base_entry.fields) + conflicts: list[dict[str, str]] = [] + for key, value in resolved_entry.fields.items(): + if not value: + continue + current_value = merged_fields.get(key, "") + if _is_placeholder_field_value(key, current_value): + merged_fields[key] = value + continue + if not current_value: + merged_fields[key] = value + continue + if current_value != value: + conflicts.append( + { + "field_name": key, + "current_value": current_value, + "proposed_value": value, + } + ) + return LocalBibEntry( + entry_type=(base_entry.entry_type if base_entry.entry_type != "misc" else resolved_entry.entry_type), + citation_key=base_entry.citation_key, + fields=merged_fields, + ), conflicts + + +def _is_placeholder_field_value(field_name: str, value: str) -> bool: + normalized = " ".join((value or "").split()).strip() + if not normalized: + return True + lowered = normalized.lower() + if field_name == "title": + if lowered.startswith("referenced work ") or lowered.startswith("untitled"): + return True + if re.match(rf"^[^,]+,\s*.+?\s{HISTORICAL_YEAR_PATTERN}\.\s+", normalized): + return True + if field_name == "author": + if lowered in {"reference", "unknown", "unknown author"}: + return True + return False + + +def _dedupe_note_parts(parts: list[str]) -> list[str]: + deduped: list[str] = [] + seen: set[str] = set() + for part in parts: + for segment in part.split(";"): + compact = segment.strip() + if not compact or compact in seen: + continue + seen.add(compact) + deduped.append(compact) + return deduped + + +def _looks_like_raw_reference_title(current_value: str, resolved_value: str) -> bool: + normalized_current = " ".join((current_value or "").split()).strip() + normalized_resolved = " ".join((resolved_value or "").split()).strip() + if not normalized_current or not normalized_resolved: + return False + if normalized_current == normalized_resolved: + return False + if _is_placeholder_field_value("title", normalized_current): + return True + if re.match(r"^[^,]+,\s+.+", normalized_current) and re.search(rf"\b{HISTORICAL_YEAR_PATTERN}\.\s+", normalized_current[:48]): + return True + comparison_current = re.sub(r"[^a-z0-9]+", " ", normalized_current.lower()).strip() + comparison_resolved = 
re.sub(r"[^a-z0-9]+", " ", normalized_resolved.lower()).strip() + if comparison_resolved and comparison_resolved in comparison_current and re.match(rf"^[^,]+,.*\b{HISTORICAL_YEAR_PATTERN}\.\s+", normalized_current): + return True + return False + + +def _apply_placeholder_overrides(merged_fields: dict[str, str], base_fields: dict[str, str], resolved_fields: dict[str, str]) -> None: + for field_name in ("title", "author"): + current_value = str(base_fields.get(field_name, "")).strip() + resolved_value = str(resolved_fields.get(field_name, "")).strip() + if field_name == "title" and resolved_value and _looks_like_raw_reference_title(current_value, resolved_value): + merged_fields[field_name] = resolved_value + continue + if _is_placeholder_field_value(field_name, current_value) and resolved_value: + merged_fields[field_name] = resolved_value + + +def _citation_raw_text_looks_like_reference(raw_text: str) -> bool: + normalized = " ".join((raw_text or "").split()).strip() + return bool( + normalized + and ( + (re.match(r"^[^,]+,\s+.+", normalized) and re.search(rf"\b{HISTORICAL_YEAR_PATTERN}\.\s+", normalized[:48])) + or len(normalized) > 80 + ) + ) + + +def _build_base_citation(citation: dict[str, object]) -> DraftCitation | None: + raw_text = str(citation.get("raw_text", "")).strip() + legacy_reference_number = str(citation.get("legacy_reference_number", "")).strip() + if not raw_text: + return None + draft = extract_draft_citation(raw_text, legacy_reference_number) + if draft is None: + return None + if citation.get("doi"): + draft.fields["doi"] = str(citation.get("doi", "")).strip() + if citation.get("openalex_id"): + draft.fields["openalex"] = str(citation.get("openalex_id", "")).strip() + if citation.get("source_url"): + draft.fields["url"] = str(citation.get("source_url", "")).strip() + if citation.get("normalized_text") and not draft.fields.get("note"): + draft.fields["note"] = str(citation.get("normalized_text", "")).strip() + return draft + + +def _render_normalized_text(entry_type: str, fields: dict[str, str]) -> str: + parts: list[str] = [] + author = fields.get("author", "").strip() + year = fields.get("year", "").strip() + title = fields.get("title", "").strip() + venue = ( + fields.get("journal", "").strip() + or fields.get("booktitle", "").strip() + or fields.get("publisher", "").strip() + or fields.get("howpublished", "").strip() + ) + volume = fields.get("volume", "").strip() + number = fields.get("number", "").strip() + pages = fields.get("pages", "").strip() + doi = fields.get("doi", "").strip() + url = fields.get("url", "").strip() + + if author: + parts.append(author) + if year: + parts.append(f"({year})") + if title: + parts.append(title) + if venue: + venue_detail = venue + if volume: + venue_detail += f", {volume}" + if number: + venue_detail += f"({number})" + elif number: + venue_detail += f", ({number})" + if pages: + venue_detail += f": {pages}" + parts.append(venue_detail) + elif pages: + parts.append(f"pp. {pages}") + if entry_type == "book" and fields.get("publisher", "").strip() and fields.get("publisher", "").strip() not in venue: + parts.append(fields["publisher"].strip()) + if doi: + parts.append(f"DOI:{doi}") + elif url: + parts.append(url) + return ". 
".join(part.strip(" .") for part in parts if part).strip() + + +def _normalize_venue(value: str) -> str: + lowered = value.lower() + lowered = re.sub(r"\b(comm|rept|rep|proc|trans)\.\b", "", lowered) + lowered = re.sub(r"\b(commission|report|proceedings|transactions|journal|bulletin|review|letters)\b", "", lowered) + return re.sub(r"[^a-z0-9]+", " ", lowered).strip() + + +def _normalize_pages(value: str) -> str: + return re.sub(r"\s+", "", value).replace("--", "-").strip() + + +def _venue_fields(fields: dict[str, str]) -> str: + return ( + fields.get("journal", "").strip() + or fields.get("booktitle", "").strip() + or fields.get("publisher", "").strip() + or fields.get("howpublished", "").strip() + ) + + +def _author_overlap_score(base_author: str, candidate_author: str) -> float: + base_tokens = {token for token in _normalized_tokens(base_author) if len(token) > 1} + candidate_tokens = {token for token in _normalized_tokens(candidate_author) if len(token) > 1} + if not base_tokens or not candidate_tokens: + return 0.0 + return len(base_tokens & candidate_tokens) / max(len(base_tokens), len(candidate_tokens), 1) + + +def _venue_overlap_score(base_venue: str, candidate_venue: str) -> float: + base_tokens = set(_normalize_venue(base_venue).split()) + candidate_tokens = set(_normalize_venue(candidate_venue).split()) + if not base_tokens or not candidate_tokens: + return 0.0 + return len(base_tokens & candidate_tokens) / max(len(base_tokens), len(candidate_tokens), 1) + + +def _text_overlap_score(base_text: str, candidate_text: str) -> float: + base_tokens = {token for token in _normalized_tokens(base_text) if len(token) > 2} + candidate_tokens = {token for token in _normalized_tokens(candidate_text) if len(token) > 2} + if not base_tokens or not candidate_tokens: + return 0.0 + return len(base_tokens & candidate_tokens) / max(len(base_tokens), len(candidate_tokens), 1) + + +def _compare_field(seed_value: str, candidate_value: str, *, similarity: float = 0.0) -> str: + if not seed_value: + return "seed-missing" + if not candidate_value: + return "candidate-missing" + if seed_value == candidate_value: + return "exact" + if similarity >= 0.6: + return "partial" + return "conflict" + + +def _build_match_details(seed_fields: dict[str, str], candidate_fields: dict[str, str]) -> tuple[dict[str, dict[str, object]], float]: + title_similarity = _title_similarity(seed_fields.get("title", ""), candidate_fields.get("title", "")) + author_similarity = _author_overlap_score(seed_fields.get("author", ""), candidate_fields.get("author", "")) + venue_similarity = _venue_overlap_score(_venue_fields(seed_fields), _venue_fields(candidate_fields)) + abstract_similarity = _text_overlap_score(seed_fields.get("abstract", ""), candidate_fields.get("abstract", "")) + + comparisons = [ + ("author", seed_fields.get("author", "").strip(), candidate_fields.get("author", "").strip(), 10.0, author_similarity), + ("year", seed_fields.get("year", "").strip(), candidate_fields.get("year", "").strip(), 16.0, 1.0 if seed_fields.get("year", "").strip() == candidate_fields.get("year", "").strip() and seed_fields.get("year", "").strip() else 0.0), + ("title", seed_fields.get("title", "").strip(), candidate_fields.get("title", "").strip(), 34.0, title_similarity), + ("abstract", seed_fields.get("abstract", "").strip(), candidate_fields.get("abstract", "").strip(), 8.0, abstract_similarity), + ("venue", _venue_fields(seed_fields), _venue_fields(candidate_fields), 16.0, venue_similarity), + ("volume", seed_fields.get("volume", 
"").strip(), candidate_fields.get("volume", "").strip(), 10.0, 1.0 if seed_fields.get("volume", "").strip() == candidate_fields.get("volume", "").strip() and seed_fields.get("volume", "").strip() else 0.0), + ("number", seed_fields.get("number", "").strip(), candidate_fields.get("number", "").strip(), 4.0, 1.0 if seed_fields.get("number", "").strip() == candidate_fields.get("number", "").strip() and seed_fields.get("number", "").strip() else 0.0), + ("pages", _normalize_pages(seed_fields.get("pages", "")), _normalize_pages(candidate_fields.get("pages", "")), 10.0, 1.0 if _normalize_pages(seed_fields.get("pages", "")) == _normalize_pages(candidate_fields.get("pages", "")) and _normalize_pages(seed_fields.get("pages", "")) else 0.0), + ] + + score = 0.0 + details: dict[str, dict[str, object]] = {} + for field_name, seed_value, candidate_value, weight, similarity in comparisons: + status = _compare_field(seed_value, candidate_value, similarity=similarity) + field_score = 0.0 + if status == "exact": + field_score = weight + elif status == "partial": + field_score = round(weight * min(similarity, 1.0), 2) + elif status == "seed-missing": + field_score = round(weight * 0.35, 2) if candidate_value else 0.0 + score += field_score + details[field_name] = { + "seed": seed_value, + "candidate": candidate_value, + "status": status, + "weight": weight, + "score": field_score, + } + + return details, round(score, 2) + + +def _seed_metadata_conflict(base_fields: dict[str, str], resolved_fields: dict[str, str]) -> str: + base_year = base_fields.get("year", "").strip() + resolved_year = resolved_fields.get("year", "").strip() + if base_year and resolved_year and base_year != resolved_year: + return f"year mismatch: seed {base_year}, resolved {resolved_year}" + + base_venue = _normalize_venue(_venue_fields(base_fields)) + resolved_venue = _normalize_venue(_venue_fields(resolved_fields)) + if base_venue and resolved_venue and base_venue != resolved_venue: + base_tokens = set(base_venue.split()) + resolved_tokens = set(resolved_venue.split()) + if not (base_tokens and resolved_tokens and base_tokens & resolved_tokens): + return "venue mismatch between citation seed and resolved metadata" + + base_volume = base_fields.get("volume", "").strip() + resolved_volume = resolved_fields.get("volume", "").strip() + if base_volume and resolved_volume and base_volume != resolved_volume: + return f"volume mismatch: seed {base_volume}, resolved {resolved_volume}" + + base_number = base_fields.get("number", "").strip() + resolved_number = resolved_fields.get("number", "").strip() + if base_number and resolved_number and base_number != resolved_number: + return f"issue mismatch: seed {base_number}, resolved {resolved_number}" + + base_pages = _normalize_pages(base_fields.get("pages", "")) + resolved_pages = _normalize_pages(resolved_fields.get("pages", "")) + if base_pages and resolved_pages and base_pages != resolved_pages: + return f"pages mismatch: seed {base_pages}, resolved {resolved_pages}" + + return "" + + +def _candidate_to_payload(seed: DraftCitation, resolution: LocalResolution) -> dict[str, object]: + field_matches, score = _build_match_details(seed.fields, resolution.entry.fields) + candidate_fields = {key: value for key, value in resolution.entry.fields.items() if value} + citation_key = build_standard_citation_key( + authors=candidate_fields.get("author", ""), + year=candidate_fields.get("year", ""), + title=candidate_fields.get("title", ""), + fallback_text=candidate_fields.get("title", "") or 
seed.fields.get("title", ""), + ) + draft_bibtex = render_single_bibtex(resolution.entry.entry_type, citation_key, candidate_fields) + return { + "candidate_id": re.sub(r"[^a-z0-9]+", "-", resolution.source_label.lower()).strip("-"), + "source_label": resolution.source_label, + "entry_type": resolution.entry.entry_type, + "citation_key": citation_key, + "fields": candidate_fields, + "abstract_text": candidate_fields.get("abstract", "").strip(), + "normalized_text": _render_normalized_text(resolution.entry.entry_type, candidate_fields), + "draft_bibtex": draft_bibtex, + "score": score, + "field_matches": field_matches, + "conflict_reason": _seed_metadata_conflict(seed.fields, resolution.entry.fields), + } + + +def discover_citation_candidates( + citation: dict[str, object], + resolver: LocalMetadataResolver | None = None, +) -> dict[str, object]: + base = _build_base_citation(citation) + if base is None: + return {"error": "Citation has no raw text to enrich."} + if resolver is None: + resolver = LocalMetadataResolver() + + title = base.fields.get("title", "").strip() + candidates: list[dict[str, object]] = [] + seen_keys: set[str] = set() + if title: + for resolution in ( + resolver.search_crossref_candidates(title) + + resolver.search_datacite_candidates(title) + + resolver.search_openalex_candidates(title) + ): + identity = ( + resolution.entry.fields.get("doi", "").strip() + or resolution.entry.fields.get("openalex", "").strip() + or f"{_normalized_title(resolution.entry.fields.get('title', ''))}:{resolution.entry.fields.get('year', '').strip()}" + ) + if identity in seen_keys: + continue + seen_keys.add(identity) + candidates.append(_candidate_to_payload(base, resolution)) + + candidates.sort( + key=lambda item: ( + -float(item.get("score", 0.0)), + len(str(item.get("conflict_reason", "")).strip()), + str(item.get("source_label", "")), + ) + ) + seed_payload = { + "entry_type": base.entry_type, + "citation_key": base.citation_key, + "fields": dict(base.fields), + "abstract_text": base.fields.get("abstract", "").strip(), + "normalized_text": _render_normalized_text(base.entry_type, base.fields), + "draft_bibtex": render_single_bibtex(base.entry_type, base.citation_key, base.fields), + } + return { + "seed": seed_payload, + "candidate_count": len(candidates), + "candidates": candidates[:8], + } + + +def apply_citation_candidate_selection( + citation: dict[str, object], + candidate: dict[str, object], +) -> dict[str, object]: + base = _build_base_citation(citation) + if base is None: + return { + "enrichment_status": "error", + "enrichment_error": "Citation has no raw text to enrich.", + } + + selected_fields = { + str(key): str(value).strip() + for key, value in dict(candidate.get("fields", {})).items() + if str(value).strip() + } + entry_type = str(candidate.get("entry_type", "")).strip() or "misc" + merged_fields = dict(base.fields) + for key, value in selected_fields.items(): + merged_fields[key] = value + + if citation.get("legacy_reference_number"): + note_parts = [merged_fields.get("note", "").strip()] + note_parts.append( + f"ecospecies_reference_number = {{{str(citation.get('legacy_reference_number', '')).strip()}}}" + ) + merged_fields["note"] = "; ".join(_dedupe_note_parts(note_parts)) + + citation_key = build_standard_citation_key( + authors=merged_fields.get("author", ""), + year=merged_fields.get("year", ""), + title=merged_fields.get("title", ""), + fallback_text=str(citation.get("raw_text", "")).strip(), + ) + draft_bibtex = render_single_bibtex(entry_type, citation_key, 
merged_fields) + return { + "citation_key": citation_key, + "entry_type": entry_type, + "normalized_text": _render_normalized_text(entry_type, merged_fields), + "abstract_text": merged_fields.get("abstract", "").strip(), + "draft_bibtex": draft_bibtex, + "doi": merged_fields.get("doi", "").strip(), + "source_url": merged_fields.get("url", "").strip(), + "openalex_id": merged_fields.get("openalex", "").strip(), + "resolver_source_label": f"editor:selected:{str(candidate.get('source_label', '')).strip()}", + "enrichment_status": "resolved", + "enrichment_error": "", + "conflicts": [], + } + + +def enrich_citation_payload( + citation: dict[str, object], + resolver=None, +) -> dict[str, object]: + base = _build_base_citation(citation) + if base is None: + return { + "enrichment_status": "error", + "enrichment_error": "Citation has no raw text to enrich.", + } + + seed_payload = { + "citation_key": base.citation_key, + "entry_type": base.entry_type, + "normalized_text": _render_normalized_text(base.entry_type, base.fields), + "abstract_text": base.fields.get("abstract", "").strip(), + "draft_bibtex": render_single_bibtex(base.entry_type, base.citation_key, base.fields), + "doi": base.fields.get("doi", "").strip(), + "source_url": base.fields.get("url", "").strip(), + "openalex_id": base.fields.get("openalex", "").strip(), + "resolver_source_label": "", + } + + BibEntry, MetadataResolver, SourceClient, merge_entries_with_conflicts = _load_citegeist_resolution_components() + if MetadataResolver is not None and SourceClient is not None and BibEntry is not None and merge_entries_with_conflicts is not None: + if resolver is None: + resolver = MetadataResolver( + user_agent="ecospecies/0.1 (citation enrichment)", + source_client=SourceClient( + user_agent="ecospecies/0.1 (citation enrichment)", + cache_dir=_get_cache_dir(), + fixtures_dir=_get_fixtures_dir(), + ), + ) + resolution = resolver.resolve_entry( + BibEntry(entry_type=base.entry_type, citation_key=base.citation_key, fields=dict(base.fields)) + ) + merger = merge_entries_with_conflicts + base_entry = BibEntry(entry_type=base.entry_type, citation_key=base.citation_key, fields=dict(base.fields)) + else: + if resolver is None: + resolver = LocalMetadataResolver() + resolution = resolver.resolve_entry( + LocalBibEntry(entry_type=base.entry_type, citation_key=base.citation_key, fields=dict(base.fields)) + ) + merger = _merge_entries + base_entry = LocalBibEntry(entry_type=base.entry_type, citation_key=base.citation_key, fields=dict(base.fields)) + + if resolution is None: + return { + **seed_payload, + "enrichment_status": "unresolved", + "enrichment_error": "No metadata match found from DOI, title, or authority identifiers.", + } + + seed_conflict = _seed_metadata_conflict(base_entry.fields, resolution.entry.fields) + if seed_conflict: + return { + **seed_payload, + "enrichment_status": "unresolved", + "enrichment_error": f"Resolved metadata conflicts with citation seed fields: {seed_conflict}.", + } + + merged, conflicts = merger(base_entry, resolution.entry) + _apply_placeholder_overrides(merged.fields, base_entry.fields, resolution.entry.fields) + resolved_title = str(resolution.entry.fields.get("title", "")).strip() + raw_text = str(citation.get("raw_text", "")).strip() + if resolved_title and raw_text and len(resolved_title) < len(raw_text) and _citation_raw_text_looks_like_reference(raw_text): + merged.fields["title"] = resolved_title + if citation.get("legacy_reference_number"): + note_parts = [merged.fields.get("note", "").strip()] + 
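+        # Preserve the legacy reference number inside the BibTeX note so provenance survives enrichment.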
note_parts.append( + f"ecospecies_reference_number = {{{str(citation.get('legacy_reference_number', '')).strip()}}}" + ) + merged.fields["note"] = "; ".join(_dedupe_note_parts(note_parts)) + + citation_key = build_standard_citation_key( + authors=merged.fields.get("author", ""), + year=merged.fields.get("year", ""), + title=merged.fields.get("title", ""), + fallback_text=str(citation.get("raw_text", "")).strip(), + ) + draft_bibtex = render_single_bibtex(merged.entry_type, citation_key, merged.fields) + return { + "citation_key": citation_key, + "entry_type": merged.entry_type, + "normalized_text": _render_normalized_text(merged.entry_type, merged.fields), + "abstract_text": merged.fields.get("abstract", "").strip(), + "draft_bibtex": draft_bibtex, + "doi": merged.fields.get("doi", "").strip(), + "source_url": merged.fields.get("url", "").strip(), + "openalex_id": merged.fields.get("openalex", "").strip(), + "resolver_source_label": resolution.source_label, + "enrichment_status": "resolved", + "enrichment_error": "", + "conflicts": conflicts, + } diff --git a/apps/api/src/ecospecies_api/citegeist_bridge.py b/apps/api/src/ecospecies_api/citegeist_bridge.py new file mode 100644 index 0000000..98cc67e --- /dev/null +++ b/apps/api/src/ecospecies_api/citegeist_bridge.py @@ -0,0 +1,387 @@ +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +import re +import sys + + +def _load_citegeist_extract(): + citegeist_src = Path(__file__).resolve().parents[5] / "CiteGeist" / "src" + if citegeist_src.exists() and str(citegeist_src) not in sys.path: + sys.path.insert(0, str(citegeist_src)) + try: + from citegeist.extract import extract_references # type: ignore + except ImportError: + return None + return extract_references + + +@dataclass +class DraftCitation: + citation_key: str + entry_type: str + fields: dict[str, str] + draft_bibtex: str + + +STOPWORD_TOKENS = { + "a", + "an", + "and", + "for", + "from", + "in", + "of", + "on", + "the", + "to", + "with", +} +HISTORICAL_YEAR_PATTERN = r"(1\d{3}|20\d{2})" + + +def build_standard_citation_key( + authors: str = "", + year: str = "", + title: str = "", + fallback_text: str = "", +) -> str: + family_name = _family_name_stem(authors or fallback_text) + year_stem = re.sub(r"[^0-9]+", "", year)[:4] + topic_stem = _topic_stem(title or fallback_text) + key = f"{family_name}{year_stem}{topic_stem}" + return key or "reference" + + +def extract_draft_citation(raw_text: str, legacy_reference_number: str = "") -> DraftCitation | None: + extractor = _load_citegeist_extract() + if extractor is None: + return _fallback_citation(raw_text, legacy_reference_number) + + entries = extractor(raw_text) + if not entries: + return _fallback_citation(raw_text, legacy_reference_number) + + entry = entries[0] + fields = dict(entry.fields) + fields = _repair_reference_fields(raw_text, fields) + citation_key = build_standard_citation_key( + authors=str(fields.get("author", "")), + year=str(fields.get("year", "")), + title=str(fields.get("title", "")), + fallback_text=raw_text, + ) + note_parts = [fields.get("note", "").strip()] if fields.get("note") else [] + if legacy_reference_number: + note_parts.append(f"ecospecies_reference_number = {{{legacy_reference_number}}}") + fields["note"] = "; ".join(part for part in note_parts if part) + draft_bibtex = render_single_bibtex(entry.entry_type, citation_key, fields) + return DraftCitation( + citation_key=citation_key, + entry_type=entry.entry_type, + fields=fields, + draft_bibtex=draft_bibtex, 
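+        # Draft record: extractor fields repaired against the raw text, plus a deterministic citation key.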
+ ) + + +def _fallback_citation(raw_text: str, legacy_reference_number: str) -> DraftCitation: + year_match = re.search(rf"\b{HISTORICAL_YEAR_PATTERN}\b", raw_text) + year = year_match.group(0) if year_match else "" + fields = _repair_reference_fields( + raw_text, + { + "title": raw_text.strip(), + "year": year, + }, + ) + title = str(fields.get("title", "")).strip() or raw_text.strip() + citation_key = build_standard_citation_key(year=year, title=title, fallback_text=raw_text) + fields["note"] = f"raw_reference = {{{raw_text}}}" + if legacy_reference_number: + fields["note"] += f"; ecospecies_reference_number = {{{legacy_reference_number}}}" + draft_bibtex = render_single_bibtex("misc", citation_key, fields) + return DraftCitation( + citation_key=citation_key, + entry_type="misc", + fields=fields, + draft_bibtex=draft_bibtex, + ) + + +def _family_name_stem(raw_text: str) -> str: + compact = raw_text.strip() + if not compact: + return "ref" + if "," in compact: + compact = compact.split(",", 1)[0] + else: + compact = compact.split()[0] + compact = re.sub(r"[^A-Za-z0-9]+", "", compact).lower() + return compact or "ref" + + +def _topic_stem(raw_text: str) -> str: + tokens = [ + token + for token in re.findall(r"[A-Za-z0-9]+", raw_text.lower()) + if token not in STOPWORD_TOKENS and not token.isdigit() + ] + topic_tokens = tokens[:3] or ["topic"] + return "".join(topic_tokens) + + +def _repair_reference_fields(raw_text: str, fields: dict[str, str]) -> dict[str, str]: + repaired = dict(fields) + title = str(repaired.get("title", "")).strip() + raw = raw_text.strip() + if not raw: + return repaired + + parsed = _parse_report_style_reference(raw) + if parsed is None: + return repaired + + current_venue = ( + str(repaired.get("journal", "")).strip() + or str(repaired.get("howpublished", "")).strip() + or str(repaired.get("booktitle", "")).strip() + or str(repaired.get("publisher", "")).strip() + ) + parsed_venue = str(parsed.get("venue", "")).strip() + needs_structural_repair = bool( + parsed_venue + and ( + not current_venue + or len(current_venue) < max(8, len(parsed_venue) // 2) + or current_venue.lower() not in parsed_venue.lower() + or (parsed.get("volume") and not str(repaired.get("volume", "")).strip()) + or (parsed.get("number") and not str(repaired.get("number", "")).strip()) + or (parsed.get("pages") and not str(repaired.get("pages", "")).strip()) + ) + ) + if title and not _title_looks_like_raw_reference(title) and not needs_structural_repair: + return repaired + + if parsed.get("author"): + repaired["author"] = parsed["author"] + if parsed.get("year"): + repaired["year"] = parsed["year"] + if parsed.get("title"): + repaired["title"] = parsed["title"] + venue = parsed.get("venue", "") + if venue: + repaired.pop("howpublished", None) + if _venue_looks_journal_like(venue): + repaired["journal"] = venue + else: + repaired["howpublished"] = venue + if parsed.get("volume"): + repaired["volume"] = parsed["volume"] + if parsed.get("number"): + repaired["number"] = parsed["number"] + if parsed.get("pages"): + repaired["pages"] = parsed["pages"] + return repaired + + +def _title_looks_like_raw_reference(title: str) -> bool: + compact = " ".join(title.split()).strip() + if not compact: + return True + if len(compact) > 120: + return True + return bool(re.match(rf"^[^,]+,\s+.+\b{HISTORICAL_YEAR_PATTERN}\.\s+", compact)) + + +def _parse_report_style_reference(raw_text: str) -> dict[str, str] | None: + match = re.match( + rf"^(?P.+?)\s+(?P{HISTORICAL_YEAR_PATTERN})\.\s+(?P.+)$", + raw_text.strip(), 
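+        # Expected named groups: author block, four-digit year, then the remainder (title plus venue text).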
+ ) + if match is None: + return None + + author = match.group("author").strip(" .") + year = match.group("year").strip() + remainder = match.group("remainder").strip() + if not author or not remainder: + return None + + venue_start = _find_venue_start(remainder) + if venue_start is None: + return { + "author": author, + "year": year, + "title": remainder.strip(" ."), + "venue": "", + } + + title = remainder[:venue_start].strip(" .") + venue_part = remainder[venue_start:].strip(" .") + venue, volume, number, pages = _split_venue_and_locator(venue_part) + return { + "author": author, + "year": year, + "title": title, + "venue": venue, + "volume": volume, + "number": number, + "pages": pages, + } + + +def _split_venue_and_locator(venue_part: str) -> tuple[str, str, str, str]: + compact = venue_part.strip(" .") + if not compact: + return "", "", "", "" + + match = re.search( + r"(?P.+?)\.\s+(?P\d+)(?:\((?P[^)]+)\))?\s*:\s*(?P\d+(?:-\d+)?)\.?$", + compact, + ) + if match is None: + match = re.search( + r"(?P.+?)\s+(?P\d+)(?:\((?P[^)]+)\))?\s*:\s*(?P\d+(?:-\d+)?)\.?$", + compact, + ) + if match is None: + return compact, "", "", "" + + return ( + match.group("venue").strip(" ."), + (match.group("volume") or "").strip(), + (match.group("number") or "").strip(), + (match.group("pages") or "").strip(), + ) + + +def _find_venue_start(remainder: str) -> int | None: + for match in re.finditer(r"\.\s+", remainder): + candidate_start = match.end() + candidate = remainder[candidate_start:].strip() + if _looks_like_publication_segment(candidate): + return candidate_start + + lowered = remainder.lower() + markers = ( + "comm. rept.", + "rept.", + "proc.", + "procs.", + "journal", + "transactions", + "proceedings", + "bulletin", + "bull.", + "occas. pap.", + "pap.", + "memoir", + "memorandum", + "memo.", + "tech. memo.", + "tech memo", + "technical memorandum", + "technical report", + "noaa", + ) + positions = [lowered.find(marker) for marker in markers if lowered.find(marker) > 0] + if positions: + return min(positions) + return None + + +def _looks_like_publication_segment(candidate: str) -> bool: + compact = candidate.strip(" .") + if not compact: + return False + + venue, volume, number, pages = _split_venue_and_locator(compact) + if venue and (volume or number or pages) and _starts_with_publication_marker(compact): + return True + + return _starts_with_publication_marker(compact) + + +def _starts_with_publication_marker(text: str) -> bool: + lowered = text.lower() + publication_starts = ( + "comm. rept.", + "rept.", + "proc.", + "procs.", + "journal", + "transactions", + "proceedings", + "bulletin", + "bull.", + "occas. pap.", + "pap.", + "memoir", + "memorandum", + "memo.", + "tech. memo.", + "tech memo", + "technical memorandum", + "technical report", + "noaa", + "u.s.", + ) + return lowered.startswith(publication_starts) + + +def _venue_looks_journal_like(venue: str) -> bool: + lowered = venue.lower() + return any( + token in lowered + for token in ( + "journal", + "transactions", + "review", + "letters", + "comm. rept.", + "rept.", + "proc.", + "proceedings", + "occas. 
pap.", + "pap.", + ) + ) + + +def render_single_bibtex(entry_type: str, citation_key: str, fields: dict[str, str]) -> str: + lines = [f"@{entry_type}{{{citation_key},"] + for key in sorted(fields): + value = _sanitize_bibtex_value(fields[key]) + lines.append(f" {key} = {{{value}}},") + lines.append("}") + return "\n".join(lines) + + +def _sanitize_bibtex_value(value: str) -> str: + depth = 0 + parts: list[str] = [] + for char in value: + if char == "{": + depth += 1 + parts.append(char) + continue + if char == "}": + if depth == 0: + parts.append(")") + else: + depth -= 1 + parts.append(char) + continue + parts.append(char) + if depth > 0: + open_count = depth + normalized: list[str] = [] + for char in parts: + if char == "{" and open_count > 0: + normalized.append("(") + open_count -= 1 + else: + normalized.append(char) + return "".join(normalized) + return "".join(parts) diff --git a/apps/api/src/ecospecies_api/document_format.py b/apps/api/src/ecospecies_api/document_format.py new file mode 100644 index 0000000..d1b2cf3 --- /dev/null +++ b/apps/api/src/ecospecies_api/document_format.py @@ -0,0 +1,480 @@ +from __future__ import annotations + +import json +import re +from dataclasses import asdict, dataclass, field + + +HEADING_PATTERN = re.compile(r"^(#{2,6})\s+(?P.+?)\s*$") +INDENTED_ITEM_PATTERN = re.compile(r"^\s*-\s*(?P<body>.+?)\s*$") +DOI_PATTERN = re.compile(r"\b10\.\d{4,9}/[-._;()/:A-Za-z0-9]+\b") + + +@dataclass +class DocumentNode: + node_type: str + title: str + body: str + depth: int + children: list["DocumentNode"] = field(default_factory=list) + + +@dataclass +class StructuredDocument: + metadata: dict[str, object] + nodes: list[DocumentNode] + + +def _parse_scalar_value(value: str) -> object: + stripped = value.strip() + if not stripped: + return "" + if stripped.lower() == "true": + return True + if stripped.lower() == "false": + return False + if stripped.startswith("{") or stripped.startswith("["): + try: + return json.loads(stripped) + except json.JSONDecodeError: + return stripped + return stripped + + +def _normalize_whitespace(value: str) -> str: + return re.sub(r"\s+", " ", value).strip() + + +def _parse_front_matter(front_matter: str) -> dict[str, object]: + metadata: dict[str, object] = {} + lines = front_matter.splitlines() + index = 0 + + while index < len(lines): + raw_line = lines[index] + if not raw_line.strip() or raw_line.lstrip().startswith("#"): + index += 1 + continue + if ":" not in raw_line: + index += 1 + continue + + key, value = raw_line.split(":", 1) + normalized_key = key.strip() + stripped_value = value.strip() + if stripped_value: + metadata[normalized_key] = _parse_scalar_value(stripped_value) + index += 1 + continue + + items: list[dict[str, object]] = [] + index += 1 + while index < len(lines): + item_line = lines[index] + if not item_line.strip(): + index += 1 + continue + if not item_line.startswith(" - "): + break + + match = INDENTED_ITEM_PATTERN.match(item_line) + if not match: + break + item: dict[str, object] = {} + first_body = match.group("body") + if ":" in first_body: + item_key, item_value = first_body.split(":", 1) + item[item_key.strip()] = _parse_scalar_value(item_value.strip()) + index += 1 + + while index < len(lines): + nested_line = lines[index] + if nested_line.startswith(" ") and ":" in nested_line.strip(): + nested_key, nested_value = nested_line.strip().split(":", 1) + item[nested_key.strip()] = _parse_scalar_value(nested_value.strip()) + index += 1 + continue + break + + items.append(item) + + metadata[normalized_key] 
= items + + return metadata + + +def _split_front_matter(text: str) -> tuple[dict[str, object], str]: + stripped = text.lstrip() + if not stripped.startswith("---\n"): + return {}, text + + _, _, remainder = stripped.partition("---\n") + front_matter, separator, body = remainder.partition("\n---\n") + if not separator: + return {}, text + + return _parse_front_matter(front_matter), body + + +def parse_markdown_document(text: str) -> StructuredDocument: + metadata, body = _split_front_matter(text) + root_nodes: list[DocumentNode] = [] + stack: list[DocumentNode] = [] + body_lines: list[str] = [] + + def flush_body() -> None: + if not stack: + body_lines.clear() + return + stack[-1].body = "\n".join(body_lines).strip() + body_lines.clear() + + for raw_line in body.splitlines(): + match = HEADING_PATTERN.match(raw_line) + if not match: + body_lines.append(raw_line) + continue + + flush_body() + depth = len(match.group(1)) + node = DocumentNode( + node_type="section", + title=match.group("title").strip(), + body="", + depth=depth, + ) + + while stack and stack[-1].depth >= depth: + stack.pop() + + if stack: + stack[-1].children.append(node) + else: + root_nodes.append(node) + stack.append(node) + + flush_body() + return StructuredDocument(metadata=metadata, nodes=root_nodes) + + +def validate_markdown_document(text: str) -> list[str]: + errors: list[str] = [] + metadata, body = _split_front_matter(text) + if not metadata: + errors.append("Markdown document must include YAML front matter.") + + last_depth: int | None = None + for raw_line in body.splitlines(): + match = HEADING_PATTERN.match(raw_line) + if not match: + continue + depth = len(match.group(1)) + if last_depth is not None and depth > last_depth + 1: + errors.append( + f"Heading depth jumps from level {last_depth} to level {depth}: {match.group('title').strip()}" + ) + last_depth = depth + + return errors + + +def _append_metadata_lines(lines: list[str], key: str, value: object) -> None: + if isinstance(value, list): + lines.append(f"{key}:") + for item in value: + if isinstance(item, dict) and item: + first = True + for item_key, item_value in item.items(): + rendered = "true" if item_value is True else "false" if item_value is False else str(item_value) + prefix = " - " if first else " " + lines.append(f"{prefix}{item_key}: {rendered}") + first = False + else: + lines.append(f" - {item}") + return + + rendered = "true" if value is True else "false" if value is False else str(value) + lines.append(f"{key}: {rendered}") + + +def export_markdown_document(document: StructuredDocument) -> str: + lines: list[str] = ["---"] + for key, value in document.metadata.items(): + _append_metadata_lines(lines, key, value) + lines.append("---") + lines.append("") + + def append_nodes(nodes: list[DocumentNode]) -> None: + for node in nodes: + lines.append(f"{'#' * node.depth} {node.title}") + if node.body: + lines.append(node.body) + lines.append("") + append_nodes(node.children) + + append_nodes(document.nodes) + return "\n".join(lines).rstrip() + "\n" + + +def flatten_document_nodes(document: StructuredDocument) -> list[dict[str, object]]: + flattened: list[dict[str, object]] = [] + + def visit(nodes: list[DocumentNode], parent_id: str | None) -> None: + for index, node in enumerate(nodes, start=1): + node_id = f"node-{len(flattened) + 1}" + flattened.append( + { + "node_id": node_id, + "parent_id": parent_id, + "position": index, + "depth": node.depth, + "node_type": node.node_type, + "title": node.title, + "body_markdown": node.body, + 
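+                    # body_plaintext mirrors the markdown body for now; no plaintext stripping is applied yet.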
"body_plaintext": node.body, + } + ) + visit(node.children, node_id) + + visit(document.nodes, None) + return flattened + + +def document_to_json(document: StructuredDocument) -> str: + return json.dumps(asdict(document), ensure_ascii=True) + + +def build_document_from_species_payload(item: dict[str, object]) -> StructuredDocument: + legacy_identifiers: list[dict[str, object]] = [] + if item.get("flelmr_code"): + legacy_identifiers.append( + { + "authority": "legacy-ecospecies", + "identifier": str(item.get("flelmr_code", "")), + "label": "FLELMR", + } + ) + + metadata = { + "title": str(item.get("title", "")), + "common_name": str(item.get("common_name", "")), + "scientific_name": str(item.get("scientific_name", "")), + "legacy_identifiers": legacy_identifiers, + "taxon_identifiers": list(item.get("taxon_identifiers", [])), + "primary_taxon_authority": str(item.get("primary_taxon_authority", "")), + "source_file": str(item.get("source_file", "")), + "publication_status": str(item.get("publication_status", "published")), + "source_format": "ecospecies-markdown-v1", + } + + nodes: list[DocumentNode] = [] + summary = str(item.get("summary", "")).strip() + if summary: + nodes.append( + DocumentNode( + node_type="section", + title="Summary", + body=summary, + depth=2, + ) + ) + + for section in item.get("sections", []): + heading = str(section.get("heading", "")).strip() + if not heading or heading == "HEADER": + continue + nodes.append( + DocumentNode( + node_type="section", + title=heading, + body=str(section.get("content", "")).strip(), + depth=2, + ) + ) + + return StructuredDocument(metadata=metadata, nodes=nodes) + + +def extract_species_projection(document: StructuredDocument) -> dict[str, object]: + metadata = document.metadata + summary = "" + sections: list[dict[str, object]] = [] + legacy_identifiers = metadata.get("legacy_identifiers", []) + taxon_identifiers = metadata.get("taxon_identifiers", []) + + flelmr_code = "" + if isinstance(legacy_identifiers, list): + for item in legacy_identifiers: + if not isinstance(item, dict): + continue + authority = str(item.get("authority", "")).strip().lower() + label = str(item.get("label", "")).strip().lower() + if authority == "legacy-ecospecies" or label == "flelmr": + flelmr_code = str(item.get("identifier", "")).strip() + if flelmr_code: + break + if not flelmr_code: + flelmr_code = str(metadata.get("species_code", "")).strip() + + def visit(nodes: list[DocumentNode], path: list[str]) -> None: + nonlocal summary + for node in nodes: + current_path = [*path, node.title] + if node.title.lower() == "summary" and not summary: + summary = node.body.strip() + else: + sections.append( + { + "heading": " / ".join(current_path), + "content": node.body.strip(), + } + ) + visit(node.children, current_path) + + visit(document.nodes, []) + return { + "title": metadata.get("title", ""), + "common_name": metadata.get("common_name", ""), + "scientific_name": metadata.get("scientific_name", ""), + "flelmr_code": flelmr_code, + "legacy_identifiers": legacy_identifiers if isinstance(legacy_identifiers, list) else [], + "taxon_identifiers": taxon_identifiers if isinstance(taxon_identifiers, list) else [], + "primary_taxon_authority": str(metadata.get("primary_taxon_authority", "")), + "summary": summary, + "sections": sections, + } + + +def _is_citation_heading(title: str) -> bool: + lowered = title.strip().rstrip(":").lower() + return lowered in { + "references", + "reference", + "citations", + "citation", + "bibliography", + "related references", + 
"related citations", + } + + +def _split_citation_lines(body: str) -> list[str]: + entries: list[dict[str, str]] = [] + current: list[str] = [] + current_number = "" + + def flush() -> None: + nonlocal current_number + if not current: + return + compact = " ".join(part.strip() for part in current if part.strip()).strip() + if compact: + entries.append( + { + "legacy_reference_number": current_number, + "raw_text": compact, + } + ) + current.clear() + current_number = "" + + for raw_line in body.splitlines(): + stripped = raw_line.strip() + if not stripped: + flush() + continue + + leading_number_match = re.match(r"^(?P<num>\d+)\s*,\s*(?P<text>.+)$", stripped) + if leading_number_match: + flush() + current_number = leading_number_match.group("num") + current.append(leading_number_match.group("text")) + continue + + bare_number_match = re.match(r"^(?P<num>\d+)\s+(?P<text>[A-Z].+)$", stripped) + if bare_number_match: + flush() + current_number = bare_number_match.group("num") + current.append(bare_number_match.group("text")) + continue + + bullet_match = re.match( + r"^(?:[-*]|\[(?P<bracket_num>\d+)\]|(?P<plain_num>\d+)[\.,])\s+(?P<text>.+)$", + stripped, + ) + if bullet_match: + flush() + current_number = bullet_match.group("bracket_num") or bullet_match.group("plain_num") or "" + bullet_text = bullet_match.group("text") + if not current_number: + nested_number_match = re.match(r"^\[(?P<num>\d+)\]\s+(?P<text>.+)$", bullet_text) + if nested_number_match: + current_number = nested_number_match.group("num") + bullet_text = nested_number_match.group("text") + else: + nested_comma_match = re.match(r"^(?P<num>\d+)\s*,\s*(?P<text>.+)$", bullet_text) + if nested_comma_match: + current_number = nested_comma_match.group("num") + bullet_text = nested_comma_match.group("text") + current.append(bullet_text) + continue + + current.append(stripped) + + flush() + return entries + + +def extract_citation_entries(document: StructuredDocument) -> list[dict[str, object]]: + entries: list[dict[str, object]] = [] + + def visit(nodes: list[DocumentNode], path: list[str]) -> None: + for node in nodes: + current_path = [*path, node.title] + if _is_citation_heading(node.title): + section_heading = " / ".join(current_path) + for item in _split_citation_lines(node.body): + raw_text = item["raw_text"] + doi_match = DOI_PATTERN.search(raw_text) + entries.append( + { + "section_heading": section_heading, + "legacy_reference_number": item["legacy_reference_number"], + "raw_text": raw_text, + "normalized_text": _normalize_whitespace(raw_text), + "doi": doi_match.group(0) if doi_match else "", + } + ) + visit(node.children, current_path) + + visit(document.nodes, []) + return entries + + +def add_citation_to_document( + document: StructuredDocument, + citation_text: str, + heading_title: str = "Related References", +) -> bool: + normalized_citation = _normalize_whitespace(citation_text) + if not normalized_citation: + return False + + for node in document.nodes: + if _is_citation_heading(node.title): + existing = {_normalize_whitespace(item["raw_text"]) for item in _split_citation_lines(node.body)} + if normalized_citation in existing: + return False + body = node.body.rstrip() + node.body = f"{body}\n- {citation_text}".strip() if body else f"- {citation_text}" + return True + + document.nodes.append( + DocumentNode( + node_type="section", + title=heading_title, + body=f"- {citation_text}", + depth=2, + ) + ) + return True diff --git a/apps/api/src/ecospecies_api/document_repository.py 
b/apps/api/src/ecospecies_api/document_repository.py new file mode 100644 index 0000000..e4ebaeb --- /dev/null +++ b/apps/api/src/ecospecies_api/document_repository.py @@ -0,0 +1,267 @@ +from __future__ import annotations + +from sqlalchemy import select + +from ecospecies_api.citegeist_bridge import extract_draft_citation +from ecospecies_api.document_format import ( + build_document_from_species_payload, + document_to_json, + extract_citation_entries, + extract_species_projection, + export_markdown_document, + flatten_document_nodes, + parse_markdown_document, + validate_markdown_document, +) +from ecospecies_api.models import ( + DocumentSection, + Species, + SpeciesCitation, + SpeciesDocument, + SpeciesDocumentNode, + SpeciesTaxonIdentifier, +) + + +def _persist_taxon_identifiers(session, species: Species, taxon_identifiers: list[dict[str, object]]) -> None: + for identifier in list(species.taxon_identifiers): + session.delete(identifier) + session.flush() + + for position, item in enumerate(taxon_identifiers, start=1): + authority = str(item.get("authority", "")).strip() + identifier = str(item.get("identifier", "")).strip() + if not authority or not identifier: + continue + session.add( + SpeciesTaxonIdentifier( + species_id=species.id, + position=position, + authority=authority, + identifier=identifier, + label=str(item.get("label", "")).strip(), + is_primary=bool(item.get("primary") or item.get("is_primary")), + source_url=str(item.get("source_url", "")).strip(), + ) + ) + + +def _existing_taxon_identifier_payload(species: Species) -> list[dict[str, object]]: + return [ + { + "authority": item.authority, + "identifier": item.identifier, + "label": item.label, + "primary": item.is_primary, + "source_url": item.source_url, + } + for item in species.taxon_identifiers + ] + + +def _citation_match_key(item: dict[str, object]) -> tuple[str, str, str]: + return ( + str(item.get("section_heading", "")).strip(), + str(item.get("legacy_reference_number", "")).strip(), + str(item.get("raw_text", "")).strip(), + ) + + +def _persist_citations(session, species: Species, citations: list[dict[str, object]]) -> None: + existing_by_key = { + _citation_match_key( + { + "section_heading": citation.section_heading, + "legacy_reference_number": citation.legacy_reference_number, + "raw_text": citation.raw_text, + } + ): citation + for citation in species.citations + } + retained_ids: set[int] = set() + + for position, item in enumerate(citations, start=1): + raw_text = str(item.get("raw_text", "")).strip() + if not raw_text: + continue + key = _citation_match_key(item) + legacy_reference_number = str(item.get("legacy_reference_number", "")).strip() + existing = existing_by_key.get(key) + extracted_normalized = str(item.get("normalized_text", "")).strip() + extracted_doi = str(item.get("doi", "")).strip() + draft = extract_draft_citation(raw_text, legacy_reference_number) + + if existing is None: + session.add( + SpeciesCitation( + species_id=species.id, + position=position, + section_heading=str(item.get("section_heading", "")).strip(), + legacy_reference_number=legacy_reference_number, + citation_key=draft.citation_key if draft is not None else "", + entry_type=draft.entry_type if draft is not None else "misc", + raw_text=raw_text, + normalized_text=extracted_normalized, + abstract_text="", + draft_bibtex=draft.draft_bibtex if draft is not None else "", + doi=extracted_doi, + source_url="", + openalex_id="", + resolver_source_label="", + enrichment_status="pending", + enrichment_error="", + 
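+                    # New extract rows start as pending; the citation-enrichment backfill resolves them later.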
source_type="document_extract", + review_status="draft", + ) + ) + continue + + existing.position = position + existing.section_heading = str(item.get("section_heading", "")).strip() + existing.legacy_reference_number = legacy_reference_number + existing.raw_text = raw_text + if existing.review_status == "draft": + existing.normalized_text = extracted_normalized + existing.abstract_text = "" + existing.doi = extracted_doi + existing.citation_key = draft.citation_key if draft is not None else "" + existing.entry_type = draft.entry_type if draft is not None else "misc" + existing.draft_bibtex = draft.draft_bibtex if draft is not None else "" + existing.source_type = "document_extract" + existing.enrichment_status = "pending" + existing.enrichment_error = "" + existing.resolver_source_label = "" + existing.source_url = "" + existing.openalex_id = "" + retained_ids.add(existing.id) + session.add(existing) + + for citation in list(species.citations): + if citation.id not in retained_ids and citation.source_type in {"document_extract", "editor_review"}: + session.delete(citation) + + +def _persist_document_model(session, species: Species, document_model, markdown_content: str, updated_by: str) -> None: + ast_json = document_to_json(document_model) + document = session.scalar( + select(SpeciesDocument).where(SpeciesDocument.species_id == species.id) + ) + if document is None: + document = SpeciesDocument( + species_id=species.id, + source_format="ecospecies-markdown-v1", + markdown_content=markdown_content, + ast_json=ast_json, + updated_by=updated_by, + ) + session.add(document) + session.flush() + else: + document.source_format = "ecospecies-markdown-v1" + document.markdown_content = markdown_content + document.ast_json = ast_json + document.updated_by = updated_by + session.add(document) + + for node in list(document.nodes): + session.delete(node) + session.flush() + + for node in flatten_document_nodes(document_model): + session.add( + SpeciesDocumentNode( + document_id=document.id, + parent_node_ref=node["parent_id"], + node_ref=node["node_id"], + position=node["position"], + depth=node["depth"], + node_type=node["node_type"], + title=node["title"], + body_markdown=node["body_markdown"], + body_plaintext=node["body_plaintext"], + ) + ) + + +def sync_species_document(session, species: Species, item: dict[str, object]) -> None: + payload = dict(item) + if "taxon_identifiers" not in payload or not payload.get("taxon_identifiers"): + payload["taxon_identifiers"] = _existing_taxon_identifier_payload(species) + if "primary_taxon_authority" not in payload or not payload.get("primary_taxon_authority"): + for identifier in payload["taxon_identifiers"]: + if bool(identifier.get("primary")): + payload["primary_taxon_authority"] = str(identifier.get("authority", "")).strip() + break + + document_model = build_document_from_species_payload(payload) + markdown_content = export_markdown_document(document_model) + _persist_document_model( + session, + species, + document_model, + markdown_content, + str(item.get("last_modified_by", "system-import")), + ) + _persist_citations(session, species, extract_citation_entries(document_model)) + + +def get_species_document_payload(session, slug: str) -> dict[str, object] | None: + species = session.scalar(select(Species).where(Species.slug == slug)) + if species is None or species.document is None: + return None + + document = species.document + return { + "slug": species.slug, + "source_format": document.source_format, + "markdown": document.markdown_content, + 
"ast_json": document.ast_json, + "node_count": len(document.nodes), + "updated_by": document.updated_by, + } + + +def save_species_document(session, species: Species, markdown: str, username: str) -> dict[str, object]: + errors = validate_markdown_document(markdown) + if errors: + raise ValueError("; ".join(errors)) + + document_model = parse_markdown_document(markdown) + projection = extract_species_projection(document_model) + _persist_document_model(session, species, document_model, markdown, username) + _persist_citations(session, species, extract_citation_entries(document_model)) + + if projection["title"]: + species.title = str(projection["title"]) + if projection["common_name"]: + species.common_name = str(projection["common_name"]) + if projection["scientific_name"]: + species.scientific_name = str(projection["scientific_name"]) + if projection["flelmr_code"]: + species.flelmr_code = str(projection["flelmr_code"]) + _persist_taxon_identifiers(session, species, list(projection["taxon_identifiers"])) + species.summary = str(projection["summary"]) + species.section_count = len(projection["sections"]) + species.last_modified_by = username + + for section in list(species.sections): + session.delete(section) + session.flush() + + for position, section in enumerate(projection["sections"], start=1): + session.add( + DocumentSection( + species_id=species.id, + position=position, + heading=str(section["heading"]), + content=str(section["content"]), + ) + ) + + return { + "slug": species.slug, + "summary": species.summary, + "section_count": species.section_count, + "markdown": markdown, + "updated_by": username, + } diff --git a/apps/api/src/ecospecies_api/models.py b/apps/api/src/ecospecies_api/models.py index 89248cd..238c43e 100644 --- a/apps/api/src/ecospecies_api/models.py +++ b/apps/api/src/ecospecies_api/models.py @@ -23,6 +23,9 @@ class Species(Base): publication_status: Mapped[str] = mapped_column(String(32), default="published", index=True) is_archived: Mapped[bool] = mapped_column(Boolean, default=False, index=True) editor_notes: Mapped[str] = mapped_column(Text, default="") + created_by: Mapped[str] = mapped_column(String(255), default="system-import") + owner_username: Mapped[str] = mapped_column(String(255), default="") + owner_role: Mapped[str] = mapped_column(String(32), default="") last_modified_by: Mapped[str] = mapped_column(String(255), default="system-import") sections: Mapped[list["DocumentSection"]] = relationship( @@ -40,6 +43,21 @@ class Species(Base): cascade="all, delete-orphan", order_by="SpeciesAuditLog.id.desc()", ) + document: Mapped["SpeciesDocument | None"] = relationship( + back_populates="species", + cascade="all, delete-orphan", + uselist=False, + ) + taxon_identifiers: Mapped[list["SpeciesTaxonIdentifier"]] = relationship( + back_populates="species", + cascade="all, delete-orphan", + order_by="SpeciesTaxonIdentifier.position", + ) + citations: Mapped[list["SpeciesCitation"]] = relationship( + back_populates="species", + cascade="all, delete-orphan", + order_by="SpeciesCitation.position", + ) class DocumentSection(Base): @@ -77,3 +95,93 @@ class SpeciesAuditLog(Base): details_json: Mapped[str] = mapped_column(Text) species: Mapped[Species] = relationship(back_populates="audit_entries") + + +class SpeciesDocument(Base): + __tablename__ = "species_document" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + species_id: Mapped[int] = mapped_column(ForeignKey("species.id", ondelete="CASCADE"), unique=True, index=True) + source_format: Mapped[str] 
= mapped_column(String(64), default="ecospecies-markdown-v1") + markdown_content: Mapped[str] = mapped_column(Text, default="") + ast_json: Mapped[str] = mapped_column(Text, default="") + updated_by: Mapped[str] = mapped_column(String(255), default="system-import") + + species: Mapped[Species] = relationship(back_populates="document") + nodes: Mapped[list["SpeciesDocumentNode"]] = relationship( + back_populates="document", + cascade="all, delete-orphan", + order_by="SpeciesDocumentNode.position", + ) + + +class SpeciesDocumentNode(Base): + __tablename__ = "species_document_node" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + document_id: Mapped[int] = mapped_column(ForeignKey("species_document.id", ondelete="CASCADE"), index=True) + parent_node_ref: Mapped[str | None] = mapped_column(String(64), nullable=True, default=None) + node_ref: Mapped[str] = mapped_column(String(64), index=True) + position: Mapped[int] = mapped_column(Integer, default=1) + depth: Mapped[int] = mapped_column(Integer, default=2) + node_type: Mapped[str] = mapped_column(String(32), default="section") + title: Mapped[str] = mapped_column(String(255), default="") + body_markdown: Mapped[str] = mapped_column(Text, default="") + body_plaintext: Mapped[str] = mapped_column(Text, default="") + source_heading: Mapped[str] = mapped_column(String(255), default="") + source_span_start: Mapped[int | None] = mapped_column(Integer, nullable=True, default=None) + source_span_end: Mapped[int | None] = mapped_column(Integer, nullable=True, default=None) + + document: Mapped[SpeciesDocument] = relationship(back_populates="nodes") + + +class ContributorAccount(Base): + __tablename__ = "contributor_account" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + email: Mapped[str] = mapped_column(String(255), unique=True, index=True) + token_hash: Mapped[str] = mapped_column(String(128), unique=True, index=True) + age_gate_confirmed: Mapped[bool] = mapped_column(Boolean, default=False) + created_at: Mapped[str] = mapped_column(String(64), index=True) + is_active: Mapped[bool] = mapped_column(Boolean, default=True, index=True) + + +class SpeciesTaxonIdentifier(Base): + __tablename__ = "species_taxon_identifier" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + species_id: Mapped[int] = mapped_column(ForeignKey("species.id", ondelete="CASCADE"), index=True) + position: Mapped[int] = mapped_column(Integer, default=1) + authority: Mapped[str] = mapped_column(String(64), default="") + identifier: Mapped[str] = mapped_column(String(255), default="") + label: Mapped[str] = mapped_column(String(128), default="") + is_primary: Mapped[bool] = mapped_column(Boolean, default=False, index=True) + source_url: Mapped[str] = mapped_column(String(500), default="") + + species: Mapped[Species] = relationship(back_populates="taxon_identifiers") + + +class SpeciesCitation(Base): + __tablename__ = "species_citation" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + species_id: Mapped[int] = mapped_column(ForeignKey("species.id", ondelete="CASCADE"), index=True) + position: Mapped[int] = mapped_column(Integer, default=1) + section_heading: Mapped[str] = mapped_column(String(255), default="") + legacy_reference_number: Mapped[str] = mapped_column(String(64), default="", index=True) + citation_key: Mapped[str] = mapped_column(String(255), default="", index=True) + entry_type: Mapped[str] = mapped_column(String(64), default="misc") + raw_text: Mapped[str] = mapped_column(Text, default="") + 
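+    # raw_text preserves the legacy citation string; the fields below are derived or enriched from it.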
normalized_text: Mapped[str] = mapped_column(Text, default="") + abstract_text: Mapped[str] = mapped_column(Text, default="") + draft_bibtex: Mapped[str] = mapped_column(Text, default="") + doi: Mapped[str] = mapped_column(String(255), default="", index=True) + source_url: Mapped[str] = mapped_column(String(500), default="") + openalex_id: Mapped[str] = mapped_column(String(64), default="", index=True) + resolver_source_label: Mapped[str] = mapped_column(String(255), default="") + enrichment_status: Mapped[str] = mapped_column(String(32), default="pending", index=True) + enrichment_error: Mapped[str] = mapped_column(Text, default="") + source_type: Mapped[str] = mapped_column(String(64), default="document_extract") + review_status: Mapped[str] = mapped_column(String(32), default="draft", index=True) + + species: Mapped[Species] = relationship(back_populates="citations") diff --git a/apps/api/src/ecospecies_api/parser.py b/apps/api/src/ecospecies_api/parser.py index 1a02e42..727ef7c 100644 --- a/apps/api/src/ecospecies_api/parser.py +++ b/apps/api/src/ecospecies_api/parser.py @@ -1,14 +1,18 @@ from __future__ import annotations +import hashlib import os import re +from collections import Counter from dataclasses import dataclass from pathlib import Path SECTION_PATTERN = re.compile(r"^[A-Z][A-Z\s/&()-]{2,}$") +TITLE_SECTION_PATTERN = re.compile(r"^[A-Z][A-Za-z\s/&()-]{2,}$") FIELD_PATTERN = re.compile(r"^(?P<key>[A-Za-z/ _-]+):\s*(?P<value>.*)$") SUMMARY_MARKER_PATTERN = re.compile(r"^(summary(?:/abstract)?|abstract|executive summary):?\s*$", re.IGNORECASE) +SAFE_DIRECTORY_NAME_PATTERN = re.compile(r"^[A-Za-z0-9_-]+$") @dataclass @@ -38,6 +42,10 @@ class SpeciesRecord: diagnostics: list[IngestDiagnostic] +def get_repo_root() -> Path: + return Path(__file__).resolve().parents[4] + + def slugify(value: str) -> str: cleaned = re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-") return cleaned or "unknown-species" @@ -53,6 +61,33 @@ def normalize_whitespace(value: str) -> str: return re.sub(r"\s+", " ", value).strip() +def is_section_heading(line: str) -> bool: + stripped = line.strip() + if not stripped: + return False + normalized = stripped[:-1].strip() if stripped.endswith(":") else stripped + if not normalized: + return False + if ":" in normalized: + return False + if SECTION_PATTERN.fullmatch(normalized): + return True + if not TITLE_SECTION_PATTERN.fullmatch(normalized): + return False + + words = normalized.split() + if len(words) > 4: + return False + return all(word[0].isupper() for word in words if word and word[0].isalpha()) + + +def normalize_heading(line: str) -> str: + stripped = line.strip() + if stripped.endswith(":"): + return stripped[:-1].strip() + return stripped + + def split_sections(lines: list[str]) -> list[Section]: sections: list[Section] = [] current_heading = "HEADER" @@ -61,7 +96,7 @@ def split_sections(lines: list[str]) -> list[Section]: for raw_line in lines: line = raw_line.rstrip() stripped = line.strip() - if SECTION_PATTERN.fullmatch(stripped): + if is_section_heading(stripped): if current_lines: sections.append( Section( @@ -69,7 +104,7 @@ def split_sections(lines: list[str]) -> list[Section]: content="\n".join(current_lines).strip(), ) ) - current_heading = stripped + current_heading = normalize_heading(stripped) current_lines = [] continue current_lines.append(line) @@ -96,8 +131,9 @@ def extract_metadata(lines: list[str]) -> dict[str, str]: value = match.group("value").strip() metadata[key] = value - # Legacy files vary between "FLELMR", "FLELMR Code", 
and similar labels. - if key.startswith("flelmr"): + # Legacy files vary between "FLELMR", "FLELMR Code", "EcoSpecies Code", + # and similar labels. + if key.startswith("flelmr") or key == "ecospecies code": metadata["flelmr"] = value return metadata @@ -127,7 +163,7 @@ def extract_summary(lines: list[str], sections: list[Section]) -> str: if summary_lines: summary_lines.append("") continue - if SECTION_PATTERN.fullmatch(stripped): + if is_section_heading(stripped): break if stripped.startswith("[") and not summary_lines: break @@ -202,23 +238,76 @@ def parse_species_file(path: Path) -> SpeciesRecord: ) +def ensure_unique_record_slugs(records: list[SpeciesRecord]) -> list[SpeciesRecord]: + slug_counts = Counter(record.slug for record in records) + used_slugs: set[str] = set() + + for record in records: + base_slug = record.slug + if slug_counts[base_slug] == 1 and base_slug not in used_slugs: + used_slugs.add(base_slug) + continue + + disambiguator = slugify(Path(record.source_file).stem) + if disambiguator == base_slug: + disambiguator = hashlib.sha1(record.source_file.encode("utf-8")).hexdigest()[:8] + + candidate = f"{base_slug}-{disambiguator}" + if candidate in used_slugs: + source_hash = hashlib.sha1(record.source_file.encode("utf-8")).hexdigest()[:8] + candidate = f"{candidate}-{source_hash}" + + suffix = 2 + while candidate in used_slugs: + candidate = f"{base_slug}-{disambiguator}-{suffix}" + suffix += 1 + + record.slug = candidate + used_slugs.add(candidate) + + return records + + def load_species_records(data_dir: str) -> list[SpeciesRecord]: - base = Path(data_dir) + base = resolve_data_dir(data_dir) if not base.exists(): return [] records: list[SpeciesRecord] = [] for path in sorted(base.glob("*.txt")): records.append(parse_species_file(path)) - return records + return ensure_unique_record_slugs(records) + + +def resolve_data_dir(data_dir: str) -> Path: + repo_root = get_repo_root().resolve() + raw_value = data_dir.strip() + if not raw_value: + raise ValueError("Species data directory cannot be empty.") + + candidate = Path(raw_value) + if candidate.is_absolute(): + resolved = candidate.resolve() + else: + resolved = (repo_root / candidate).resolve() + + try: + relative = resolved.relative_to(repo_root) + except ValueError as exc: + raise ValueError("Species data directory must stay within the codebase directory.") from exc + + if not relative.parts: + raise ValueError("Species data directory must be a subdirectory of the codebase.") + + for part in relative.parts: + if not SAFE_DIRECTORY_NAME_PATTERN.fullmatch(part): + raise ValueError( + f"Species data directory contains an unsafe directory name: {part!r}." 
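+                # Path segments must match [A-Za-z0-9_-]+ so lookups cannot escape the repo tree.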
+ ) + + return resolved def get_default_data_dir() -> str: - return os.environ.get( - "ECOSPECIES_DATA_DIR", - str( - Path(__file__).resolve().parents[4].parent - / "01-legacy-code-and-data" - / "InputFiles - TXT" - ), - ) + configured = os.environ.get("ECOSPECIES_DATA_DIR", "input-data/InputFiles") + return str(resolve_data_dir(configured)) diff --git a/apps/api/src/ecospecies_api/repository.py b/apps/api/src/ecospecies_api/repository.py index 1be8d43..42159d5 100644 --- a/apps/api/src/ecospecies_api/repository.py +++ b/apps/api/src/ecospecies_api/repository.py @@ -3,15 +3,45 @@ from __future__ import annotations from collections import Counter from datetime import datetime, timezone import json +import hashlib +import os +from pathlib import Path +import re +import secrets from sqlalchemy import inspect, select, text from sqlalchemy.exc import SQLAlchemyError +from ecospecies_api.citation_enrichment import ( + apply_citation_candidate_selection, + discover_citation_candidates, + enrich_citation_payload, +) +from ecospecies_api.document_format import extract_species_projection, parse_markdown_document +from ecospecies_api.document_format import add_citation_to_document, export_markdown_document +from ecospecies_api.document_repository import ( + get_species_document_payload, + save_species_document, + sync_species_document, +) from ecospecies_api.db import SessionLocal, create_db_engine -from ecospecies_api.models import Base, DocumentSection, IngestDiagnosticRecord, Species, SpeciesAuditLog +from ecospecies_api.models import ( + Base, + ContributorAccount, + DocumentSection, + IngestDiagnosticRecord, + Species, + SpeciesAuditLog, + SpeciesCitation, + SpeciesTaxonIdentifier, +) +from ecospecies_api.parser import get_default_data_dir, slugify WORKFLOW_STATUSES = {"draft", "review", "published"} +CITATION_REVIEW_STATUSES = {"draft", "reviewed", "accepted", "rejected"} SYSTEM_IMPORT_USER = "system-import" +CONTRIBUTOR_SUBMISSION_PREFIX = "contributor-submission" +EMAIL_PATTERN = re.compile(r"^[^@\s]+@[^@\s]+\.[^@\s]+$") def ensure_schema() -> None: @@ -26,6 +56,12 @@ def ensure_schema() -> None: statements.append("ALTER TABLE species ADD COLUMN is_archived BOOLEAN DEFAULT FALSE") if "editor_notes" not in species_columns: statements.append("ALTER TABLE species ADD COLUMN editor_notes TEXT DEFAULT ''") + if "created_by" not in species_columns: + statements.append("ALTER TABLE species ADD COLUMN created_by VARCHAR(255) DEFAULT 'system-import'") + if "owner_username" not in species_columns: + statements.append("ALTER TABLE species ADD COLUMN owner_username VARCHAR(255) DEFAULT ''") + if "owner_role" not in species_columns: + statements.append("ALTER TABLE species ADD COLUMN owner_role VARCHAR(32) DEFAULT ''") if "last_modified_by" not in species_columns: statements.append("ALTER TABLE species ADD COLUMN last_modified_by VARCHAR(255) DEFAULT 'system-import'") if statements: @@ -37,10 +73,144 @@ def ensure_schema() -> None: "UPDATE species SET publication_status = COALESCE(publication_status, 'published'), " "is_archived = COALESCE(is_archived, FALSE), " "editor_notes = COALESCE(editor_notes, ''), " + "created_by = COALESCE(created_by, 'system-import'), " + "owner_username = COALESCE(owner_username, ''), " + "owner_role = COALESCE(owner_role, ''), " "last_modified_by = COALESCE(last_modified_by, 'system-import')" ) ) + tables = set(inspector.get_table_names()) + if "species_citation" in tables: + citation_columns = { + column["name"] for column in inspector.get_columns("species_citation") + } + 
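+        # Additive, idempotent migrations: each missing column gets an ALTER TABLE with a safe default.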
citation_statements: list[str] = [] + if "legacy_reference_number" not in citation_columns: + citation_statements.append( + "ALTER TABLE species_citation ADD COLUMN legacy_reference_number VARCHAR(64) DEFAULT ''" + ) + if "citation_key" not in citation_columns: + citation_statements.append( + "ALTER TABLE species_citation ADD COLUMN citation_key VARCHAR(255) DEFAULT ''" + ) + if "entry_type" not in citation_columns: + citation_statements.append( + "ALTER TABLE species_citation ADD COLUMN entry_type VARCHAR(64) DEFAULT 'misc'" + ) + if "draft_bibtex" not in citation_columns: + citation_statements.append( + "ALTER TABLE species_citation ADD COLUMN draft_bibtex TEXT DEFAULT ''" + ) + if "abstract_text" not in citation_columns: + citation_statements.append( + "ALTER TABLE species_citation ADD COLUMN abstract_text TEXT DEFAULT ''" + ) + if "source_url" not in citation_columns: + citation_statements.append( + "ALTER TABLE species_citation ADD COLUMN source_url VARCHAR(500) DEFAULT ''" + ) + if "openalex_id" not in citation_columns: + citation_statements.append( + "ALTER TABLE species_citation ADD COLUMN openalex_id VARCHAR(64) DEFAULT ''" + ) + if "resolver_source_label" not in citation_columns: + citation_statements.append( + "ALTER TABLE species_citation ADD COLUMN resolver_source_label VARCHAR(255) DEFAULT ''" + ) + if "enrichment_status" not in citation_columns: + citation_statements.append( + "ALTER TABLE species_citation ADD COLUMN enrichment_status VARCHAR(32) DEFAULT 'pending'" + ) + if "enrichment_error" not in citation_columns: + citation_statements.append( + "ALTER TABLE species_citation ADD COLUMN enrichment_error TEXT DEFAULT ''" + ) + if citation_statements: + with engine.begin() as connection: + for statement in citation_statements: + connection.execute(text(statement)) + connection.execute( + text( + "UPDATE species_citation SET " + "legacy_reference_number = COALESCE(legacy_reference_number, ''), " + "citation_key = COALESCE(citation_key, ''), " + "entry_type = COALESCE(entry_type, 'misc'), " + "abstract_text = COALESCE(abstract_text, ''), " + "draft_bibtex = COALESCE(draft_bibtex, ''), " + "source_url = COALESCE(source_url, ''), " + "openalex_id = COALESCE(openalex_id, ''), " + "resolver_source_label = COALESCE(resolver_source_label, ''), " + "enrichment_status = COALESCE(enrichment_status, 'pending'), " + "enrichment_error = COALESCE(enrichment_error, '')" + ) + ) + + +def _citation_to_payload(citation: SpeciesCitation) -> dict[str, object]: + return { + "id": citation.id, + "position": citation.position, + "section_heading": citation.section_heading, + "legacy_reference_number": citation.legacy_reference_number, + "citation_key": citation.citation_key, + "entry_type": citation.entry_type, + "raw_text": citation.raw_text, + "normalized_text": citation.normalized_text, + "abstract_text": citation.abstract_text, + "draft_bibtex": citation.draft_bibtex, + "doi": citation.doi, + "source_url": citation.source_url, + "openalex_id": citation.openalex_id, + "resolver_source_label": citation.resolver_source_label, + "enrichment_status": citation.enrichment_status, + "enrichment_error": citation.enrichment_error, + "source_type": citation.source_type, + "review_status": citation.review_status, + } + + +def _structured_document_to_payload(species: Species) -> dict[str, object] | None: + if species.document is None: + return None + + ast: dict[str, object] | None = None + raw_ast = str(species.document.ast_json or "").strip() + if raw_ast: + try: + parsed = json.loads(raw_ast) + if 
isinstance(parsed, dict): + ast = parsed + except json.JSONDecodeError: + ast = None + + return { + "source_format": species.document.source_format, + "updated_by": species.document.updated_by, + "node_count": len(species.document.nodes), + "ast": ast, + } + + +def _legacy_source_to_payload(species: Species) -> dict[str, object] | None: + source_file = str(species.source_file or "").strip() + if not source_file: + return None + + try: + data_dir = Path(get_default_data_dir()).resolve() + candidate = (data_dir / source_file).resolve() + if candidate.parent != data_dir or not candidate.is_file(): + return None + text = candidate.read_text(encoding="utf-8", errors="replace") + except (OSError, ValueError): + return None + + return { + "source_file": source_file, + "text": text, + } + def import_species_payload(payload: list[dict[str, object]]) -> None: ensure_schema() @@ -66,6 +236,9 @@ def import_species_payload(payload: list[dict[str, object]]) -> None: publication_status="published", is_archived=False, editor_notes="", + created_by=SYSTEM_IMPORT_USER, + owner_username="", + owner_role="", last_modified_by=SYSTEM_IMPORT_USER, ) session.add(species) @@ -142,7 +315,7 @@ def import_species_payload(payload: list[dict[str, object]]) -> None: ) ) - session.add(species) + sync_species_document(session, species, item) for slug, species in existing_species.items(): if slug in incoming_slugs: @@ -161,11 +334,15 @@ def import_species_payload(payload: list[dict[str, object]]) -> None: ), ) ) - session.add(species) - session.commit() +def get_species_document(slug: str) -> dict[str, object] | None: + ensure_schema() + with SessionLocal() as session: + return get_species_document_payload(session, slug) + + def _get_editor_preservation_state(species: Species) -> tuple[set[str], set[int]]: editorial_fields: set[str] = set() section_positions: set[int] = set() @@ -215,6 +392,30 @@ def get_readiness_status() -> dict[str, object]: def _species_to_payload(species: Species, include_sections: bool = True) -> dict[str, object]: + legacy_identifiers: list[dict[str, object]] = [] + if species.flelmr_code: + legacy_identifiers.append( + { + "authority": "legacy-ecospecies", + "identifier": species.flelmr_code, + "label": "FLELMR", + } + ) + taxon_identifiers = [ + { + "authority": item.authority, + "identifier": item.identifier, + "label": item.label, + "primary": item.is_primary, + "source_url": item.source_url, + } + for item in species.taxon_identifiers + ] + primary_taxon_identifier = next( + (item for item in taxon_identifiers if bool(item.get("primary"))), + None, + ) + return { "slug": species.slug, "source_file": species.source_file, @@ -222,8 +423,15 @@ def _species_to_payload(species: Species, include_sections: bool = True) -> dict "common_name": species.common_name, "scientific_name": species.scientific_name, "flelmr_code": species.flelmr_code, + "legacy_identifiers": legacy_identifiers, + "taxon_identifiers": taxon_identifiers, + "primary_taxon_authority": ( + str(primary_taxon_identifier.get("authority", "")) if primary_taxon_identifier else "" + ), + "primary_taxon_identifier": primary_taxon_identifier, "summary": species.summary, "section_count": species.section_count, + "citation_count": len(species.citations), "publication_status": species.publication_status, "is_archived": species.is_archived, "editor_notes": species.editor_notes, @@ -232,6 +440,11 @@ def _species_to_payload(species: Species, include_sections: bool = True) -> dict {"level": diagnostic.level, "code": diagnostic.code, "message": 
diagnostic.message} for diagnostic in species.diagnostics ], + "citations": [ + _citation_to_payload(citation) for citation in species.citations + ], + "structured_document": _structured_document_to_payload(species) if include_sections else None, + "legacy_source": _legacy_source_to_payload(species) if include_sections else None, "sections": ( [ { @@ -290,6 +503,7 @@ def get_species_by_slug( return None _ = species.sections _ = species.diagnostics + _ = species.citations return _species_to_payload(species, include_sections=True) @@ -321,10 +535,135 @@ def list_diagnostics() -> list[dict[str, object]]: ] +def list_public_bibliography(search: str = "") -> list[dict[str, object]]: + ensure_schema() + with SessionLocal() as session: + species_records = list( + session.scalars( + select(Species) + .where( + Species.publication_status == "published", + Species.is_archived.is_(False), + ) + .order_by(Species.common_name, Species.title) + ) + ) + + entries: dict[str, dict[str, object]] = {} + for species in species_records: + _ = species.citations + for citation in species.citations: + doi_key = str(citation.doi).strip().lower() + openalex_key = str(citation.openalex_id).strip().lower() + citation_key = str(citation.citation_key).strip().lower() + normalized_key = " ".join(str(citation.normalized_text).split()).strip().lower() + raw_key = " ".join(str(citation.raw_text).split()).strip().lower() + dedupe_key = ( + f"doi:{doi_key}" if doi_key else "" + ) or ( + f"openalex:{openalex_key}" if openalex_key else "" + ) or ( + f"key:{citation_key}" if citation_key else "" + ) or ( + f"normalized:{normalized_key}" if normalized_key else "" + ) or ( + f"raw:{raw_key}" if raw_key else "" + ) + if not dedupe_key: + continue + + entry = entries.get(dedupe_key) + if entry is None: + entry = { + **_citation_to_payload(citation), + "species_refs": [], + "_species_ref_keys": set(), + "_legacy_reference_numbers": set(), + } + entries[dedupe_key] = entry + + if not entry.get("normalized_text") and citation.normalized_text: + entry["normalized_text"] = citation.normalized_text + if not entry.get("abstract_text") and citation.abstract_text: + entry["abstract_text"] = citation.abstract_text + if not entry.get("draft_bibtex") and citation.draft_bibtex: + entry["draft_bibtex"] = citation.draft_bibtex + if not entry.get("doi") and citation.doi: + entry["doi"] = citation.doi + if not entry.get("source_url") and citation.source_url: + entry["source_url"] = citation.source_url + if not entry.get("openalex_id") and citation.openalex_id: + entry["openalex_id"] = citation.openalex_id + + species_ref_key = species.slug + if species_ref_key not in entry["_species_ref_keys"]: + entry["_species_ref_keys"].add(species_ref_key) + entry["species_refs"].append( + { + "slug": species.slug, + "common_name": species.common_name, + "scientific_name": species.scientific_name, + } + ) + if citation.legacy_reference_number: + entry["_legacy_reference_numbers"].add(citation.legacy_reference_number) + + items: list[dict[str, object]] = [] + needle = search.strip().lower() + for entry in entries.values(): + legacy_numbers = sorted(entry.pop("_legacy_reference_numbers")) + entry.pop("_species_ref_keys", None) + entry["legacy_reference_numbers"] = legacy_numbers + entry["species_count"] = len(entry["species_refs"]) + + if needle: + haystack = " ".join( + [ + str(entry.get("normalized_text", "")), + str(entry.get("raw_text", "")), + str(entry.get("citation_key", "")), + str(entry.get("doi", "")), + str(entry.get("abstract_text", "")), + 
str(entry.get("draft_bibtex", "")), + ] + ).lower() + if needle not in haystack: + continue + items.append(entry) + + items.sort(key=lambda item: (str(item.get("normalized_text", "") or item.get("raw_text", "")).lower(), str(item.get("citation_key", "")).lower())) + return items + + def get_editor_species_list(search: str = "") -> list[dict[str, object]]: return list_species(search=search, include_unpublished=True, include_archived=True) +def get_contributor_species_list(username: str, search: str = "") -> list[dict[str, object]]: + ensure_schema() + with SessionLocal() as session: + query = ( + select(Species) + .where( + Species.owner_role == "contributor", + Species.owner_username == username, + ) + .order_by(Species.common_name, Species.title) + ) + species = list(session.scalars(query)) + payload = [_species_to_payload(item, include_sections=False) for item in species] + if search: + needle = search.lower() + payload = [ + item + for item in payload + if needle in item["common_name"].lower() + or needle in item["scientific_name"].lower() + or needle in item["title"].lower() + ] + return payload + + def get_editor_species_workflow(slug: str) -> dict[str, object] | None: item = get_species_by_slug(slug, include_unpublished=True, include_archived=True) if item is None: @@ -345,6 +684,61 @@ def get_editor_species_detail(slug: str) -> dict[str, object] | None: return get_species_by_slug(slug, include_unpublished=True, include_archived=True) +def get_contributor_species_detail(slug: str, username: str) -> dict[str, object] | None: + ensure_schema() + with SessionLocal() as session: + species = session.scalar( + select(Species).where( + Species.slug == slug, + Species.owner_role == "contributor", + Species.owner_username == username, + ) + ) + if species is None: + return None + _ = species.sections + _ = species.diagnostics + _ = species.citations + return _species_to_payload(species, include_sections=True) + + +def _citation_list_payload(species: Species) -> dict[str, object]: + return { + "slug": species.slug, + "title": species.title, + "common_name": species.common_name, + "scientific_name": species.scientific_name, + "citation_count": len(species.citations), + "citations": [_citation_to_payload(citation) for citation in species.citations], + } + + +def get_editor_species_citations(slug: str) -> dict[str, object] | None: + ensure_schema() + with SessionLocal() as session: + species = session.scalar(select(Species).where(Species.slug == slug)) + if species is None: + return None + _ = species.citations + return _citation_list_payload(species) + + +def get_contributor_species_citations(slug: str, username: str) -> dict[str, object] | None: + ensure_schema() + with SessionLocal() as session: + species = session.scalar( + select(Species).where( + Species.slug == slug, + Species.owner_role == "contributor", + Species.owner_username == username, + ) + ) + if species is None: + return None + _ = species.citations + return _citation_list_payload(species) + + def list_species_audit(slug: str) -> list[dict[str, object]] | None: ensure_schema() with SessionLocal() as session: @@ -441,6 +835,158 @@ def update_species_editorial( } +def _normalize_email(email: str) -> str: + normalized = email.strip().lower() + if not EMAIL_PATTERN.fullmatch(normalized): + raise ValueError("Contributor username must be a valid email address.") + return normalized + + +def get_minimum_contributor_age() -> int: + configured = os.environ.get("ECOSPECIES_MINIMUM_CONTRIBUTOR_AGE", "13").strip() + try: + minimum_age = 
int(configured) + except ValueError as exc: # pragma: no cover - misconfiguration path + raise ValueError("ECOSPECIES_MINIMUM_CONTRIBUTOR_AGE must be an integer.") from exc + if minimum_age < 1: + raise ValueError("ECOSPECIES_MINIMUM_CONTRIBUTOR_AGE must be positive.") + return minimum_age + + +def register_contributor(email: str, age_gate_confirmed: bool) -> dict[str, object]: + ensure_schema() + normalized_email = _normalize_email(email) + minimum_age = get_minimum_contributor_age() + if not age_gate_confirmed: + raise ValueError( + f"Contributors must confirm they are at least {minimum_age} years old." + ) + + token = secrets.token_urlsafe(24) + token_hash = hashlib.sha256(token.encode("utf-8")).hexdigest() + now = datetime.now(timezone.utc).isoformat() + + with SessionLocal() as session: + existing = session.scalar( + select(ContributorAccount).where(ContributorAccount.email == normalized_email) + ) + if existing is not None: + raise ValueError("A contributor account already exists for that email address.") + + session.add( + ContributorAccount( + email=normalized_email, + token_hash=token_hash, + age_gate_confirmed=True, + created_at=now, + is_active=True, + ) + ) + session.commit() + + return { + "username": normalized_email, + "role": "contributor", + "token": token, + "minimum_age": minimum_age, + "warning": "Store this token now. You will not be able to access your contributed species later without it.", + } + + +def _build_initial_contributor_markdown(email: str) -> str: + title = "New Species Draft" + return ( + "---\n" + f"title: {title}\n" + "common_name: \n" + "scientific_name: \n" + "species_code: \n" + f"source_file: {CONTRIBUTOR_SUBMISSION_PREFIX}.md\n" + "publication_status: draft\n" + "source_format: ecospecies-markdown-v1\n" + "---\n\n" + "## Summary\n" + "Provide a concise summary.\n\n" + "## Habitat\n" + "Describe habitat.\n" + ) + + +def _next_unique_slug(session, base_slug: str) -> str: + candidate = base_slug + suffix = 2 + while session.scalar(select(Species.id).where(Species.slug == candidate)) is not None: + candidate = f"{base_slug}-{suffix}" + suffix += 1 + return candidate + + +def create_contributor_species(username: str, markdown: str | None = None) -> dict[str, object]: + ensure_schema() + normalized_email = _normalize_email(username) + source_markdown = (markdown or _build_initial_contributor_markdown(normalized_email)).strip() + if not source_markdown.endswith("\n"): + source_markdown += "\n" + + with SessionLocal() as session: + document_model = parse_markdown_document(source_markdown) + projection = extract_species_projection(document_model) + slug_base = slugify( + str(projection.get("common_name") or projection.get("title") or CONTRIBUTOR_SUBMISSION_PREFIX) + ) + slug = _next_unique_slug(session, slug_base) + species = Species( + slug=slug, + source_file=f"{CONTRIBUTOR_SUBMISSION_PREFIX}-{slug}.md", + title=str(projection.get("title") or "New Species Draft"), + common_name=str(projection.get("common_name") or ""), + scientific_name=str(projection.get("scientific_name") or ""), + flelmr_code=str(projection.get("flelmr_code") or ""), + summary=str(projection.get("summary") or ""), + section_count=len(projection["sections"]), + publication_status="draft", + is_archived=False, + editor_notes="", + created_by=normalized_email, + owner_username=normalized_email, + owner_role="contributor", + last_modified_by=normalized_email, + ) + session.add(species) + session.flush() + save_species_document(session, species, source_markdown, normalized_email) + 
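A standalone sketch of the dedupe-key cascade used by `list_public_bibliography` above: the first non-empty identifier wins, and whitespace in the text-based keys is collapsed before comparison. `dedupe_key` is an illustrative rename for exposition, not a function in this patch.

```python
# Illustrative only: mirrors the chained `or` dedupe keys in
# list_public_bibliography; the first non-empty identifier wins.
def dedupe_key(doi: str, openalex_id: str, citation_key: str,
               normalized_text: str, raw_text: str) -> str:
    for prefix, value in (
        ("doi", doi.strip().lower()),
        ("openalex", openalex_id.strip().lower()),
        ("key", citation_key.strip().lower()),
        ("normalized", " ".join(normalized_text.split()).strip().lower()),
        ("raw", " ".join(raw_text.split()).strip().lower()),
    ):
        if value:
            return f"{prefix}:{value}"
    return ""  # caller skips citations with no usable key


assert dedupe_key("10.1000/X", "", "", "", "") == "doi:10.1000/x"
assert dedupe_key("", "", "", "  Smith   1999. ", "") == "normalized:smith 1999."
assert dedupe_key("", "", "", "", "") == ""
```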
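And a minimal sketch of the slug-collision loop that `create_contributor_species` relies on, with an in-memory set standing in for the `Species.slug` lookup; `next_unique_slug` here is a hypothetical stand-in for `_next_unique_slug`.

```python
# Sketch: same loop as _next_unique_slug, but probing a set instead of
# issuing a SELECT per candidate slug.
def next_unique_slug(existing: set[str], base_slug: str) -> str:
    candidate = base_slug
    suffix = 2
    while candidate in existing:
        candidate = f"{base_slug}-{suffix}"
        suffix += 1
    return candidate


taken = {"new-species-draft", "new-species-draft-2"}
assert next_unique_slug(taken, "new-species-draft") == "new-species-draft-3"
assert next_unique_slug(taken, "mangrove-snapper") == "mangrove-snapper"
```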
session.add( + SpeciesAuditLog( + species_id=species.id, + changed_by=normalized_email, + changed_at=datetime.now(timezone.utc).isoformat(), + action="contributor_create", + details_json=json.dumps({"publication_status": "draft"}, ensure_ascii=True), + ) + ) + session.commit() + return { + "slug": species.slug, + "publication_status": species.publication_status, + "last_modified_by": species.last_modified_by, + } + + +def get_contributor_species_document(slug: str, username: str) -> dict[str, object] | None: + ensure_schema() + with SessionLocal() as session: + species = session.scalar( + select(Species).where( + Species.slug == slug, + Species.owner_role == "contributor", + Species.owner_username == username, + ) + ) + if species is None: + return None + return get_species_document_payload(session, slug) + + def update_species_section( slug: str, section_position: int, @@ -506,3 +1052,595 @@ def update_species_section( "last_modified_by": species.last_modified_by, "changed_fields": changed_fields, } + + +def update_species_document_markdown( + slug: str, + markdown: str, + username: str, +) -> dict[str, object] | None: + ensure_schema() + with SessionLocal() as session: + species = session.scalar(select(Species).where(Species.slug == slug)) + if species is None: + return None + + result = save_species_document(session, species, markdown, username) + session.add( + SpeciesAuditLog( + species_id=species.id, + changed_by=username, + changed_at=datetime.now(timezone.utc).isoformat(), + action="document_update", + details_json=json.dumps( + {"source_format": "ecospecies-markdown-v1"}, + ensure_ascii=True, + ), + ) + ) + session.commit() + return result + + +def update_species_citation_review( + slug: str, + citation_id: int, + review_status: str | None, + normalized_text: str | None, + doi: str | None, + citation_key: str | None, + entry_type: str | None, + draft_bibtex: str | None, + username: str, + *, + abstract_text: str | None = None, +) -> dict[str, object] | None: + ensure_schema() + with SessionLocal() as session: + species = session.scalar(select(Species).where(Species.slug == slug)) + if species is None: + return None + + citation = session.scalar( + select(SpeciesCitation).where( + SpeciesCitation.species_id == species.id, + SpeciesCitation.id == citation_id, + ) + ) + if citation is None: + return None + + before = _citation_to_payload(citation) + if review_status is not None: + normalized_status = review_status.strip().lower() + if normalized_status not in CITATION_REVIEW_STATUSES: + raise ValueError( + f"Unsupported review_status: {review_status}. 
" + f"Expected one of {sorted(CITATION_REVIEW_STATUSES)}" + ) + citation.review_status = normalized_status + if normalized_text is not None: + citation.normalized_text = normalized_text.strip() + if abstract_text is not None: + citation.abstract_text = abstract_text.strip() + if doi is not None: + citation.doi = doi.strip() + if citation_key is not None: + citation.citation_key = citation_key.strip() + if entry_type is not None: + citation.entry_type = entry_type.strip() or "misc" + if draft_bibtex is not None: + citation.draft_bibtex = draft_bibtex.strip() + citation.source_type = "editor_review" + + after = _citation_to_payload(citation) + changed_fields = { + field: {"from": before[field], "to": after[field]} + for field in ( + "review_status", + "normalized_text", + "abstract_text", + "doi", + "citation_key", + "entry_type", + "draft_bibtex", + "source_type", + ) + if before[field] != after[field] + } + + if changed_fields: + species.last_modified_by = username + session.add( + SpeciesAuditLog( + species_id=species.id, + changed_by=username, + changed_at=datetime.now(timezone.utc).isoformat(), + action="citation_review_update", + details_json=json.dumps( + {"citation_id": citation.id, **changed_fields}, + ensure_ascii=True, + ), + ) + ) + + session.add(citation) + session.add(species) + session.commit() + session.refresh(citation) + + return { + "slug": species.slug, + "citation": _citation_to_payload(citation), + "last_modified_by": species.last_modified_by, + "changed_fields": changed_fields, + } + + +def get_species_citation_candidates(slug: str, citation_id: int) -> dict[str, object] | None: + ensure_schema() + with SessionLocal() as session: + species = session.scalar(select(Species).where(Species.slug == slug)) + if species is None: + return None + + citation = session.scalar( + select(SpeciesCitation).where( + SpeciesCitation.species_id == species.id, + SpeciesCitation.id == citation_id, + ) + ) + if citation is None: + return None + + candidates = discover_citation_candidates(_citation_to_payload(citation)) + return { + "slug": species.slug, + "citation": _citation_to_payload(citation), + **candidates, + } + + +def _apply_citation_enrichment( + session, + species: Species, + citation: SpeciesCitation, + username: str, +) -> dict[str, object]: + before = _citation_to_payload(citation) + enrichment = enrich_citation_payload(before) + + for field in ( + "citation_key", + "entry_type", + "normalized_text", + "abstract_text", + "draft_bibtex", + "doi", + "source_url", + "openalex_id", + "resolver_source_label", + "enrichment_status", + "enrichment_error", + ): + if field in enrichment: + setattr(citation, field, str(enrichment.get(field, "")).strip()) + + after = _citation_to_payload(citation) + changed_fields = { + field: {"from": before[field], "to": after[field]} + for field in ( + "citation_key", + "entry_type", + "normalized_text", + "abstract_text", + "draft_bibtex", + "doi", + "source_url", + "openalex_id", + "resolver_source_label", + "enrichment_status", + "enrichment_error", + ) + if before[field] != after[field] + } + conflicts = enrichment.get("conflicts") + if conflicts: + changed_fields["resolver_conflicts"] = list(conflicts) + + if changed_fields: + species.last_modified_by = username + session.add( + SpeciesAuditLog( + species_id=species.id, + changed_by=username, + changed_at=datetime.now(timezone.utc).isoformat(), + action="citation_enrichment", + details_json=json.dumps( + {"citation_id": citation.id, **changed_fields}, + ensure_ascii=True, + ), + ) + ) + + 
session.add(citation) + session.add(species) + return { + "citation": _citation_to_payload(citation), + "changed_fields": changed_fields, + } + + +def _next_citation_position(species: Species) -> int: + if not species.citations: + return 1 + return max(citation.position for citation in species.citations) + 1 + + +def update_species_citation_enrichment( + slug: str, + citation_id: int, + username: str, +) -> dict[str, object] | None: + ensure_schema() + with SessionLocal() as session: + species = session.scalar(select(Species).where(Species.slug == slug)) + if species is None: + return None + + citation = session.scalar( + select(SpeciesCitation).where( + SpeciesCitation.species_id == species.id, + SpeciesCitation.id == citation_id, + ) + ) + if citation is None: + return None + + result = _apply_citation_enrichment(session, species, citation, username) + session.commit() + session.refresh(citation) + + return { + "slug": species.slug, + "citation": result["citation"], + "last_modified_by": species.last_modified_by, + "changed_fields": result["changed_fields"], + } + + +def apply_species_citation_candidate_selection( + slug: str, + citation_id: int, + candidate: dict[str, object], + username: str, +) -> dict[str, object] | None: + ensure_schema() + with SessionLocal() as session: + species = session.scalar(select(Species).where(Species.slug == slug)) + if species is None: + return None + + citation = session.scalar( + select(SpeciesCitation).where( + SpeciesCitation.species_id == species.id, + SpeciesCitation.id == citation_id, + ) + ) + if citation is None: + return None + + before = _citation_to_payload(citation) + enrichment = apply_citation_candidate_selection(before, candidate) + for field in ( + "citation_key", + "entry_type", + "normalized_text", + "abstract_text", + "draft_bibtex", + "doi", + "source_url", + "openalex_id", + "resolver_source_label", + "enrichment_status", + "enrichment_error", + ): + if field in enrichment: + setattr(citation, field, str(enrichment.get(field, "")).strip()) + citation.source_type = "editor_selected_candidate" + citation.review_status = "accepted" + + after = _citation_to_payload(citation) + changed_fields = { + field: {"from": before[field], "to": after[field]} + for field in ( + "citation_key", + "entry_type", + "normalized_text", + "abstract_text", + "draft_bibtex", + "doi", + "source_url", + "openalex_id", + "resolver_source_label", + "enrichment_status", + "enrichment_error", + "source_type", + "review_status", + ) + if before[field] != after[field] + } + + if changed_fields: + species.last_modified_by = username + session.add( + SpeciesAuditLog( + species_id=species.id, + changed_by=username, + changed_at=datetime.now(timezone.utc).isoformat(), + action="citation_candidate_selection", + details_json=json.dumps( + { + "citation_id": citation.id, + "selected_source_label": str(candidate.get("source_label", "")).strip(), + **changed_fields, + }, + ensure_ascii=True, + ), + ) + ) + + session.add(citation) + session.add(species) + session.commit() + session.refresh(citation) + return { + "slug": species.slug, + "citation": _citation_to_payload(citation), + "last_modified_by": species.last_modified_by, + "changed_fields": changed_fields, + } + + +def add_species_citation_from_candidate( + slug: str, + citation_id: int, + candidate: dict[str, object], + username: str, +) -> dict[str, object] | None: + ensure_schema() + with SessionLocal() as session: + species = session.scalar(select(Species).where(Species.slug == slug)) + if species is None: + return 
None + + citation = session.scalar( + select(SpeciesCitation).where( + SpeciesCitation.species_id == species.id, + SpeciesCitation.id == citation_id, + ) + ) + if citation is None: + return None + + enrichment = apply_citation_candidate_selection(_citation_to_payload(citation), candidate) + raw_text = ( + str(enrichment.get("normalized_text", "")).strip() + or str(candidate.get("fields", {}).get("title", "")).strip() + or str(citation.raw_text).strip() + ) + + document_markdown = species.document.markdown_content if species.document is not None else "" + document_model = parse_markdown_document(document_markdown) + added = add_citation_to_document(document_model, raw_text, heading_title="Related References") + updated_markdown = export_markdown_document(document_model) + save_species_document(session, species, updated_markdown, username) + + new_citation = session.scalar( + select(SpeciesCitation).where( + SpeciesCitation.species_id == species.id, + SpeciesCitation.raw_text == raw_text, + ) + ) + if new_citation is None: + return None + + new_citation.source_type = "editor_added_candidate" + new_citation.review_status = "accepted" + new_citation.citation_key = str(enrichment.get("citation_key", "")).strip() + new_citation.entry_type = str(enrichment.get("entry_type", "misc")).strip() or "misc" + new_citation.normalized_text = str(enrichment.get("normalized_text", "")).strip() + new_citation.abstract_text = str(enrichment.get("abstract_text", "")).strip() + new_citation.draft_bibtex = str(enrichment.get("draft_bibtex", "")).strip() + new_citation.doi = str(enrichment.get("doi", "")).strip() + new_citation.source_url = str(enrichment.get("source_url", "")).strip() + new_citation.openalex_id = str(enrichment.get("openalex_id", "")).strip() + new_citation.resolver_source_label = str(enrichment.get("resolver_source_label", "")).strip() + new_citation.enrichment_status = str(enrichment.get("enrichment_status", "resolved")).strip() + new_citation.enrichment_error = str(enrichment.get("enrichment_error", "")).strip() + + if not added: + new_citation.source_type = "editor_added_candidate" + new_citation.review_status = "accepted" + + session.add(new_citation) + species.last_modified_by = username + session.add( + SpeciesAuditLog( + species_id=species.id, + changed_by=username, + changed_at=datetime.now(timezone.utc).isoformat(), + action="citation_candidate_addition", + details_json=json.dumps( + { + "source_citation_id": citation.id, + "selected_source_label": str(candidate.get("source_label", "")).strip(), + "new_citation_key": new_citation.citation_key, + }, + ensure_ascii=True, + ), + ) + ) + session.commit() + session.refresh(new_citation) + return { + "slug": species.slug, + "citation": _citation_to_payload(new_citation), + "last_modified_by": species.last_modified_by, + } + + +def update_species_citations_enrichment_batch( + slug: str, + username: str, +) -> dict[str, object] | None: + ensure_schema() + with SessionLocal() as session: + species = session.scalar(select(Species).where(Species.slug == slug)) + if species is None: + return None + + _ = species.citations + updated_citations: list[dict[str, object]] = [] + changed_count = 0 + resolved_count = 0 + unresolved_count = 0 + error_count = 0 + + for citation in species.citations: + result = _apply_citation_enrichment(session, species, citation, username) + updated_citations.append(result["citation"]) + if result["changed_fields"]: + changed_count += 1 + status = str(result["citation"].get("enrichment_status", "")).strip() + if status == 
"resolved": + resolved_count += 1 + elif status == "unresolved": + unresolved_count += 1 + elif status == "error": + error_count += 1 + + session.commit() + + return { + "slug": species.slug, + "citation_count": len(updated_citations), + "changed_count": changed_count, + "resolved_count": resolved_count, + "unresolved_count": unresolved_count, + "error_count": error_count, + "citations": updated_citations, + "last_modified_by": species.last_modified_by, + } + + +def _should_backfill_citation(citation: SpeciesCitation, include_accepted: bool = False) -> bool: + review_status = str(citation.review_status or "").strip().lower() + source_type = str(citation.source_type or "").strip().lower() + enrichment_status = str(citation.enrichment_status or "").strip().lower() + normalized_text = str(citation.normalized_text or "").strip() + abstract_text = str(citation.abstract_text or "").strip() + + if not include_accepted and review_status == "accepted": + return False + if source_type in {"editor_selected_candidate", "editor_added_candidate"} and not include_accepted: + return False + + return ( + source_type in {"document_extract", "editor_review", ""} + or enrichment_status in {"pending", "unresolved", "error", ""} + or not normalized_text + or not abstract_text + ) + + +def backfill_species_citations( + slug: str, + username: str, + include_accepted: bool = False, +) -> dict[str, object] | None: + ensure_schema() + with SessionLocal() as session: + species = session.scalar(select(Species).where(Species.slug == slug)) + if species is None: + return None + + _ = species.citations + updated_citations: list[dict[str, object]] = [] + changed_count = 0 + resolved_count = 0 + unresolved_count = 0 + error_count = 0 + backfilled_count = 0 + + for citation in species.citations: + if _should_backfill_citation(citation, include_accepted=include_accepted): + backfilled_count += 1 + result = _apply_citation_enrichment(session, species, citation, username) + payload = result["citation"] + if result["changed_fields"]: + changed_count += 1 + else: + payload = _citation_to_payload(citation) + updated_citations.append(payload) + status = str(payload.get("enrichment_status", "")).strip() + if status == "resolved": + resolved_count += 1 + elif status == "unresolved": + unresolved_count += 1 + elif status == "error": + error_count += 1 + + session.commit() + + return { + "slug": species.slug, + "citation_count": len(updated_citations), + "backfilled_count": backfilled_count, + "changed_count": changed_count, + "resolved_count": resolved_count, + "unresolved_count": unresolved_count, + "error_count": error_count, + "citations": updated_citations, + "last_modified_by": species.last_modified_by, + } + + +def update_contributor_species_document_markdown( + slug: str, + markdown: str, + username: str, +) -> dict[str, object] | None: + ensure_schema() + normalized_email = _normalize_email(username) + with SessionLocal() as session: + species = session.scalar( + select(Species).where( + Species.slug == slug, + Species.owner_role == "contributor", + Species.owner_username == normalized_email, + ) + ) + if species is None: + return None + + result = save_species_document(session, species, markdown, normalized_email) + if species.publication_status == "published": + species.publication_status = "review" + session.add( + SpeciesAuditLog( + species_id=species.id, + changed_by=normalized_email, + changed_at=datetime.now(timezone.utc).isoformat(), + action="contributor_document_update", + details_json=json.dumps( + 
{"source_format": "ecospecies-markdown-v1"}, + ensure_ascii=True, + ), + ) + ) + session.commit() + return { + **result, + "publication_status": species.publication_status, + } diff --git a/apps/api/test_auth.py b/apps/api/test_auth.py new file mode 100644 index 0000000..9be4678 --- /dev/null +++ b/apps/api/test_auth.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +import importlib.util +import sys +from pathlib import Path + + +ROOT = Path(__file__).resolve().parent +SRC = ROOT / "src" +if str(SRC) not in sys.path: + sys.path.insert(0, str(SRC)) + +TEST_PATH = ROOT / "tests" / "test_auth.py" +SPEC = importlib.util.spec_from_file_location("ecospecies_api_test_auth", TEST_PATH) +MODULE = importlib.util.module_from_spec(SPEC) +assert SPEC is not None and SPEC.loader is not None +SPEC.loader.exec_module(MODULE) + +for name in dir(MODULE): + if name.startswith("Test") or name.endswith("Tests"): + globals()[name] = getattr(MODULE, name) diff --git a/apps/api/test_citation_enrichment.py b/apps/api/test_citation_enrichment.py new file mode 100644 index 0000000..e9d9eaf --- /dev/null +++ b/apps/api/test_citation_enrichment.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +import importlib.util +import sys +from pathlib import Path + + +ROOT = Path(__file__).resolve().parent +SRC = ROOT / "src" +if str(SRC) not in sys.path: + sys.path.insert(0, str(SRC)) + +TEST_PATH = ROOT / "tests" / "test_citation_enrichment.py" +SPEC = importlib.util.spec_from_file_location("ecospecies_api_test_citation_enrichment", TEST_PATH) +MODULE = importlib.util.module_from_spec(SPEC) +assert SPEC is not None and SPEC.loader is not None +SPEC.loader.exec_module(MODULE) + +for name in dir(MODULE): + if name.startswith("Test") or name.endswith("Tests"): + globals()[name] = getattr(MODULE, name) diff --git a/apps/api/test_document_format.py b/apps/api/test_document_format.py new file mode 100644 index 0000000..151b0d5 --- /dev/null +++ b/apps/api/test_document_format.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +import importlib.util +import sys +from pathlib import Path + + +ROOT = Path(__file__).resolve().parent +SRC = ROOT / "src" +if str(SRC) not in sys.path: + sys.path.insert(0, str(SRC)) + +TEST_PATH = ROOT / "tests" / "test_document_format.py" +SPEC = importlib.util.spec_from_file_location("ecospecies_api_test_document_format", TEST_PATH) +MODULE = importlib.util.module_from_spec(SPEC) +assert SPEC is not None and SPEC.loader is not None +SPEC.loader.exec_module(MODULE) + +for name in dir(MODULE): + if name.startswith("Test") or name.endswith("Tests"): + globals()[name] = getattr(MODULE, name) diff --git a/apps/api/test_parser.py b/apps/api/test_parser.py new file mode 100644 index 0000000..c15455d --- /dev/null +++ b/apps/api/test_parser.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +import importlib.util +import sys +from pathlib import Path + + +ROOT = Path(__file__).resolve().parent +SRC = ROOT / "src" +if str(SRC) not in sys.path: + sys.path.insert(0, str(SRC)) + +TEST_PATH = ROOT / "tests" / "test_parser.py" +SPEC = importlib.util.spec_from_file_location("ecospecies_api_test_parser", TEST_PATH) +MODULE = importlib.util.module_from_spec(SPEC) +assert SPEC is not None and SPEC.loader is not None +SPEC.loader.exec_module(MODULE) + +for name in dir(MODULE): + if name.startswith("Test") or name.endswith("Tests"): + globals()[name] = getattr(MODULE, name) diff --git a/apps/api/tests/test_auth.py b/apps/api/tests/test_auth.py new file mode 100644 index 0000000..5a9a736 --- 
/dev/null +++ b/apps/api/tests/test_auth.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker + +from ecospecies_api import auth, repository + + +class ContributorAuthTests(unittest.TestCase): + def setUp(self) -> None: + self.tempdir = tempfile.TemporaryDirectory() + db_path = Path(self.tempdir.name) / "test.db" + self.engine = create_engine(f"sqlite:///{db_path}", future=True) + self.session_local = sessionmaker( + bind=self.engine, + autoflush=False, + autocommit=False, + future=True, + ) + self.repository_engine_patch = patch.object(repository, "create_db_engine", return_value=self.engine) + self.repository_session_patch = patch.object(repository, "SessionLocal", self.session_local) + self.auth_engine_patch = patch.object(auth, "create_db_engine", return_value=self.engine) + self.auth_session_patch = patch.object(auth, "SessionLocal", self.session_local) + self.repository_engine_patch.start() + self.repository_session_patch.start() + self.auth_engine_patch.start() + self.auth_session_patch.start() + + def tearDown(self) -> None: + self.auth_session_patch.stop() + self.auth_engine_patch.stop() + self.repository_session_patch.stop() + self.repository_engine_patch.stop() + self.engine.dispose() + self.tempdir.cleanup() + + def test_contributor_token_resolves_to_contributor_session(self) -> None: + registration = repository.register_contributor("author@example.org", True) + + session = auth.resolve_auth_session({"Authorization": f"Bearer {registration['token']}"}) + + self.assertIsNotNone(session) + assert session is not None + self.assertEqual(session.username, "author@example.org") + self.assertEqual(session.role, "contributor") + + def test_contributor_role_does_not_satisfy_editor(self) -> None: + self.assertTrue(auth.role_satisfies("editor", "contributor")) + self.assertFalse(auth.role_satisfies("contributor", "editor")) + + +if __name__ == "__main__": + unittest.main() diff --git a/apps/api/tests/test_citation_enrichment.py b/apps/api/tests/test_citation_enrichment.py new file mode 100644 index 0000000..501941a --- /dev/null +++ b/apps/api/tests/test_citation_enrichment.py @@ -0,0 +1,527 @@ +from __future__ import annotations + +import unittest +from unittest.mock import patch + +from ecospecies_api.citation_enrichment import ( + _crossref_message_to_entry, + _datacite_item_to_entry, + _openalex_work_to_entry, + _render_normalized_text, + apply_citation_candidate_selection, + discover_citation_candidates, + enrich_citation_payload, + LocalBibEntry, + LocalMetadataResolver, + LocalResolution, +) +from ecospecies_api.citegeist_bridge import extract_draft_citation, render_single_bibtex + + +class CitationEnrichmentTests(unittest.TestCase): + def test_render_normalized_text_includes_volume_number_and_pages(self) -> None: + rendered = _render_normalized_text( + "article", + { + "author": "Daniell, W.C.", + "year": "1872", + "title": "Letters referring to experiments of W.C. Daniell", + "journal": "Comm. Rept. U.S. Comm. Fish & Fish.", + "volume": "2", + "number": "4", + "pages": "387-390", + "doi": "10.1000/example", + }, + ) + + self.assertEqual( + rendered, + "Daniell, W.C. (1872). Letters referring to experiments of W.C. Daniell. Comm. Rept. U.S. Comm. Fish & Fish., 2(4): 387-390. 
DOI:10.1000/example", + ) + + def test_crossref_mapping_keeps_volume_issue_and_pages(self) -> None: + entry = _crossref_message_to_entry( + { + "type": "journal-article", + "title": ["Example Work"], + "issued": {"date-parts": [[1872]]}, + "author": [{"family": "Daniell", "given": "W.C."}], + "container-title": ["Comm. Rept. U.S. Comm. Fish & Fish."], + "DOI": "10.1000/example", + "URL": "https://doi.org/10.1000/example", + "volume": "2", + "issue": "4", + "page": "387-390", + } + ) + + self.assertEqual(entry.fields["volume"], "2") + self.assertEqual(entry.fields["number"], "4") + self.assertEqual(entry.fields["pages"], "387-390") + + def test_openalex_mapping_keeps_biblio_fields(self) -> None: + entry = _openalex_work_to_entry( + { + "id": "https://openalex.org/W12345", + "display_name": "OpenAlex Discovered Work", + "publication_year": 2022, + "type": "article", + "doi": "https://doi.org/10.1000/example-openalex", + "authorships": [{"author": {"display_name": "J S, Smith"}}], + "primary_location": {"source": {"display_name": "Journal of Graph Discovery"}}, + "biblio": {"volume": "12", "issue": "3", "first_page": "101", "last_page": "118"}, + "abstract_inverted_index": {"Graphs": [0], "support": [1], "learning": [2]}, + } + ) + + self.assertEqual(entry.fields["author"], "Smith, J. S.") + self.assertEqual(entry.fields["volume"], "12") + self.assertEqual(entry.fields["number"], "3") + self.assertEqual(entry.fields["pages"], "101-118") + self.assertEqual(entry.fields["abstract"], "Graphs support learning") + + def test_openalex_mapping_handles_null_source(self) -> None: + entry = _openalex_work_to_entry( + { + "id": "https://openalex.org/W54321", + "display_name": "OpenAlex Work Without Source", + "publication_year": 2021, + "type": "article", + "doi": "https://doi.org/10.1000/example-null-source", + "authorships": [{"author": {"display_name": "Jane Smith"}}], + "primary_location": {"source": None}, + "biblio": {"volume": "5", "issue": "1", "first_page": "10", "last_page": "20"}, + } + ) + + self.assertEqual(entry.fields["title"], "OpenAlex Work Without Source") + self.assertNotIn("journal", entry.fields) + self.assertEqual(entry.fields["volume"], "5") + self.assertEqual(entry.fields["number"], "1") + self.assertEqual(entry.fields["pages"], "10-20") + + def test_datacite_mapping_keeps_container_and_pages(self) -> None: + entry = _datacite_item_to_entry( + { + "attributes": { + "titles": [{"title": "DataCite Work"}], + "creators": [{"name": "J R, Rivera"}], + "publicationYear": "2021", + "doi": "10.1000/datacite-work", + "url": "https://doi.org/10.1000/datacite-work", + "container": "Journal of Metadata", + "volume": "7", + "issue": "2", + "firstPage": "44", + "lastPage": "59", + "descriptions": [ + {"descriptionType": "Abstract", "description": "Abstract: Metadata makes reuse easier."} + ], + } + } + ) + + self.assertEqual(entry.fields["author"], "Rivera, J. 
R.") + self.assertEqual(entry.fields["journal"], "Journal of Metadata") + self.assertEqual(entry.fields["volume"], "7") + self.assertEqual(entry.fields["number"], "2") + self.assertEqual(entry.fields["pages"], "44-59") + self.assertEqual(entry.fields["abstract"], "Metadata makes reuse easier.") + + def test_render_single_bibtex_preserves_balanced_braces_and_repairs_unmatched_ones(self) -> None: + rendered = render_single_bibtex( + "misc", + "example", + { + "title": "Alpha_beta {Gamma}", + "note": "raw_reference = {Alpha } beta}", + }, + ) + + self.assertIn("title = {Alpha_beta {Gamma}}", rendered) + self.assertIn("note = {raw_reference = {Alpha } beta)}", rendered) + + def test_extract_draft_citation_repairs_report_style_reference_fields(self) -> None: + class MockEntry: + entry_type = "misc" + citation_key = "badkey" + fields = { + "title": "Daniell, W.C. 1872. Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River. Comm. Rept. U.S. Comm. Fish & Fish. 2: 387-390.", + "year": "1872", + "note": "extracted_reference = {true}", + } + + with patch( + "ecospecies_api.citegeist_bridge._load_citegeist_extract", + return_value=lambda text: [MockEntry()], + ): + draft = extract_draft_citation( + "Daniell, W.C. 1872. Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River. Comm. Rept. U.S. Comm. Fish & Fish. 2: 387-390.", + legacy_reference_number="160", + ) + + self.assertIsNotNone(draft) + assert draft is not None + self.assertEqual(draft.fields["author"], "Daniell, W.C") + self.assertEqual( + draft.fields["title"], + "Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River", + ) + self.assertEqual(draft.fields["journal"], "Comm. Rept. U.S. Comm. Fish & Fish") + self.assertEqual(draft.fields["volume"], "2") + self.assertEqual(draft.fields["pages"], "387-390") + self.assertEqual(draft.citation_key, "daniell1872lettersreferringexperiments") + + def test_extract_draft_citation_does_not_split_title_on_report_word(self) -> None: + class MockEntry: + entry_type = "misc" + citation_key = "badkey" + fields = { + "title": "Smith, J. 1999. Habitat report synthesis for Alabama shad. NOAA Tech. Memo. NMFS-SEFSC-200.", + "year": "1999", + "note": "extracted_reference = {true}", + } + + with patch( + "ecospecies_api.citegeist_bridge._load_citegeist_extract", + return_value=lambda text: [MockEntry()], + ): + draft = extract_draft_citation( + "Smith, J. 1999. Habitat report synthesis for Alabama shad. NOAA Tech. Memo. NMFS-SEFSC-200.", + legacy_reference_number="42", + ) + + self.assertIsNotNone(draft) + assert draft is not None + self.assertEqual(draft.fields["author"], "Smith, J") + self.assertEqual(draft.fields["title"], "Habitat report synthesis for Alabama shad") + self.assertEqual(draft.fields["howpublished"], "NOAA Tech. Memo. NMFS-SEFSC-200") + self.assertNotIn("journal", draft.fields) + + def test_extract_draft_citation_repairs_proc_abbreviation_venue(self) -> None: + class MockEntry: + entry_type = "misc" + citation_key = "badkey" + fields = { + "title": "Bailey, R.M., H.E. Winn and C.L. Smith. 1954. Fishes from the Escambia River, Alabama and Florida, with ecologic and taxonomic notes. Proc. Acad. Sci. Philad. 106: 109-134.", + "year": "1954", + "note": "extracted_reference = {true}", + } + + with patch( + "ecospecies_api.citegeist_bridge._load_citegeist_extract", + return_value=lambda text: [MockEntry()], + ): + draft = extract_draft_citation( + "Bailey, R.M., H.E. 
Winn and C.L. Smith. 1954. Fishes from the Escambia River, Alabama and Florida, with ecologic and taxonomic notes. Proc. Acad. Sci. Philad. 106: 109-134.", + legacy_reference_number="26", + ) + + self.assertIsNotNone(draft) + assert draft is not None + self.assertEqual( + draft.fields["title"], + "Fishes from the Escambia River, Alabama and Florida, with ecologic and taxonomic notes", + ) + self.assertEqual(draft.fields["journal"], "Proc. Acad. Sci. Philad") + self.assertEqual(draft.fields["volume"], "106") + self.assertEqual(draft.fields["pages"], "109-134") + + def test_extract_draft_citation_repairs_occasional_paper_venue(self) -> None: + class MockEntry: + entry_type = "misc" + citation_key = "badkey" + fields = { + "title": "Behre, E.H. 1950. Annotated list of the fauna of the Grand Isle region, 1928-1946. Occas. Pap. Mar. Lab., LSU 6(6): 1-66.", + "year": "1950", + "note": "extracted_reference = {true}", + } + + with patch( + "ecospecies_api.citegeist_bridge._load_citegeist_extract", + return_value=lambda text: [MockEntry()], + ): + draft = extract_draft_citation( + "Behre, E.H. 1950. Annotated list of the fauna of the Grand Isle region, 1928-1946. Occas. Pap. Mar. Lab., LSU 6(6): 1-66.", + legacy_reference_number="41", + ) + + self.assertIsNotNone(draft) + assert draft is not None + self.assertEqual( + draft.fields["title"], + "Annotated list of the fauna of the Grand Isle region, 1928-1946", + ) + self.assertEqual(draft.fields["journal"], "Occas. Pap. Mar. Lab., LSU") + self.assertEqual(draft.fields["volume"], "6") + self.assertEqual(draft.fields["number"], "6") + self.assertEqual(draft.fields["pages"], "1-66") + + def test_extract_draft_citation_repairs_partial_existing_venue_stub(self) -> None: + class MockEntry: + entry_type = "misc" + citation_key = "badkey" + fields = { + "title": "Annotated list of the fauna of the Grand Isle region, 1928-1946", + "year": "1950", + "howpublished": "Occas", + "note": "extracted_reference = {true}", + } + + with patch( + "ecospecies_api.citegeist_bridge._load_citegeist_extract", + return_value=lambda text: [MockEntry()], + ): + draft = extract_draft_citation( + "Behre, E.H. 1950. Annotated list of the fauna of the Grand Isle region, 1928-1946. Occas. Pap. Mar. Lab., LSU 6(6): 1-66.", + legacy_reference_number="41", + ) + + self.assertIsNotNone(draft) + assert draft is not None + self.assertEqual(draft.fields["journal"], "Occas. Pap. Mar. Lab., LSU") + self.assertEqual(draft.fields["volume"], "6") + self.assertEqual(draft.fields["number"], "6") + self.assertEqual(draft.fields["pages"], "1-66") + + def test_falls_back_to_internal_resolver_when_citegeist_repo_is_unavailable(self) -> None: + class MockResolver: + def resolve_entry(self, entry): + class Resolution: + source_label = "crossref:doi:10.1000/example" + + class Entry: + entry_type = "article" + citation_key = "doi101000example" + fields = { + "author": "Smith, Jane", + "year": "2024", + "title": "Example Work", + "journal": "Journal of Examples", + "doi": "10.1000/example", + "url": "https://doi.org/10.1000/example", + } + + entry = Entry() + + return Resolution() + + with patch( + "ecospecies_api.citation_enrichment._load_citegeist_resolution_components", + return_value=(None, None, None, None), + ): + result = enrich_citation_payload( + { + "raw_text": "Smith, Jane. 2024. 
Example Work.", + "legacy_reference_number": "7", + }, + resolver=MockResolver(), + ) + + self.assertEqual(result["enrichment_status"], "resolved") + self.assertEqual(result["doi"], "10.1000/example") + self.assertEqual(result["source_url"], "https://doi.org/10.1000/example") + self.assertEqual(result["resolver_source_label"], "crossref:doi:10.1000/example") + self.assertIn("ecospecies_reference_number = {7}", result["draft_bibtex"]) + + def test_enrichment_replaces_raw_reference_title_and_dedupes_legacy_note(self) -> None: + class MockResolver: + def resolve_entry(self, entry): + class Resolution: + source_label = "crossref:search:Letters referring to experiments" + + class Entry: + entry_type = "article" + citation_key = "daniell1872lettersshadalabama" + fields = { + "author": "Daniell, W.C.", + "year": "1872", + "title": "Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River", + "journal": "Comm. Rept. U.S. Comm. Fish & Fish.", + "url": "", + } + + entry = Entry() + + return Resolution() + + result = enrich_citation_payload( + { + "raw_text": "Daniell, W.C. 1872. Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River. Comm. Rept. U.S. Comm. Fish & Fish. 2: 387-390.", + "legacy_reference_number": "160", + "citation_key": "daniell1948daniellwc", + }, + resolver=MockResolver(), + ) + + self.assertEqual(result["enrichment_status"], "resolved") + self.assertEqual(result["citation_key"], "daniell1872lettersreferringexperiments") + self.assertIn( + "title = {Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River}", + result["draft_bibtex"], + ) + self.assertIn("year = {1872}", result["draft_bibtex"]) + self.assertEqual(result["draft_bibtex"].count("ecospecies_reference_number = {160}"), 1) + + def test_enrichment_rejects_conflicting_resolved_metadata(self) -> None: + class MockResolver: + def resolve_entry(self, entry): + class Resolution: + source_label = "crossref:search:alabama-shad-false-positive" + + class Entry: + entry_type = "article" + citation_key = "daniell2009habitatuseage" + fields = { + "author": "Daniell, W.C.", + "year": "2009", + "title": "Habitat use of age 0 Alabama shad in the Pascagoula River drainage, USA", + "journal": "Transactions of the American Fisheries Society", + "doi": "10.1111/j.1600-0633.2009.00395.x", + "url": "https://doi.org/10.1111/j.1600-0633.2009.00395.x", + "volume": "19", + "number": "1", + "pages": "107-115", + } + + entry = Entry() + + return Resolution() + + result = enrich_citation_payload( + { + "raw_text": "Daniell, W.C. 1872. Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River. Comm. Rept. U.S. Comm. Fish & Fish. 2: 387-390.", + "legacy_reference_number": "160", + }, + resolver=MockResolver(), + ) + + self.assertEqual(result["enrichment_status"], "unresolved") + self.assertIn("conflicts with citation seed fields", result["enrichment_error"]) + + def test_unresolved_enrichment_still_returns_refreshed_seed_fields(self) -> None: + class MockResolver: + def resolve_entry(self, entry): + return None + + result = enrich_citation_payload( + { + "raw_text": "Behre, E.H. 1950. Annotated list of the fauna of the Grand Isle region, 1928-1946. Occas. Pap. Mar. 
Lab., LSU 6(6): 1-66.", + "legacy_reference_number": "41", + "citation_key": "oldbadkey", + "entry_type": "misc", + }, + resolver=MockResolver(), + ) + + self.assertEqual(result["enrichment_status"], "unresolved") + self.assertEqual(result["citation_key"], "behre1950annotatedlistfauna") + self.assertIn("title = {Annotated list of the fauna of the Grand Isle region, 1928-1946}", result["draft_bibtex"]) + self.assertIn("Occas. Pap. Mar. Lab., LSU", result["draft_bibtex"]) + + def test_discover_citation_candidates_returns_scored_candidates(self) -> None: + class MockResolver: + def search_crossref_candidates(self, title): + return [ + LocalResolution( + LocalBibEntry( + "article", + "daniell1872lettersreferringexperiments", + { + "author": "Daniell, W.C.", + "year": "1872", + "title": "Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River", + "journal": "Comm. Rept. U.S. Comm. Fish & Fish.", + "volume": "2", + "pages": "387-390", + }, + ), + "crossref:search:1:daniell-good", + ), + LocalResolution( + LocalBibEntry( + "article", + "daniell2009habitatuseage", + { + "author": "Daniell, W.C.", + "year": "2009", + "title": "Habitat use of age 0 Alabama shad in the Pascagoula River drainage, USA", + "journal": "Transactions of the American Fisheries Society", + "volume": "19", + "number": "1", + "pages": "107-115", + }, + ), + "crossref:search:2:daniell-bad", + ), + ] + + def search_datacite_candidates(self, title): + return [] + + def search_openalex_candidates(self, title): + return [] + + result = discover_citation_candidates( + { + "raw_text": "Daniell, W.C. 1872. Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River. Comm. Rept. U.S. Comm. Fish & Fish. 2: 387-390.", + "legacy_reference_number": "160", + }, + resolver=MockResolver(), + ) + + self.assertEqual(result["candidate_count"], 2) + self.assertGreater(result["candidates"][0]["score"], result["candidates"][1]["score"]) + self.assertEqual(result["candidates"][0]["field_matches"]["year"]["status"], "exact") + self.assertEqual(result["candidates"][1]["field_matches"]["year"]["status"], "conflict") + + def test_local_crossref_candidate_search_filters_placeholder_stub_entries(self) -> None: + resolver = LocalMetadataResolver() + resolver._safe_get_json = lambda url: { + "message": { + "items": [ + { + "type": "journal-article", + "title": ["Referenced work 1"], + "issued": {"date-parts": [[2020]]}, + }, + { + "type": "journal-article", + "title": ["Useful Paper"], + "issued": {"date-parts": [[2020]]}, + "author": [{"family": "Smith", "given": "J S"}], + "container-title": ["Journal of Examples"], + "DOI": "10.1000/useful", + }, + ] + } + } + + results = resolver.search_crossref_candidates("Useful Paper") + + self.assertEqual(len(results), 1) + self.assertEqual(results[0].entry.fields["title"], "Useful Paper") + + def test_apply_citation_candidate_selection_uses_selected_candidate_fields(self) -> None: + result = apply_citation_candidate_selection( + { + "raw_text": "Daniell, W.C. 1872. Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River. Comm. Rept. U.S. Comm. Fish & Fish. 2: 387-390.", + "legacy_reference_number": "160", + }, + { + "source_label": "crossref:search:1:daniell-good", + "entry_type": "article", + "fields": { + "author": "Daniell, W.C.", + "year": "1872", + "title": "Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River", + "journal": "Comm. Rept. U.S. 
Comm. Fish & Fish.", + "volume": "2", + "pages": "387-390", + }, + }, + ) + + self.assertEqual(result["enrichment_status"], "resolved") + self.assertEqual(result["resolver_source_label"], "editor:selected:crossref:search:1:daniell-good") + self.assertIn("Comm. Rept. U.S. Comm. Fish & Fish., 2: 387-390", result["normalized_text"]) diff --git a/apps/api/tests/test_document_format.py b/apps/api/tests/test_document_format.py new file mode 100644 index 0000000..b657580 --- /dev/null +++ b/apps/api/tests/test_document_format.py @@ -0,0 +1,195 @@ +from __future__ import annotations + +import json +import unittest + +from ecospecies_api.document_format import ( + DocumentNode, + StructuredDocument, + build_document_from_species_payload, + extract_citation_entries, + extract_species_projection, + export_markdown_document, + parse_markdown_document, + validate_markdown_document, +) + + +class StructuredMarkdownTests(unittest.TestCase): + def test_round_trip_markdown_preserves_metadata_and_hierarchy(self) -> None: + source = """--- +title: American Oyster +common_name: American Oyster +scientific_name: Crassostrea virginica +legacy_identifiers: + - authority: legacy-ecospecies + identifier: 5192 + label: FLELMR +taxon_identifiers: + - authority: worms + identifier: 159059 + label: AphiaID + primary: true +primary_taxon_authority: worms +--- + +## Summary +Short abstract. + +## Habitat + +### Type +Estuarine. +""" + + document = parse_markdown_document(source) + + self.assertEqual(document.metadata["title"], "American Oyster") + self.assertEqual(document.metadata["primary_taxon_authority"], "worms") + self.assertEqual(document.metadata["legacy_identifiers"][0]["identifier"], "5192") + self.assertEqual(document.metadata["taxon_identifiers"][0]["authority"], "worms") + self.assertEqual(document.nodes[0].title, "Summary") + self.assertEqual(document.nodes[1].children[0].title, "Type") + self.assertIn("## Habitat", export_markdown_document(document)) + + def test_build_document_from_species_payload_creates_markdown_sections(self) -> None: + document = build_document_from_species_payload( + { + "title": "American Oyster", + "common_name": "American Oyster", + "scientific_name": "Crassostrea virginica", + "flelmr_code": "5192", + "source_file": "American Oyster.txt", + "summary": "Short abstract.", + "sections": [ + {"heading": "HEADER", "content": "Ignored header"}, + {"heading": "Habitat", "content": "Estuarine."}, + {"heading": "Reproduction", "content": "Broadcast spawner."}, + ], + } + ) + + self.assertEqual(document.metadata["legacy_identifiers"][0]["identifier"], "5192") + self.assertEqual(document.metadata["legacy_identifiers"][0]["authority"], "legacy-ecospecies") + self.assertEqual([node.title for node in document.nodes], ["Summary", "Habitat", "Reproduction"]) + self.assertEqual(document.nodes[1].body, "Estuarine.") + + def test_extract_species_projection_flattens_nested_headings(self) -> None: + document = parse_markdown_document( + """--- +title: American Oyster +common_name: American Oyster +scientific_name: Crassostrea virginica +legacy_identifiers: + - authority: legacy-ecospecies + identifier: 5192 + label: FLELMR +--- + +## Summary +Short abstract. + +## Habitat +General habitat. + +### Type +Estuarine. 
+""" + ) + + projection = extract_species_projection(document) + + self.assertEqual(projection["summary"], "Short abstract.") + self.assertEqual(projection["flelmr_code"], "5192") + self.assertEqual( + [section["heading"] for section in projection["sections"]], + ["Habitat", "Habitat / Type"], + ) + + def test_extract_species_projection_accepts_legacy_species_code_front_matter(self) -> None: + document = parse_markdown_document( + """--- +title: Legacy Fish +common_name: Legacy Fish +scientific_name: Pisces historicus +species_code: 4242 +--- + +## Habitat +Estuarine. +""" + ) + + projection = extract_species_projection(document) + + self.assertEqual(projection["flelmr_code"], "4242") + + def test_validate_markdown_document_rejects_missing_front_matter_and_depth_jump(self) -> None: + errors = validate_markdown_document( + """## Habitat +Text + +#### Type +Nested too deeply. +""" + ) + + self.assertTrue(any("front matter" in error for error in errors)) + self.assertTrue(any("Heading depth jumps" in error for error in errors)) + + def test_extract_citation_entries_strips_legacy_comma_number_prefix(self) -> None: + document = parse_markdown_document( + """--- +title: Alabama Shad +common_name: Alabama Shad +scientific_name: Alosa alabamae +--- + +## References +160, Daniell, W.C. 1872. Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River. Comm. Rept. U.S. Comm. Fish & Fish. 2: 387-390. +""" + ) + + citations = extract_citation_entries(document) + + self.assertEqual(len(citations), 1) + self.assertEqual(citations[0]["legacy_reference_number"], "160") + self.assertTrue(citations[0]["raw_text"].startswith("Daniell, W.C. 1872.")) + self.assertFalse(citations[0]["raw_text"].startswith("160,")) + + def test_extract_citation_entries_accepts_colon_terminated_citation_heading(self) -> None: + citations = extract_citation_entries( + StructuredDocument( + metadata={}, + nodes=[ + DocumentNode( + node_type="section", + title="Citations:", + body="7, Ahmed, M. 1975. Speciation in living oysters.", + depth=2, + ) + ], + ) + ) + + self.assertEqual(len(citations), 1) + self.assertEqual(citations[0]["legacy_reference_number"], "7") + + def test_extract_citation_entries_accepts_bare_number_prefix(self) -> None: + document = parse_markdown_document( + """--- +title: Eastern Mosquitofish +common_name: Eastern Mosquitofish +scientific_name: Gambusia holbrooki +--- + +## Citations +848 Gilmore, R.G. 1977. Fishes of the Indian River Lagoon and adjacent waters, Florida. +""" + ) + + citations = extract_citation_entries(document) + + self.assertEqual(len(citations), 1) + self.assertEqual(citations[0]["legacy_reference_number"], "848") + self.assertTrue(citations[0]["raw_text"].startswith("Gilmore, R.G. 
1977.")) diff --git a/apps/api/tests/test_parser.py b/apps/api/tests/test_parser.py new file mode 100644 index 0000000..a53eedd --- /dev/null +++ b/apps/api/tests/test_parser.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + +from ecospecies_api import parser + + +class ParserPathResolutionTests(unittest.TestCase): + def test_ecospecies_code_is_treated_as_flelmr_code(self) -> None: + metadata = parser.extract_metadata( + [ + "Title: Test Fish", + "EcoSpecies Code: 4242", + ] + ) + + self.assertEqual(metadata["ecospecies code"], "4242") + self.assertEqual(metadata["flelmr"], "4242") + + def test_title_case_headings_are_split_into_sections(self) -> None: + sections = parser.split_sections( + [ + "Species profile: American oyster (Crassostrea virginica)", + "", + "Classification", + " Phylum: Mollusca", + "Value", + "Commercial: Important fishery.", + "Habitat", + "Type: Estuarine.", + ] + ) + + self.assertEqual( + [section.heading for section in sections], + ["HEADER", "Classification", "Value", "Habitat"], + ) + + def test_colon_terminated_title_case_headings_are_split_into_sections(self) -> None: + sections = parser.split_sections( + [ + "Ecological Interactions and Notes", + "Predator text.", + "", + "Reference Numbers:", + "", + "Citations:", + "7, Ahmed, M. 1975. Speciation in living oysters.", + ] + ) + + self.assertEqual( + [section.heading for section in sections], + ["HEADER", "Citations"], + ) + + def test_default_data_dir_uses_in_repo_path_without_spaces(self) -> None: + with patch.dict("os.environ", {}, clear=True): + resolved = Path(parser.get_default_data_dir()) + + self.assertEqual(resolved, parser.get_repo_root() / "input-data" / "InputFiles") + + def test_relative_override_must_stay_within_repo(self) -> None: + with self.assertRaisesRegex(ValueError, "within the codebase directory"): + parser.resolve_data_dir("../input-data/InputFiles") + + def test_absolute_override_outside_repo_is_rejected(self) -> None: + with tempfile.TemporaryDirectory() as tempdir: + with self.assertRaisesRegex(ValueError, "within the codebase directory"): + parser.resolve_data_dir(tempdir) + + def test_directory_names_with_spaces_are_rejected(self) -> None: + with self.assertRaisesRegex(ValueError, "unsafe directory name"): + parser.resolve_data_dir("input-data/Bad Name") + + def test_directory_names_with_special_characters_are_rejected(self) -> None: + with self.assertRaisesRegex(ValueError, "unsafe directory name"): + parser.resolve_data_dir("input-data/bad@name") + + def test_load_species_records_resolves_repo_relative_paths(self) -> None: + records = parser.load_species_records("input-data/InputFiles") + + self.assertGreater(len(records), 0) + + def test_duplicate_source_records_receive_unique_stable_slugs(self) -> None: + records = parser.load_species_records("input-data/InputFiles") + slug_by_source = {record.source_file: record.slug for record in records} + + self.assertEqual(len(records), len(set(record.slug for record in records))) + self.assertEqual( + slug_by_source["Red Snapper_SLH_Outline2012_0722.txt"], + "red-snapper-red-snapper-slh-outline2012-0722", + ) + self.assertEqual( + slug_by_source["RedSnapper_SLH_2012_0830_combined.txt"], + "red-snapper-redsnapper-slh-2012-0830-combined", + ) + self.assertEqual( + slug_by_source["Sailfin Molly SLH RGG.txt"], + "sailfin-molly-sailfin-molly-slh-rgg", + ) + self.assertTrue( + slug_by_source["Sailfin_Molly SLH RGG.txt"].startswith( + 
"sailfin-molly-sailfin-molly-slh-rgg-" + ) + ) diff --git a/apps/api/tests/test_repository.py b/apps/api/tests/test_repository.py index 1219234..bab05a6 100644 --- a/apps/api/tests/test_repository.py +++ b/apps/api/tests/test_repository.py @@ -112,6 +112,35 @@ class RepositoryWorkflowTests(unittest.TestCase): self.assertEqual(detail["section_count"], 2) self.assertEqual([section["position"] for section in detail["sections"]], [1, 2]) self.assertEqual([item["code"] for item in detail["diagnostics"]], ["missing_citations"]) + self.assertEqual( + detail["legacy_identifiers"], + [ + { + "authority": "legacy-ecospecies", + "identifier": "9999", + "label": "FLELMR", + } + ], + ) + + def test_species_detail_includes_structured_document_and_legacy_source(self) -> None: + input_dir = Path(self.tempdir.name) / "input-data" / "InputFiles" + input_dir.mkdir(parents=True, exist_ok=True) + (input_dir / "Test Shad.txt").write_text("HEADER\nLegacy header content\n", encoding="utf-8") + + with patch.object(repository, "get_default_data_dir", return_value=str(input_dir)): + detail = repository.get_species_by_slug("test-shad") + + self.assertIsNotNone(detail) + assert detail is not None + self.assertEqual(detail["structured_document"]["source_format"], "ecospecies-markdown-v1") + self.assertIn( + "HABITAT", + [node["title"] for node in detail["structured_document"]["ast"]["nodes"]], + ) + self.assertEqual(detail["legacy_source"]["source_file"], "Test Shad.txt") + self.assertIn("Legacy header content", detail["legacy_source"]["text"]) + self.assertEqual(detail["taxon_identifiers"], []) def test_editorial_update_changes_publication_visibility_and_creates_audit(self) -> None: result = repository.update_species_editorial( @@ -207,6 +236,60 @@ class RepositoryWorkflowTests(unittest.TestCase): self.assertEqual(len(audit), 2) self.assertEqual([entry["action"] for entry in audit], ["section_update", "editorial_update"]) + def test_reimport_preserves_persisted_taxon_identifiers(self) -> None: + repository.update_species_document_markdown( + slug="test-shad", + markdown="""--- +title: Test Shad +common_name: Test Shad +scientific_name: Alosa testus +legacy_identifiers: + - authority: legacy-ecospecies + identifier: 9999 + label: FLELMR +taxon_identifiers: + - authority: gbif + identifier: 12345 + label: taxonKey + primary: true +primary_taxon_authority: gbif +--- + +## Summary +Taxon-reviewed summary. 
+""", + username="edith", + ) + + repository.import_species_payload(UPDATED_PAYLOAD) + + detail = repository.get_editor_species_detail("test-shad") + + self.assertIsNotNone(detail) + self.assertEqual(detail["primary_taxon_authority"], "gbif") + self.assertEqual( + detail["primary_taxon_identifier"], + { + "authority": "gbif", + "identifier": "12345", + "label": "taxonKey", + "primary": True, + "source_url": "", + }, + ) + self.assertEqual( + detail["taxon_identifiers"], + [ + { + "authority": "gbif", + "identifier": "12345", + "label": "taxonKey", + "primary": True, + "source_url": "", + } + ], + ) + def test_reimport_updates_summary_when_no_editorial_override_exists(self) -> None: repository.import_species_payload(UPDATED_PAYLOAD) @@ -302,6 +385,583 @@ class RepositoryWorkflowTests(unittest.TestCase): self.assertEqual(audit[0]["action"], "import_restore") self.assertEqual(audit[0]["details"]["is_archived"], {"from": True, "to": False}) + def test_document_markdown_update_refreshes_flat_projection(self) -> None: + result = repository.update_species_document_markdown( + slug="test-shad", + markdown="""--- +title: Test Shad Markdown +common_name: Test Shad +scientific_name: Alosa markdownus +species_code: 4242 +--- + +## Summary +Markdown summary. + +## Habitat +Open water. + +### Type +Pelagic. +""", + username="frank", + ) + + detail = repository.get_editor_species_detail("test-shad") + document = repository.get_species_document("test-shad") + audit = repository.list_species_audit("test-shad") + + self.assertIsNotNone(result) + self.assertIsNotNone(detail) + self.assertIsNotNone(document) + self.assertEqual(detail["title"], "Test Shad Markdown") + self.assertEqual(detail["scientific_name"], "Alosa markdownus") + self.assertEqual(detail["flelmr_code"], "4242") + self.assertEqual(detail["summary"], "Markdown summary.") + self.assertEqual( + [section["heading"] for section in detail["sections"]], + ["Habitat", "Habitat / Type"], + ) + self.assertEqual(document["updated_by"], "frank") + self.assertIsNotNone(audit) + self.assertEqual(audit[0]["action"], "document_update") + + def test_document_markdown_update_extracts_citations(self) -> None: + repository.update_species_document_markdown( + slug="test-shad", + markdown="""--- +title: Test Shad Markdown +common_name: Test Shad +scientific_name: Alosa markdownus +legacy_identifiers: + - authority: legacy-ecospecies + identifier: 4242 + label: FLELMR +--- + +## Summary +Markdown summary. + +## References + +- Smith, J. 2024. Example paper. doi:10.1000/example-doi +- [7] Jones, A. 2022. Fisheries review. 
+""", + username="frank", + ) + + detail = repository.get_editor_species_detail("test-shad") + + self.assertIsNotNone(detail) + self.assertEqual(detail["citation_count"], 2) + self.assertEqual(detail["citations"][0]["section_heading"], "References") + self.assertEqual(detail["citations"][0]["legacy_reference_number"], "") + self.assertEqual(detail["citations"][0]["doi"], "10.1000/example-doi") + self.assertTrue(detail["citations"][0]["citation_key"]) + self.assertIn("@", detail["citations"][0]["draft_bibtex"]) + self.assertEqual(detail["citations"][0]["review_status"], "draft") + self.assertEqual(detail["citations"][1]["legacy_reference_number"], "7") + self.assertEqual(detail["citations"][1]["doi"], "") + self.assertIn("ecospecies_reference_number = \\{7\\}", detail["citations"][1]["draft_bibtex"]) + + def test_editor_can_review_citations_and_reviews_survive_document_save(self) -> None: + repository.update_species_document_markdown( + slug="test-shad", + markdown="""--- +title: Test Shad Markdown +common_name: Test Shad +scientific_name: Alosa markdownus +legacy_identifiers: + - authority: legacy-ecospecies + identifier: 4242 + label: FLELMR +--- + +## References + +- [7] Jones, A. 2022. Fisheries review. +""", + username="frank", + ) + + citations = repository.get_editor_species_citations("test-shad") + self.assertIsNotNone(citations) + citation = citations["citations"][0] + + result = repository.update_species_citation_review( + slug="test-shad", + citation_id=citation["id"], + review_status="accepted", + normalized_text="Jones, A. (2022). Fisheries review.", + doi="10.1000/review-doi", + citation_key="jones2022review", + entry_type="article", + draft_bibtex="@article{jones2022review,\n doi = {10.1000/review-doi}\n}", + username="edith", + ) + + self.assertIsNotNone(result) + self.assertEqual(result["citation"]["review_status"], "accepted") + self.assertEqual(result["citation"]["source_type"], "editor_review") + + repository.update_species_document_markdown( + slug="test-shad", + markdown="""--- +title: Test Shad Markdown +common_name: Test Shad +scientific_name: Alosa markdownus +legacy_identifiers: + - authority: legacy-ecospecies + identifier: 4242 + label: FLELMR +--- + +## References + +- [7] Jones, A. 2022. Fisheries review. +""", + username="frank", + ) + + citations = repository.get_editor_species_citations("test-shad") + audit = repository.list_species_audit("test-shad") + + self.assertIsNotNone(citations) + self.assertEqual(citations["citation_count"], 1) + self.assertEqual(citations["citations"][0]["review_status"], "accepted") + self.assertEqual(citations["citations"][0]["doi"], "10.1000/review-doi") + self.assertEqual(citations["citations"][0]["citation_key"], "jones2022review") + self.assertEqual(citations["citations"][0]["entry_type"], "article") + self.assertIn("10.1000/review-doi", citations["citations"][0]["draft_bibtex"]) + self.assertIsNotNone(audit) + self.assertEqual(audit[1]["action"], "citation_review_update") + + def test_editor_can_run_citation_enrichment(self) -> None: + repository.update_species_document_markdown( + slug="test-shad", + markdown="""--- +title: Test Shad Markdown +common_name: Test Shad +scientific_name: Alosa markdownus +legacy_identifiers: + - authority: legacy-ecospecies + identifier: 4242 + label: FLELMR +--- + +## References + +- [7] Jones, A. 2022. Fisheries review. 
+""", + username="frank", + ) + citations = repository.get_editor_species_citations("test-shad") + self.assertIsNotNone(citations) + citation = citations["citations"][0] + + with patch.object( + repository, + "enrich_citation_payload", + return_value={ + "citation_key": "jones2022review", + "entry_type": "article", + "normalized_text": "Jones, A. (2022). Fisheries review. Journal of Tests. DOI:10.1000/review-doi", + "draft_bibtex": "@article{jones2022review,\n doi = {10.1000/review-doi},\n}", + "doi": "10.1000/review-doi", + "source_url": "https://doi.org/10.1000/review-doi", + "openalex_id": "W12345", + "resolver_source_label": "crossref:doi:10.1000/review-doi", + "enrichment_status": "resolved", + "enrichment_error": "", + "conflicts": [], + }, + ): + result = repository.update_species_citation_enrichment( + slug="test-shad", + citation_id=citation["id"], + username="edith", + ) + + self.assertIsNotNone(result) + self.assertEqual(result["citation"]["enrichment_status"], "resolved") + self.assertEqual(result["citation"]["doi"], "10.1000/review-doi") + self.assertEqual(result["citation"]["openalex_id"], "W12345") + self.assertEqual(result["citation"]["resolver_source_label"], "crossref:doi:10.1000/review-doi") + self.assertEqual(result["citation"]["source_url"], "https://doi.org/10.1000/review-doi") + + citations = repository.get_editor_species_citations("test-shad") + audit = repository.list_species_audit("test-shad") + + self.assertIsNotNone(citations) + self.assertEqual(citations["citations"][0]["citation_key"], "jones2022review") + self.assertEqual(citations["citations"][0]["entry_type"], "article") + self.assertEqual(citations["citations"][0]["enrichment_status"], "resolved") + self.assertIsNotNone(audit) + self.assertEqual(audit[0]["action"], "citation_enrichment") + + def test_editor_can_run_batch_citation_enrichment(self) -> None: + repository.update_species_document_markdown( + slug="test-shad", + markdown="""--- +title: Test Shad Markdown +common_name: Test Shad +scientific_name: Alosa markdownus +legacy_identifiers: + - authority: legacy-ecospecies + identifier: 4242 + label: FLELMR +--- + +## References + +- [7] Jones, A. 2022. Fisheries review. +- [8] Smith, B. 2021. Estuarine habitat paper. +""", + username="frank", + ) + + payloads = [ + { + "citation_key": "jones2022review", + "entry_type": "article", + "normalized_text": "Jones, A. (2022). 
Fisheries review.", + "draft_bibtex": "@article{jones2022review,\n}", + "doi": "10.1000/review-doi", + "source_url": "https://doi.org/10.1000/review-doi", + "openalex_id": "W12345", + "resolver_source_label": "crossref:doi:10.1000/review-doi", + "enrichment_status": "resolved", + "enrichment_error": "", + "conflicts": [], + }, + { + "citation_key": "smith2021estuarine", + "entry_type": "misc", + "normalized_text": "", + "draft_bibtex": "", + "doi": "", + "source_url": "", + "openalex_id": "", + "resolver_source_label": "", + "enrichment_status": "unresolved", + "enrichment_error": "No metadata match found from DOI, title, or authority identifiers.", + "conflicts": [], + }, + ] + + with patch.object(repository, "enrich_citation_payload", side_effect=payloads): + result = repository.update_species_citations_enrichment_batch( + slug="test-shad", + username="edith", + ) + + self.assertIsNotNone(result) + self.assertEqual(result["citation_count"], 2) + self.assertEqual(result["changed_count"], 2) + self.assertEqual(result["resolved_count"], 1) + self.assertEqual(result["unresolved_count"], 1) + self.assertEqual(result["error_count"], 0) + + def test_editor_can_review_and_apply_citation_candidates(self) -> None: + repository.update_species_document_markdown( + slug="test-shad", + markdown="""--- +title: Test Shad Markdown +common_name: Test Shad +scientific_name: Alosa markdownus +legacy_identifiers: + - authority: legacy-ecospecies + identifier: 4242 + label: FLELMR +--- + +## References + +- [7] Daniell, W.C. 1872. Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River. Comm. Rept. U.S. Comm. Fish & Fish. 2: 387-390. +""", + username="frank", + ) + citations = repository.get_editor_species_citations("test-shad") + self.assertIsNotNone(citations) + citation = citations["citations"][0] + + with patch.object( + repository, + "discover_citation_candidates", + return_value={ + "seed": { + "fields": { + "author": "Daniell, W.C.", + "year": "1872", + "title": "Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River", + "journal": "Comm. Rept. U.S. Comm. Fish & Fish.", + "volume": "2", + "pages": "387-390", + } + }, + "candidate_count": 1, + "candidates": [ + { + "candidate_id": "crossref-search-1-daniell-good", + "source_label": "crossref:search:1:daniell-good", + "entry_type": "article", + "citation_key": "daniell1872lettersreferringexperiments", + "fields": { + "author": "Daniell, W.C.", + "year": "1872", + "title": "Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River", + "journal": "Comm. Rept. U.S. Comm. Fish & Fish.", + "volume": "2", + "pages": "387-390", + }, + } + ], + }, + ): + candidates = repository.get_species_citation_candidates("test-shad", citation["id"]) + + self.assertIsNotNone(candidates) + self.assertEqual(candidates["candidate_count"], 1) + + result = repository.apply_species_citation_candidate_selection( + slug="test-shad", + citation_id=citation["id"], + candidate={ + "source_label": "crossref:search:1:daniell-good", + "entry_type": "article", + "fields": { + "author": "Daniell, W.C.", + "year": "1872", + "title": "Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River", + "journal": "Comm. Rept. U.S. Comm. 
Fish & Fish.", + "volume": "2", + "pages": "387-390", + }, + }, + username="edith", + ) + + self.assertIsNotNone(result) + self.assertEqual(result["citation"]["resolver_source_label"], "editor:selected:crossref:search:1:daniell-good") + self.assertEqual(result["citation"]["source_type"], "editor_selected_candidate") + self.assertEqual(result["citation"]["review_status"], "accepted") + audit = repository.list_species_audit("test-shad") + self.assertIsNotNone(audit) + self.assertEqual(audit[0]["action"], "citation_candidate_selection") + + def test_editor_can_add_candidate_as_additional_citation_and_preserve_it(self) -> None: + repository.update_species_document_markdown( + slug="test-shad", + markdown="""--- +title: Test Shad Markdown +common_name: Test Shad +scientific_name: Alosa markdownus +legacy_identifiers: + - authority: legacy-ecospecies + identifier: 4242 + label: FLELMR +--- + +## References + +- [7] Daniell, W.C. 1872. Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River. Comm. Rept. U.S. Comm. Fish & Fish. 2: 387-390. +""", + username="frank", + ) + citations = repository.get_editor_species_citations("test-shad") + self.assertIsNotNone(citations) + source_citation = citations["citations"][0] + + result = repository.add_species_citation_from_candidate( + slug="test-shad", + citation_id=source_citation["id"], + candidate={ + "source_label": "crossref:search:1:daniell-related", + "entry_type": "article", + "fields": { + "author": "Jordan, F.", + "year": "2009", + "title": "Habitat use of age 0 Alabama shad in the Pascagoula River drainage, USA", + "journal": "Transactions of the American Fisheries Society", + "volume": "19", + "number": "1", + "pages": "107-115", + "doi": "10.1111/j.1600-0633.2009.00395.x", + "url": "https://doi.org/10.1111/j.1600-0633.2009.00395.x", + }, + }, + username="edith", + ) + + self.assertIsNotNone(result) + self.assertEqual(result["citation"]["source_type"], "editor_added_candidate") + self.assertEqual(result["citation"]["review_status"], "accepted") + + citations = repository.get_editor_species_citations("test-shad") + self.assertIsNotNone(citations) + self.assertEqual(citations["citation_count"], 2) + self.assertEqual(citations["citations"][1]["section_heading"], "References") + document = repository.get_species_document("test-shad") + self.assertIsNotNone(document) + self.assertIn("Habitat use of age 0 Alabama shad in the Pascagoula River drainage, USA", document["markdown"]) + + repository.update_species_document_markdown( + slug="test-shad", + markdown=document["markdown"], + username="frank", + ) + + citations = repository.get_editor_species_citations("test-shad") + self.assertIsNotNone(citations) + self.assertEqual(citations["citation_count"], 2) + self.assertEqual(citations["citations"][1]["source_type"], "editor_added_candidate") + audit = repository.list_species_audit("test-shad") + self.assertIsNotNone(audit) + self.assertEqual(audit[0]["action"], "document_update") + self.assertEqual(audit[1]["action"], "citation_candidate_addition") + + def test_contributor_can_view_only_owned_citations(self) -> None: + created = repository.create_contributor_species( + "writer@example.org", + """--- +title: Contributor Draft +common_name: Contributor Fish +scientific_name: Pisces contributoris +species_code: +--- + +## References + +- [12] Example, A. 2025. Draft reference. 
+""", + ) + + owned = repository.get_contributor_species_citations(created["slug"], "writer@example.org") + other = repository.get_contributor_species_citations(created["slug"], "other@example.org") + + self.assertIsNotNone(owned) + self.assertEqual(owned["citation_count"], 1) + self.assertEqual(owned["citations"][0]["legacy_reference_number"], "12") + self.assertIsNone(other) + + def test_public_bibliography_aggregates_species_citations(self) -> None: + repository.update_species_document_markdown( + slug="test-shad", + markdown="""--- +title: Test Shad Markdown +common_name: Test Shad +scientific_name: Alosa markdownus +legacy_identifiers: + - authority: legacy-ecospecies + identifier: 4242 + label: FLELMR +--- + +## References + +- [7] Jones, A. 2022. Fisheries review. +""", + username="frank", + ) + + citations = repository.get_editor_species_citations("test-shad") + self.assertIsNotNone(citations) + citation = citations["citations"][0] + repository.update_species_citation_review( + slug="test-shad", + citation_id=citation["id"], + review_status="accepted", + normalized_text="Jones, A. (2022). Fisheries review.", + doi="10.1000/review-doi", + citation_key="jones2022review", + entry_type="article", + draft_bibtex="@article{jones2022review,\n doi = {10.1000/review-doi}\n}", + username="edith", + abstract_text="A short abstract about fisheries review.", + ) + + bibliography = repository.list_public_bibliography() + + self.assertEqual(len(bibliography), 1) + self.assertEqual(bibliography[0]["citation_key"], "jones2022review") + self.assertEqual(bibliography[0]["abstract_text"], "A short abstract about fisheries review.") + self.assertEqual(bibliography[0]["legacy_reference_numbers"], ["7"]) + self.assertEqual(bibliography[0]["species_count"], 1) + self.assertEqual(bibliography[0]["species_refs"][0]["slug"], "test-shad") + + def test_register_contributor_creates_token_and_enforces_age_gate(self) -> None: + with self.assertRaisesRegex(ValueError, "at least 13 years old"): + repository.register_contributor("person@example.org", False) + + result = repository.register_contributor("Person@Example.org", True) + + self.assertEqual(result["username"], "person@example.org") + self.assertEqual(result["role"], "contributor") + self.assertEqual(result["minimum_age"], 13) + self.assertTrue(result["token"]) + + def test_contributor_can_create_and_edit_only_owned_species(self) -> None: + created = repository.create_contributor_species( + "writer@example.org", + """--- +title: Contributor Draft +common_name: Contributor Fish +scientific_name: Pisces contributoris +species_code: +--- + +## Summary +Draft summary. + +## Habitat +Mangroves. +""", + ) + + detail = repository.get_contributor_species_detail(created["slug"], "writer@example.org") + public_detail = repository.get_species_by_slug(created["slug"]) + + self.assertIsNotNone(detail) + self.assertIsNone(public_detail) + self.assertEqual(detail["publication_status"], "draft") + self.assertEqual(detail["common_name"], "Contributor Fish") + + updated = repository.update_contributor_species_document_markdown( + created["slug"], + """--- +title: Contributor Draft Revised +common_name: Contributor Fish +scientific_name: Pisces contributoris +species_code: +--- + +## Summary +Revised summary. + +## Habitat +Seagrass. + +### Depth +Shallow bays. 
+""", + "writer@example.org", + ) + + self.assertIsNotNone(updated) + detail = repository.get_contributor_species_detail(created["slug"], "writer@example.org") + other_user_detail = repository.get_contributor_species_detail(created["slug"], "other@example.org") + audit = repository.list_species_audit(created["slug"]) + + self.assertIsNotNone(detail) + self.assertEqual(detail["summary"], "Revised summary.") + self.assertEqual( + [section["heading"] for section in detail["sections"]], + ["Habitat", "Habitat / Depth"], + ) + self.assertIsNone(other_user_detail) + self.assertIsNotNone(audit) + self.assertEqual(audit[0]["action"], "contributor_document_update") + if __name__ == "__main__": unittest.main() diff --git a/apps/web/app.js b/apps/web/app.js index 454eae8..d94d50b 100644 --- a/apps/web/app.js +++ b/apps/web/app.js @@ -1,4 +1,20 @@ -const apiBase = ""; +function getAppBase() { + const { pathname } = window.location; + if (pathname === "/" || pathname === "/index.html") { + return ""; + } + if (pathname.endsWith("/index.html")) { + return pathname.slice(0, -"/index.html".length); + } + return pathname.endsWith("/") ? pathname.slice(0, -1) : pathname; +} + +function getInitialSpeciesSlug() { + const hash = window.location.hash.replace(/^#/, "").trim(); + return hash || ""; +} + +const apiBase = getAppBase(); const speciesList = document.querySelector("#species-list"); const searchInput = document.querySelector("#search"); @@ -12,26 +28,373 @@ const detailArchiveNote = document.querySelector("#detail-archive-note"); const detailScientificName = document.querySelector("#detail-scientific-name"); const detailSummary = document.querySelector("#detail-summary"); const detailSections = document.querySelector("#detail-sections"); +const legacyPanel = document.querySelector("#legacy-panel"); +const legacySourceMeta = document.querySelector("#legacy-source-meta"); +const legacySourceText = document.querySelector("#legacy-source-text"); const speciesCount = document.querySelector("#species-count"); const sectionCount = document.querySelector("#section-count"); const authTokenInput = document.querySelector("#auth-token"); const authSaveButton = document.querySelector("#auth-save"); const authClearButton = document.querySelector("#auth-clear"); const authStatus = document.querySelector("#auth-status"); +const contributorEmailInput = document.querySelector("#contributor-email"); +const contributorAgeGate = document.querySelector("#contributor-age-gate"); +const contributorAgeLabel = document.querySelector("#contributor-age-label"); +const contributorRegisterButton = document.querySelector("#contributor-register"); +const contributorStatus = document.querySelector("#contributor-status"); +const contributorCreateButton = document.querySelector("#contributor-create"); +const accessPanel = document.querySelector("#access-panel"); const editorPanel = document.querySelector("#editor-panel"); const editorPublicationStatus = document.querySelector("#editor-publication-status"); -const editorSummary = document.querySelector("#editor-summary"); const editorNotes = document.querySelector("#editor-notes"); const editorIsArchived = document.querySelector("#editor-is-archived"); const editorSaveButton = document.querySelector("#editor-save"); const editorStatus = document.querySelector("#editor-status"); +const documentPanel = document.querySelector("#document-panel"); +const documentMarkdown = document.querySelector("#document-markdown"); +const documentPreview = document.querySelector("#document-preview"); 
+const documentSaveButton = document.querySelector("#document-save"); +const documentStatus = document.querySelector("#document-status"); +const citationPanel = document.querySelector("#citation-panel"); +const citationStatus = document.querySelector("#citation-status"); +const citationList = document.querySelector("#citation-list"); +const citationBackfillSpeciesButton = document.querySelector("#citation-backfill-species"); +const citationEnrichAllButton = document.querySelector("#citation-enrich-all"); +const citationMatchDialog = document.querySelector("#citation-match-dialog"); +const citationMatchSeed = document.querySelector("#citation-match-seed"); +const citationMatchCandidates = document.querySelector("#citation-match-candidates"); +const citationMatchStatus = document.querySelector("#citation-match-status"); +const citationMatchCloseButton = document.querySelector("#citation-match-close"); const auditPanel = document.querySelector("#audit-panel"); const auditList = document.querySelector("#audit-list"); +const collapsibleToggles = document.querySelectorAll(".collapsible-toggle"); let currentItems = []; let currentSlug = null; let currentSession = null; let currentArchiveFilter = "active"; +let currentCitationMatch = null; +let currentSpeciesCitations = []; +let workflowPanelState = { + "legacy-panel": false, + "access-panel": false, + "editor-panel": false, + "document-panel": false, + "citation-panel": false, + "audit-panel": false, +}; + +function setCollapsibleState(panel, expanded) { + if (!panel) { + return; + } + panel.classList.toggle("collapsed", !expanded); + const toggle = panel.querySelector(".collapsible-toggle"); + if (!toggle) { + return; + } + const label = toggle.dataset.label || panel.dataset.label || "Section"; + toggle.textContent = `${expanded ? "Hide" : "Show"} ${label}`; + toggle.setAttribute("aria-expanded", expanded ? 
"true" : "false"); + workflowPanelState[panel.id] = expanded; +} + +function collapseWorkflowPanels() { + [legacyPanel, accessPanel, editorPanel, documentPanel, citationPanel, auditPanel].forEach((panel) => { + setCollapsibleState(panel, false); + }); +} + +function expandCitationPanel() { + setCollapsibleState(citationPanel, true); +} + +function restoreWorkflowPanels() { + [legacyPanel, accessPanel, editorPanel, documentPanel, citationPanel, auditPanel].forEach((panel) => { + if (!panel || panel.classList.contains("hidden")) { + return; + } + setCollapsibleState(panel, Boolean(workflowPanelState[panel.id])); + }); +} + +function renderStructuredBody(body) { + const trimmed = String(body || "").trim(); + if (!trimmed) { + return ""; + } + + const paragraphs = trimmed + .split(/\n\s*\n/) + .map((paragraph) => paragraph.trim()) + .filter(Boolean); + + return paragraphs + .map((paragraph) => { + const html = escapeHtml(paragraph).replace(/\n/g, "<br>"); + return `<p class="structured-node-body">${html}</p>`; + }) + .join(""); +} + +function isCitationHeading(title) { + const normalized = String(title || "").trim().replace(/:$/, "").toLowerCase(); + return [ + "reference numbers", + "references", + "reference", + "citations", + "citation", + "bibliography", + "related references", + "related citations", + ].includes(normalized); +} + +function parseBibtexFields(draftBibtex) { + const fields = {}; + const text = String(draftBibtex || ""); + const pattern = /([a-zA-Z_]+)\s*=\s*\{([^}]*)\}/g; + let match = pattern.exec(text); + while (match) { + fields[match[1].toLowerCase()] = match[2].trim(); + match = pattern.exec(text); + } + return fields; +} + +function collectBibtexRecords(items) { + const seen = new Set(); + const records = []; + for (const item of items || []) { + const draftBibtex = String(item && item.draft_bibtex ? item.draft_bibtex : "").trim(); + if (!draftBibtex || seen.has(draftBibtex)) { + continue; + } + seen.add(draftBibtex); + records.push(draftBibtex); + } + return records; +} + +function sanitizeFilenamePart(value, fallback = "records") { + const cleaned = String(value || "") + .trim() + .toLowerCase() + .replace(/[^a-z0-9]+/g, "-") + .replace(/^-+|-+$/g, ""); + return cleaned || fallback; +} + +function downloadBibtexRecords(items, filenameStem) { + const records = collectBibtexRecords(items); + if (!records.length) { + return false; + } + const blob = new Blob([`${records.join("\n\n")}\n`], { type: "application/x-bibtex;charset=utf-8" }); + const url = URL.createObjectURL(blob); + const link = document.createElement("a"); + link.href = url; + link.download = `${sanitizeFilenamePart(filenameStem)}.bib`; + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); + window.setTimeout(() => URL.revokeObjectURL(url), 0); + return true; +} + +function buildPublicCitationText(item) { + const fields = parseBibtexFields(item.draft_bibtex || ""); + if (item.normalized_text) { + return escapeHtml(String(item.normalized_text)); + } + + const author = fields.author || ""; + const year = fields.year || ""; + const title = fields.title || ""; + const venue = fields.journal || fields.booktitle || fields.publisher || ""; + const volume = fields.volume || ""; + const issue = fields.number || ""; + const pages = fields.pages || ""; + + const parts = []; + const lead = [author, year ? `(${year})` : ""].filter(Boolean).join(" "); + if (lead) { + parts.push(lead); + } + if (title) { + parts.push(title); + } + const venueBits = [venue, volume ? `${volume}${issue ? 
`(${issue})` : ""}` : issue ? `(${issue})` : "", pages] + .filter(Boolean) + .join(", "); + if (venueBits) { + parts.push(venueBits); + } + + return escapeHtml(parts.join(". ").trim() || String(item.raw_text || "")); +} + +function renderPublicCitationEntry(item) { + const fields = parseBibtexFields(item.draft_bibtex || ""); + const meta = [ + item.legacy_reference_number ? `Imported reference ${escapeHtml(item.legacy_reference_number)}` : "", + item.source_type === "editor_added_candidate" ? "Added citation" : "", + item.source_type === "editor_selected_candidate" ? "Reviewed citation" : "", + ] + .filter(Boolean) + .join(" • "); + + const links = [ + item.doi ? `<a href="https://doi.org/${encodeURIComponent(String(item.doi).replace(/^https?:\/\/doi\.org\//, ""))}" target="_blank" rel="noopener noreferrer">DOI</a>` : "", + item.source_url ? `<a href="${escapeHtml(item.source_url)}" target="_blank" rel="noopener noreferrer">Source</a>` : "", + item.openalex_id ? `<a href="https://openalex.org/${escapeHtml(String(item.openalex_id).replace(/^https?:\/\/openalex\.org\//, ""))}" target="_blank" rel="noopener noreferrer">OpenAlex</a>` : "", + ] + .filter(Boolean) + .join(" · "); + + return ` + <article class="public-citation-entry"> + <p class="public-citation-text">${buildPublicCitationText(item)}</p> + ${meta ? `<p class="public-citation-meta">${meta}</p>` : ""} + ${links ? `<p class="public-citation-links">${links}</p>` : ""} + ${renderCitationAbstractBlock(item.abstract_text || fields.abstract || "", false)} + </article> + `; +} + +function buildPublicBibliographyMarkup(citations, filenameStem) { + const records = collectBibtexRecords(citations); + const downloadButton = ` + <div class="public-bibliography-actions"> + <button + type="button" + class="secondary-button bibliography-download-button" + data-filename-stem="${escapeHtml(filenameStem)}" + ${records.length ? "" : "disabled"} + > + Download BibTeX + </button> + <p class="public-bibliography-note"> + ${records.length ? `${records.length} BibTeX record${records.length === 1 ? "" : "s"} available for download.` : "No BibTeX records are available for download yet."} + </p> + </div> + `; + + return Array.isArray(citations) && citations.length + ? `${downloadButton}<div class="public-citation-list">${citations.map((item) => renderPublicCitationEntry(item)).join("")}</div>` + : `${downloadButton}<p class="structured-node-body">No extracted bibliography entries are available yet.</p>`; +} + +function renderStructuredNodes(nodes, container, citations, renderState = { renderedBibliography: false }) { + for (const node of nodes || []) { + const rawTitle = String(node.title || "").trim() || "Untitled section"; + const isCitationSection = isCitationHeading(rawTitle); + if (isCitationSection && renderState.renderedBibliography) { + continue; + } + + const sectionEl = document.createElement("section"); + sectionEl.className = "detail-section structured-node"; + + const depth = Number(node.depth || 2); + const headingLevel = Math.min(6, Math.max(3, depth + 1)); + const title = escapeHtml(isCitationHeading(rawTitle) ? "Bibliography" : rawTitle); + const body = String(node.body || "").trim(); + const children = Array.isArray(node.children) ? node.children : []; + const citationMarkup = isCitationSection + ? buildPublicBibliographyMarkup(citations, `${currentSlug || "ecospecies"}-bibliography`) + : ""; + + sectionEl.innerHTML = ` + <h${headingLevel}>${title}</h${headingLevel}> + ${isCitationSection ? 
citationMarkup : renderStructuredBody(body)} + ${children.length ? '<div class="structured-node-children"></div>' : ""} + `; + + if (isCitationSection) { + renderState.renderedBibliography = true; + } + + if (children.length) { + renderStructuredNodes(children, sectionEl.querySelector(".structured-node-children"), citations, renderState); + } + + container.appendChild(sectionEl); + } +} + +function renderPrimaryContent(data) { + detailSections.innerHTML = ""; + + if (data.diagnostics.length) { + const diagnosticsEl = document.createElement("section"); + diagnosticsEl.className = "detail-section detail-diagnostics"; + diagnosticsEl.innerHTML = ` + <h3>Ingest Diagnostics</h3> + <ul class="diagnostic-list"> + ${data.diagnostics + .map( + (diagnostic) => + `<li><strong>${escapeHtml(diagnostic.code)}</strong>: ${escapeHtml(diagnostic.message)}</li>`, + ) + .join("")} + </ul> + `; + detailSections.appendChild(diagnosticsEl); + } + + const structuredNodes = + data.structured_document && + data.structured_document.ast && + Array.isArray(data.structured_document.ast.nodes) + ? data.structured_document.ast.nodes.filter( + (node) => String(node.title || "").trim().toLowerCase() !== "summary", + ) + : []; + + if (structuredNodes.length) { + renderStructuredNodes(structuredNodes, detailSections, data.citations || [], { renderedBibliography: false }); + attachCitationToggleControls(detailSections); + const downloadButton = detailSections.querySelector(".bibliography-download-button"); + if (downloadButton) { + downloadButton.addEventListener("click", () => { + const downloaded = downloadBibtexRecords(data.citations || [], `${data.slug || currentSlug || "ecospecies"}-bibliography`); + const note = detailSections.querySelector(".public-bibliography-note"); + if (note && !downloaded) { + note.textContent = "No BibTeX records are available for download yet."; + } + }); + } + return; + } + + for (const section of data.sections) { + const sectionEl = document.createElement("section"); + sectionEl.className = "detail-section"; + sectionEl.innerHTML = ` + <h3>${escapeHtml(section.heading)}</h3> + <pre>${escapeHtml(section.content)}</pre> + `; + detailSections.appendChild(sectionEl); + } +} + +function renderLegacySource(data) { + const legacySource = data.legacy_source; + const hasLegacySource = Boolean(legacySource && String(legacySource.text || "").trim()); + legacyPanel.classList.toggle("hidden", !hasLegacySource); + if (!hasLegacySource) { + legacySourceMeta.textContent = ""; + legacySourceText.textContent = ""; + return; + } + + legacySourceMeta.textContent = legacySource.source_file + ? 
`Original imported file: ${legacySource.source_file}` + : "Original imported legacy material"; + legacySourceText.textContent = String(legacySource.text || ""); +} function getAuthToken() { return window.localStorage.getItem("ecospecies_auth_token") || ""; @@ -45,10 +408,522 @@ function getAuthHeaders() { function escapeHtml(value) { return value .replaceAll("&", "&amp;") + .replaceAll('"', "&quot;") .replaceAll("<", "&lt;") .replaceAll(">", "&gt;"); } +function normalizeAbstractForDisplay(value) { + const raw = String(value || "").trim(); + if (!raw) { + return ""; + } + const temp = document.createElement("div"); + temp.innerHTML = raw; + return temp.textContent + .replace(/^abstract\s*[:.\-]?\s*/i, "") + .replace(/\s+/g, " ") + .trim(); +} + +function parseMarkdownFrontMatter(markdown) { + const stripped = markdown.trimStart(); + if (!stripped.startsWith("---\n")) { + return { metadata: {}, body: markdown }; + } + + const remainder = stripped.slice(4); + const separatorIndex = remainder.indexOf("\n---\n"); + if (separatorIndex === -1) { + return { metadata: {}, body: markdown }; + } + + const metadataBlock = remainder.slice(0, separatorIndex); + const body = remainder.slice(separatorIndex + 5); + const metadata = {}; + + for (const line of metadataBlock.split("\n")) { + const separator = line.indexOf(":"); + if (separator === -1) { + continue; + } + const key = line.slice(0, separator).trim(); + const value = line.slice(separator + 1).trim(); + if (key) { + metadata[key] = value; + } + } + + return { metadata, body }; +} + +function renderDocumentPreview(markdown) { + const { metadata, body } = parseMarkdownFrontMatter(markdown); + const headings = body + .split("\n") + .map((line) => { + const match = line.match(/^(#{2,6})\s+(.+?)\s*$/); + if (!match) { + return null; + } + return { + depth: match[1].length, + title: match[2].trim(), + }; + }) + .filter(Boolean); + + if (!headings.length && !Object.keys(metadata).length) { + documentPreview.innerHTML = `<p class="document-preview-empty">No headings detected yet.</p>`; + return; + } + + const metadataItems = Object.entries(metadata) + .map(([key, value]) => `<li><strong>${escapeHtml(key)}</strong>: ${escapeHtml(value)}</li>`) + .join(""); + + const headingItems = headings + .map( + (heading) => + `<li style="margin-left:${Math.max(0, heading.depth - 2) * 18}px">${escapeHtml(heading.title)}</li>`, + ) + .join(""); + + documentPreview.innerHTML = ` + ${metadataItems ? `<ul class="document-preview-metadata">${metadataItems}</ul>` : ""} + ${headingItems ? `<ol class="document-preview-list">${headingItems}</ol>` : '<p class="document-preview-empty">No headings detected yet.</p>'} + `; +} + +function renderCitationList(items, editable) { + citationList.innerHTML = ""; + if (!items.length) { + citationList.innerHTML = `<p class="editor-status">No citations have been extracted yet.</p>`; + return; + } + + for (const item of items) { + const article = document.createElement("article"); + article.className = "citation-entry"; + + const readOnlyMeta = [ + item.section_heading ? `Section: ${escapeHtml(item.section_heading)}` : "", + item.legacy_reference_number + ? `Legacy reference: ${escapeHtml(item.legacy_reference_number)}` + : "", + item.source_type ? `Source: ${escapeHtml(item.source_type)}` : "", + item.enrichment_status ?
`Enrichment: ${escapeHtml(item.enrichment_status)}` : "", + ] + .filter(Boolean) + .join(" • "); + + if (!editable) { + article.innerHTML = ` + <p class="citation-entry-meta">${readOnlyMeta}</p> + <p class="citation-entry-raw">${escapeHtml(item.raw_text || "")}</p> + <p class="citation-entry-meta">Review status: ${escapeHtml(item.review_status || "draft")}</p> + ${item.doi ? `<p class="citation-entry-meta">DOI: ${escapeHtml(item.doi)}</p>` : ""} + ${item.openalex_id ? `<p class="citation-entry-meta">OpenAlex: ${escapeHtml(item.openalex_id)}</p>` : ""} + ${item.resolver_source_label ? `<p class="citation-entry-meta">Resolver source: ${escapeHtml(item.resolver_source_label)}</p>` : ""} + ${renderCitationAbstractBlock(item.abstract_text || "", false)} + ${item.enrichment_error ? `<p class="citation-entry-meta error">${escapeHtml(item.enrichment_error)}</p>` : ""} + ${renderCitationBibtexBlock(item.draft_bibtex || "", false)} + `; + attachCitationToggleControls(article); + citationList.appendChild(article); + continue; + } + + article.innerHTML = ` + <p class="citation-entry-meta">${readOnlyMeta}</p> + <p class="citation-entry-raw">${escapeHtml(item.raw_text || "")}</p> + <label class="editor-label">Review Status</label> + <select class="citation-review-status"> + <option value="draft"${item.review_status === "draft" ? " selected" : ""}>Draft</option> + <option value="reviewed"${item.review_status === "reviewed" ? " selected" : ""}>Reviewed</option> + <option value="accepted"${item.review_status === "accepted" ? " selected" : ""}>Accepted</option> + <option value="rejected"${item.review_status === "rejected" ? " selected" : ""}>Rejected</option> + </select> + <label class="editor-label">DOI</label> + <input class="citation-doi" type="text" value="${escapeHtml(item.doi || "")}"> + <label class="editor-label">OpenAlex ID</label> + <input class="citation-openalex" type="text" value="${escapeHtml(item.openalex_id || "")}" disabled> + <label class="editor-label">Source URL</label> + <input class="citation-source-url" type="text" value="${escapeHtml(item.source_url || "")}" disabled> + <label class="editor-label">Resolver Source</label> + <input class="citation-resolver-source" type="text" value="${escapeHtml(item.resolver_source_label || "")}" disabled> + <label class="editor-label">Citation Key</label> + <input class="citation-key" type="text" value="${escapeHtml(item.citation_key || "")}"> + <label class="editor-label">Entry Type</label> + <input class="citation-entry-type" type="text" value="${escapeHtml(item.entry_type || "misc")}"> + <label class="editor-label">Normalized Citation</label> + <textarea class="citation-normalized" rows="3">${escapeHtml(item.normalized_text || "")}</textarea> + <label class="editor-label">Abstract</label> + <textarea class="citation-abstract" rows="5">${escapeHtml(item.abstract_text || "")}</textarea> + ${renderCitationAbstractBlock(item.abstract_text || "", true)} + <label class="editor-label">Draft BibTeX</label> + <textarea class="citation-bibtex-editor" rows="8">${escapeHtml(item.draft_bibtex || "")}</textarea> + ${renderCitationBibtexBlock(item.draft_bibtex || "", true)} + ${item.enrichment_error ? 
`<p class="citation-entry-meta error">${escapeHtml(item.enrichment_error)}</p>` : ""} + <div class="editor-actions"> + <button type="button" class="secondary-button citation-enrich">Run Enrichment</button> + <button type="button" class="secondary-button citation-review-matches">Review Matches</button> + <button type="button" class="citation-save">Save Citation Review</button> + </div> + `; + + article.querySelector(".citation-enrich").addEventListener("click", async () => { + if (!currentSlug) { + return; + } + citationStatus.textContent = `Running enrichment for citation ${item.position}...`; + const { response, data } = await requestJson(`/api/editor/species/${currentSlug}/citations/${item.id}/enrich`, { + method: "POST", + body: JSON.stringify({}), + }); + if (!response.ok) { + citationStatus.textContent = data.error || "Citation enrichment failed"; + return; + } + citationStatus.textContent = `Citation ${data.citation.position} enrichment ${data.citation.enrichment_status}`; + await Promise.all([loadSpecies(currentSlug), loadSpeciesCitations(currentSlug)]); + }); + + article.querySelector(".citation-save").addEventListener("click", async () => { + if (!currentSlug) { + return; + } + citationStatus.textContent = `Saving citation ${item.position}...`; + const { response, data } = await requestJson(`/api/editor/species/${currentSlug}/citations/${item.id}`, { + method: "POST", + body: JSON.stringify({ + review_status: article.querySelector(".citation-review-status").value, + doi: article.querySelector(".citation-doi").value, + citation_key: article.querySelector(".citation-key").value, + entry_type: article.querySelector(".citation-entry-type").value, + normalized_text: article.querySelector(".citation-normalized").value, + abstract_text: article.querySelector(".citation-abstract").value, + draft_bibtex: article.querySelector(".citation-bibtex-editor").value, + }), + }); + if (!response.ok) { + citationStatus.textContent = data.error || "Citation review save failed"; + return; + } + citationStatus.textContent = `Citation ${data.citation.position} saved by ${data.last_modified_by}`; + await Promise.all([loadSpecies(currentSlug), loadSpeciesCitations(currentSlug)]); + }); + + article.querySelector(".citation-review-matches").addEventListener("click", async () => { + await openCitationMatchDialog(item.id); + }); + + attachCitationToggleControls(article); + citationList.appendChild(article); + } +} + +function renderCitationAbstractBlock(abstractText, editable) { + const text = normalizeAbstractForDisplay(abstractText); + if (!text) { + return ""; + } + const label = editable ? "Stored Abstract" : "Abstract"; + return ` + <div class="citation-abstract-shell"> + <button type="button" class="secondary-button citation-abstract-toggle" aria-expanded="false"> + Show ${label} + </button> + <div class="citation-abstract-display hidden"> + <p class="public-citation-abstract">${escapeHtml(text)}</p> + </div> + </div> + `; +} + +function renderCitationBibtexBlock(draftBibtex, editable) { + const text = String(draftBibtex || "").trim(); + if (!text) { + return ""; + } + const label = editable ? 
"Stored BibTeX" : "BibTeX"; + return ` + <div class="citation-detail-shell"> + <button type="button" class="secondary-button citation-detail-toggle" aria-expanded="false"> + Show ${label} + </button> + <div class="citation-detail-display hidden"> + <pre class="citation-bibtex">${escapeHtml(text)}</pre> + </div> + </div> + `; +} + +function attachCitationToggleControls(root) { + const toggles = root.querySelectorAll(".citation-abstract-toggle, .citation-detail-toggle"); + for (const toggle of toggles) { + const shell = toggle.parentElement; + const display = shell && shell.querySelector(".citation-abstract-display, .citation-detail-display"); + if (!display) { + continue; + } + const showLabel = toggle.textContent.replace(/^Hide /, "Show ").trim(); + const hideLabel = showLabel.replace(/^Show /, "Hide "); + toggle.addEventListener("click", () => { + const hidden = display.classList.toggle("hidden"); + toggle.setAttribute("aria-expanded", hidden ? "false" : "true"); + toggle.textContent = hidden ? showLabel : hideLabel; + }); + } +} + +function renderMetadataTable(fields) { + const rows = [ + ["Author", fields.author || ""], + ["Year", fields.year || ""], + ["Title", fields.title || ""], + ["Venue", fields.journal || fields.booktitle || fields.publisher || fields.howpublished || ""], + ["Volume", fields.volume || ""], + ["Issue", fields.number || ""], + ["Pages", fields.pages || ""], + ["DOI", fields.doi || ""], + ] + .filter(([, value]) => value) + .map( + ([label, value]) => + `<div class="match-row"><span class="match-label">${escapeHtml(label)}</span><span>${escapeHtml(value)}</span></div>`, + ) + .join(""); + return rows || `<p class="editor-status">No structured metadata extracted yet.</p>`; +} + +function renderFieldMatches(fieldMatches) { + return Object.entries(fieldMatches || {}) + .map(([field, detail]) => { + const status = String(detail.status || "unknown"); + return ` + <div class="match-row"> + <span class="match-label">${escapeHtml(field)}</span> + <span class="match-status match-status-${escapeHtml(status)}">${escapeHtml(status)}</span> + <span>${escapeHtml(String(detail.seed || ""))}</span> + <span>${escapeHtml(String(detail.candidate || ""))}</span> + </div> + `; + }) + .join(""); +} + +function normalizeCitationIdentity(value) { + return String(value || "").trim().toLowerCase(); +} + +function candidateAlreadyExists(candidate) { + const candidateFields = candidate && candidate.fields ? 
candidate.fields : {}; + const candidateDoi = normalizeCitationIdentity(candidateFields.doi || candidate.doi || ""); + const candidateOpenAlex = normalizeCitationIdentity(candidateFields.openalex || candidate.openalex_id || ""); + const candidateKey = normalizeCitationIdentity(candidate.citation_key || ""); + const candidateText = normalizeCitationIdentity(candidate.normalized_text || ""); + + return currentSpeciesCitations.some((item) => { + const itemDoi = normalizeCitationIdentity(item.doi || ""); + const itemOpenAlex = normalizeCitationIdentity(item.openalex_id || ""); + const itemKey = normalizeCitationIdentity(item.citation_key || ""); + const itemText = normalizeCitationIdentity(item.normalized_text || ""); + return ( + (candidateDoi && itemDoi && candidateDoi === itemDoi) + || (candidateOpenAlex && itemOpenAlex && candidateOpenAlex === itemOpenAlex) + || (candidateKey && itemKey && candidateKey === itemKey) + || (candidateText && itemText && candidateText === itemText) + ); + }); +} + +function closeCitationMatchDialog() { + currentCitationMatch = null; + citationMatchDialog.classList.add("hidden"); + citationMatchDialog.setAttribute("aria-hidden", "true"); + citationMatchSeed.innerHTML = ""; + citationMatchCandidates.innerHTML = ""; + citationMatchStatus.textContent = "Compare the parsed source citation against candidate metadata."; +} + +async function applyCitationCandidate(candidate) { + if (!currentSlug || !currentCitationMatch) { + return; + } + citationMatchStatus.textContent = `Applying ${candidate.source_label || "candidate"}...`; + const { response, data } = await requestJson( + `/api/editor/species/${currentSlug}/citations/${currentCitationMatch.citationId}/apply-match`, + { + method: "POST", + body: JSON.stringify({ candidate }), + }, + ); + if (!response.ok) { + citationMatchStatus.textContent = data.error || "Candidate application failed"; + return; + } + citationStatus.textContent = `Citation ${data.citation.position} accepted from reviewed candidate`; + closeCitationMatchDialog(); + expandCitationPanel(); + await Promise.all([loadSummary(), loadSpeciesList(searchInput.value), loadSpeciesCitations(currentSlug)]); + expandCitationPanel(); +} + +async function addCitationCandidate(candidate) { + if (!currentSlug || !currentCitationMatch) { + return; + } + citationMatchStatus.textContent = `Adding ${candidate.source_label || "candidate"} as another citation...`; + const { response, data } = await requestJson( + `/api/editor/species/${currentSlug}/citations/${currentCitationMatch.citationId}/add-match`, + { + method: "POST", + body: JSON.stringify({ candidate }), + }, + ); + if (!response.ok) { + citationMatchStatus.textContent = data.error || "Candidate addition failed"; + return; + } + citationStatus.textContent = `Added reviewed candidate as citation ${data.citation.position}`; + citationMatchStatus.textContent = `Added as citation ${data.citation.position}. 
You can continue reviewing other candidates.`; + expandCitationPanel(); + await Promise.all([loadSummary(), loadSpeciesList(searchInput.value), loadSpeciesCitations(currentSlug)]); + expandCitationPanel(); +} + +async function openCitationMatchDialog(citationId) { + if (!currentSlug || !isEditorSession()) { + return; + } + currentCitationMatch = { citationId }; + citationMatchDialog.classList.remove("hidden"); + citationMatchDialog.setAttribute("aria-hidden", "false"); + citationMatchSeed.innerHTML = ""; + citationMatchCandidates.innerHTML = ""; + citationMatchStatus.textContent = "Loading candidate matches..."; + + const { response, data } = await requestJson(`/api/editor/species/${currentSlug}/citations/${citationId}/candidates`); + if (!response.ok) { + citationMatchStatus.textContent = data.error || "Candidate lookup failed"; + citationMatchCandidates.innerHTML = `<p class="error">${escapeHtml(data.error || "Unable to load candidates.")}</p>`; + return; + } + + citationMatchSeed.innerHTML = ` + <p class="citation-entry-raw">${escapeHtml(data.citation.raw_text || "")}</p> + ${renderMetadataTable((data.seed && data.seed.fields) || {})} + ${renderCitationAbstractBlock((data.seed && (data.seed.abstract_text || (data.seed.fields && data.seed.fields.abstract))) || "", false)} + ${data.seed && data.seed.normalized_text ? `<p class="editor-status">${escapeHtml(data.seed.normalized_text)}</p>` : ""} + `; + attachCitationToggleControls(citationMatchSeed); + + const candidates = Array.isArray(data.candidates) ? data.candidates : []; + citationMatchStatus.textContent = `${candidates.length} candidate${candidates.length === 1 ? "" : "s"} found`; + if (!candidates.length) { + citationMatchCandidates.innerHTML = `<p class="editor-status">No close candidates were returned for this citation.</p>`; + return; + } + + citationMatchCandidates.innerHTML = ""; + for (const candidate of candidates) { + const alreadyExists = candidateAlreadyExists(candidate); + const card = document.createElement("article"); + card.className = "match-candidate-card"; + card.innerHTML = ` + <div class="match-candidate-header"> + <strong>${escapeHtml(candidate.fields?.title || "Untitled candidate")}</strong> + <span class="match-score">Score ${escapeHtml(String(candidate.score || 0))}</span> + </div> + <p class="citation-entry-meta">${escapeHtml(candidate.source_label || "")}</p> + ${alreadyExists ? `<p class="citation-entry-meta">Already present in this species' citation set.</p>` : ""} + ${candidate.conflict_reason ? `<p class="citation-entry-meta error">${escapeHtml(candidate.conflict_reason)}</p>` : ""} + ${renderMetadataTable(candidate.fields || {})} + ${renderCitationAbstractBlock(candidate.abstract_text || (candidate.fields && candidate.fields.abstract) || "", false)} + <div class="match-table"> + <div class="match-row match-row-head"> + <span class="match-label">Field</span> + <span>Status</span> + <span>Source</span> + <span>Candidate</span> + </div> + ${renderFieldMatches(candidate.field_matches || {})} + </div> + ${candidate.normalized_text ? `<p class="editor-status">${escapeHtml(candidate.normalized_text)}</p>` : ""} + <div class="editor-actions"> + <button type="button" class="candidate-apply">Use This Candidate</button> + <button type="button" class="secondary-button candidate-add"${alreadyExists ? 
" disabled" : ""}>Add As Another Citation</button> + </div> + `; + card.querySelector(".candidate-apply").addEventListener("click", async () => { + await applyCitationCandidate(candidate); + }); + if (!alreadyExists) { + card.querySelector(".candidate-add").addEventListener("click", async () => { + await addCitationCandidate(candidate); + }); + } + attachCitationToggleControls(card); + citationMatchCandidates.appendChild(card); + } +} + +async function loadSpeciesDocument(slug) { + if (!isEditorSession() && !isContributorSession()) { + documentPanel.classList.add("hidden"); + return; + } + + documentPanel.classList.remove("hidden"); + documentStatus.textContent = "Loading document..."; + const path = isEditorSession() + ? `/api/editor/species/${slug}/document` + : `/api/contributor/species/${slug}/document`; + const { response, data } = await requestJson(path); + if (!response.ok) { + documentMarkdown.value = ""; + documentPreview.innerHTML = `<p class="error">${escapeHtml(data.error || "Unable to load document.")}</p>`; + documentStatus.textContent = data.error || "Document load failed"; + return; + } + + documentMarkdown.value = data.markdown || ""; + renderDocumentPreview(documentMarkdown.value); + documentStatus.textContent = data.updated_by + ? `Document last updated by ${data.updated_by}` + : "Document loaded"; +} + +async function loadSpeciesCitations(slug, fallbackData = null) { + citationPanel.classList.remove("hidden"); + citationBackfillSpeciesButton.classList.toggle("hidden", !isEditorSession()); + citationEnrichAllButton.classList.toggle("hidden", !isEditorSession()); + + if (!isEditorSession() && !isContributorSession()) { + const items = Array.isArray(fallbackData && fallbackData.citations) ? fallbackData.citations : []; + currentSpeciesCitations = items; + renderCitationList(items, false); + citationStatus.textContent = `${items.length} citation${items.length === 1 ? "" : "s"}`; + return; + } + + citationStatus.textContent = "Loading citations..."; + const path = isEditorSession() + ? `/api/editor/species/${slug}/citations` + : `/api/contributor/species/${slug}/citations`; + const { response, data } = await requestJson(path); + if (!response.ok) { + citationList.innerHTML = `<p class="error">${escapeHtml(data.error || "Unable to load citations.")}</p>`; + citationStatus.textContent = data.error || "Citation load failed"; + return; + } + + currentSpeciesCitations = Array.isArray(data.citations) ? data.citations : []; + renderCitationList(currentSpeciesCitations, isEditorSession()); + citationStatus.textContent = `${data.citation_count || 0} citation${data.citation_count === 1 ? 
"" : "s"} extracted`; +} + async function requestJson(path, options = {}) { const headers = new Headers(options.headers || {}); const authHeaders = getAuthHeaders(); @@ -67,6 +942,10 @@ function isEditorSession() { return Boolean(currentSession && currentSession.user && ["editor", "admin"].includes(currentSession.user.role)); } +function isContributorSession() { + return Boolean(currentSession && currentSession.user && currentSession.user.role === "contributor"); +} + function getVisibleItems(items) { if (!isEditorSession()) { return items; @@ -82,6 +961,7 @@ function getVisibleItems(items) { function syncArchiveFilterUi() { archiveFilterGroup.classList.toggle("hidden", !isEditorSession()); + contributorCreateButton.classList.toggle("hidden", !isContributorSession()); for (const button of archiveFilterGroup.querySelectorAll("[data-archive-filter]")) { button.classList.toggle("is-active", button.dataset.archiveFilter === currentArchiveFilter); } @@ -93,9 +973,11 @@ async function loadSession() { if (!isEditorSession()) { currentArchiveFilter = "active"; } + contributorAgeLabel.textContent = String(data.minimum_contributor_age || 13); authTokenInput.value = getAuthToken(); if (data.authenticated) { authStatus.textContent = `${data.user.username} (${data.user.role})`; + contributorStatus.textContent = isContributorSession() ? "Contributor token stored in this browser." : ""; } else if (data.auth_configured) { authStatus.textContent = "Auth configured, public session"; } else { @@ -124,21 +1006,43 @@ function renderSpecies(items) { button.className = item.is_archived ? "species-card species-card-archived" : "species-card"; button.type = "button"; const archivedMeta = item.is_archived ? `<span class="species-state-badge">Archived</span>` : ""; + const commonName = item.common_name || item.title; + const scientificName = item.scientific_name || "Scientific name missing"; button.innerHTML = ` - <span class="species-name">${escapeHtml(item.common_name || item.title)}</span> - <span class="species-meta">${escapeHtml(item.scientific_name || "Scientific name missing")}</span> + <span class="species-name">${escapeHtml(commonName)}</span> + <span class="species-meta">Common name: ${escapeHtml(commonName)}</span> + <span class="species-meta">Scientific name: ${escapeHtml(scientificName)}</span> <span class="species-meta">${escapeHtml(item.publication_status || "published")}${archivedMeta}</span> <span class="species-meta">${item.diagnostic_count ? `${item.diagnostic_count} ingest flags` : "No ingest flags"}</span> <span class="species-snippet">${escapeHtml((item.summary || "No summary extracted yet.").slice(0, 180))}</span> `; - button.addEventListener("click", () => loadSpecies(item.slug)); + button.addEventListener("click", () => { + window.location.hash = item.slug; + loadSpecies(item.slug); + }); speciesList.appendChild(button); } } +function formatIdentifierBanner(item) { + if (item.primary_taxon_identifier && item.primary_taxon_authority) { + return `${String(item.primary_taxon_authority).toUpperCase()} ${item.primary_taxon_identifier.identifier || ""}`.trim(); + } + const legacyIdentifier = Array.isArray(item.legacy_identifiers) ? item.legacy_identifiers[0] : null; + if (legacyIdentifier && legacyIdentifier.identifier) { + const label = legacyIdentifier.label || "Legacy identifier"; + return `${label} ${legacyIdentifier.identifier}`; + } + return "No external taxon identifier assigned"; +} + async function loadSpeciesList(search = "") { const query = search ? 
`?search=${encodeURIComponent(search)}` : ""; - const path = isEditorSession() ? `/api/editor/species${query}` : `/api/species${query}`; + const path = isEditorSession() + ? `/api/editor/species${query}` + : isContributorSession() + ? `/api/contributor/species${query}` + : `/api/species${query}`; const { data } = await requestJson(path); currentItems = data.items; syncArchiveFilterUi(); @@ -146,8 +1050,14 @@ async function loadSpeciesList(search = "") { } async function loadSpecies(slug) { + const previousSlug = currentSlug; currentSlug = slug; - const path = isEditorSession() ? `/api/editor/species/${slug}` : `/api/species/${slug}`; + closeCitationMatchDialog(); + const path = isEditorSession() + ? `/api/editor/species/${slug}` + : isContributorSession() + ? `/api/contributor/species/${slug}` + : `/api/species/${slug}`; const { response, data } = await requestJson(path); if (!response.ok) { detailEmpty.classList.remove("hidden"); @@ -158,8 +1068,11 @@ async function loadSpecies(slug) { detailEmpty.classList.add("hidden"); detail.classList.remove("hidden"); + if (previousSlug !== slug) { + collapseWorkflowPanels(); + } - detailCode.textContent = data.flelmr_code ? `FLELMR ${data.flelmr_code}` : "Legacy source file"; + detailCode.textContent = formatIdentifierBanner(data); detailCommonName.textContent = data.common_name || data.title; detailArchiveBadge.classList.toggle("hidden", !data.is_archived); detailArchiveNote.classList.toggle("hidden", !data.is_archived); @@ -169,61 +1082,23 @@ async function loadSpecies(slug) { auditPanel.classList.toggle("hidden", !isEditorSession()); if (isEditorSession()) { editorPublicationStatus.value = data.publication_status || "published"; - editorSummary.value = data.summary || ""; editorNotes.value = data.editor_notes || ""; editorIsArchived.checked = Boolean(data.is_archived); editorStatus.textContent = data.last_modified_by ? 
`Last modified by ${data.last_modified_by}` : "Editor session active"; - await loadAudit(slug); + await Promise.all([loadAudit(slug), loadSpeciesDocument(slug), loadSpeciesCitations(slug)]); + } else if (isContributorSession()) { + editorStatus.textContent = ""; + await Promise.all([loadSpeciesDocument(slug), loadSpeciesCitations(slug)]); + } else { + documentPanel.classList.add("hidden"); + await loadSpeciesCitations(slug, data); } - detailSections.innerHTML = ""; - if (data.diagnostics.length) { - const diagnosticsEl = document.createElement("section"); - diagnosticsEl.className = "detail-section detail-diagnostics"; - diagnosticsEl.innerHTML = ` - <h3>Ingest Diagnostics</h3> - <ul class="diagnostic-list"> - ${data.diagnostics - .map( - (diagnostic) => - `<li><strong>${escapeHtml(diagnostic.code)}</strong>: ${escapeHtml(diagnostic.message)}</li>`, - ) - .join("")} - </ul> - `; - detailSections.appendChild(diagnosticsEl); - } - for (const section of data.sections) { - const sectionEl = document.createElement("section"); - sectionEl.className = "detail-section"; - if (isEditorSession()) { - sectionEl.innerHTML = ` - <h3>${escapeHtml(section.heading)}</h3> - <textarea class="section-editor" data-section-position="${section.position}" rows="10">${escapeHtml(section.content)}</textarea> - <div class="editor-actions"> - <button type="button" class="section-save" data-section-position="${section.position}">Save Section</button> - </div> - `; - } else { - sectionEl.innerHTML = ` - <h3>${escapeHtml(section.heading)}</h3> - <pre>${escapeHtml(section.content)}</pre> - `; - } - detailSections.appendChild(sectionEl); - } - - if (isEditorSession()) { - for (const button of detailSections.querySelectorAll(".section-save")) { - button.addEventListener("click", async (event) => { - const position = event.currentTarget.dataset.sectionPosition; - const textarea = detailSections.querySelector(`textarea[data-section-position="${position}"]`); - await saveSectionContent(Number(position), textarea.value); - }); - } - } + renderLegacySource(data); + restoreWorkflowPanels(); + renderPrimaryContent(data); } function renderAudit(items) { @@ -273,7 +1148,6 @@ async function saveEditorialChanges() { method: "POST", body: JSON.stringify({ publication_status: editorPublicationStatus.value, - summary: editorSummary.value, editor_notes: editorNotes.value, is_archived: editorIsArchived.checked, }), @@ -286,21 +1160,63 @@ async function saveEditorialChanges() { await Promise.all([loadSummary(), loadSpeciesList(searchInput.value), loadSpecies(currentSlug)]); } -async function saveSectionContent(sectionPosition, content) { - if (!currentSlug || !isEditorSession()) { +async function saveDocumentMarkdown() { + if (!currentSlug || (!isEditorSession() && !isContributorSession())) { return; } - editorStatus.textContent = `Saving section ${sectionPosition}...`; - const { response, data } = await requestJson(`/api/editor/species/${currentSlug}/sections/${sectionPosition}`, { + documentStatus.textContent = "Saving document..."; + const path = isEditorSession() + ? 
`/api/editor/species/${currentSlug}/document` + : `/api/contributor/species/${currentSlug}/document`; + const { response, data } = await requestJson(path, { method: "POST", - body: JSON.stringify({ content }), + body: JSON.stringify({ markdown: documentMarkdown.value }), }); if (!response.ok) { - editorStatus.textContent = data.error || "Section save failed"; + documentStatus.textContent = data.error || "Document save failed"; return; } - editorStatus.textContent = `Section ${sectionPosition} saved by ${data.last_modified_by}`; - await loadSpecies(currentSlug); + renderDocumentPreview(documentMarkdown.value); + documentStatus.textContent = `Document saved by ${data.updated_by}`; + await Promise.all([loadSummary(), loadSpeciesList(searchInput.value), loadSpecies(currentSlug)]); +} + +async function registerContributor() { + contributorStatus.textContent = "Registering contributor..."; + const { response, data } = await requestJson("/api/contributor/register", { + method: "POST", + body: JSON.stringify({ + email: contributorEmailInput.value.trim(), + age_gate_confirmed: contributorAgeGate.checked, + }), + }); + if (!response.ok) { + contributorStatus.textContent = data.error || "Contributor registration failed"; + return; + } + window.localStorage.setItem("ecospecies_auth_token", data.token); + authTokenInput.value = data.token; + contributorStatus.textContent = data.warning; + await loadSession(); + await loadSpeciesList(searchInput.value); +} + +async function createContributorDraft() { + if (!isContributorSession()) { + return; + } + contributorStatus.textContent = "Creating new contributor draft..."; + const { response, data } = await requestJson("/api/contributor/species", { + method: "POST", + body: JSON.stringify({}), + }); + if (!response.ok) { + contributorStatus.textContent = data.error || "Draft creation failed"; + return; + } + contributorStatus.textContent = "Draft created. 
Store your token carefully."; + await loadSpeciesList(searchInput.value); + await loadSpecies(data.slug); } searchInput.addEventListener("input", async (event) => { @@ -315,6 +1231,17 @@ for (const button of archiveFilterGroup.querySelectorAll("[data-archive-filter]" }); } +for (const button of collapsibleToggles) { + button.addEventListener("click", () => { + const panel = document.getElementById(button.dataset.target || ""); + if (!panel || panel.classList.contains("hidden")) { + return; + } + const expanded = panel.classList.contains("collapsed"); + setCollapsibleState(panel, expanded); + }); +} + authSaveButton.addEventListener("click", async () => { const token = authTokenInput.value.trim(); if (token) { @@ -330,6 +1257,7 @@ authSaveButton.addEventListener("click", async () => { authClearButton.addEventListener("click", async () => { window.localStorage.removeItem("ecospecies_auth_token"); authTokenInput.value = ""; + contributorStatus.textContent = ""; await loadSession(); await loadSpeciesList(searchInput.value); if (currentSlug) { @@ -338,12 +1266,67 @@ authClearButton.addEventListener("click", async () => { }); editorSaveButton.addEventListener("click", saveEditorialChanges); +documentSaveButton.addEventListener("click", saveDocumentMarkdown); +contributorRegisterButton.addEventListener("click", registerContributor); +contributorCreateButton.addEventListener("click", createContributorDraft); +citationMatchCloseButton.addEventListener("click", closeCitationMatchDialog); +citationMatchDialog.querySelector(".match-dialog-backdrop").addEventListener("click", closeCitationMatchDialog); +citationBackfillSpeciesButton.addEventListener("click", async () => { + if (!currentSlug || !isEditorSession()) { + return; + } + expandCitationPanel(); + citationStatus.textContent = "Running citation backfill for this species..."; + const { response, data } = await requestJson(`/api/editor/species/${currentSlug}/citations/backfill`, { + method: "POST", + body: JSON.stringify({}), + }); + if (!response.ok) { + citationStatus.textContent = data.error || "Species citation backfill failed"; + return; + } + citationStatus.textContent = + `Species backfill complete: ${data.backfilled_count || 0} checked, ${data.changed_count || 0} changed, ${data.resolved_count || 0} resolved, ${data.unresolved_count || 0} unresolved, ${data.error_count || 0} errors`; + await Promise.all([loadSpecies(currentSlug), loadSpeciesCitations(currentSlug)]); +}); +citationEnrichAllButton.addEventListener("click", async () => { + if (!currentSlug || !isEditorSession()) { + return; + } + expandCitationPanel(); + citationStatus.textContent = "Running enrichment for all citations..."; + const { response, data } = await requestJson(`/api/editor/species/${currentSlug}/citations/enrich`, { + method: "POST", + body: JSON.stringify({}), + }); + if (!response.ok) { + citationStatus.textContent = data.error || "Batch citation enrichment failed"; + return; + } + citationStatus.textContent = + `Batch enrichment complete: ${data.resolved_count || 0} resolved, ${data.unresolved_count || 0} unresolved, ${data.error_count || 0} errors`; + await Promise.all([loadSpecies(currentSlug), loadSpeciesCitations(currentSlug)]); +}); +documentMarkdown.addEventListener("input", () => { + renderDocumentPreview(documentMarkdown.value); +}); async function bootstrap() { await loadSession(); await Promise.all([loadSummary(), loadSpeciesList()]); + const initialSlug = getInitialSpeciesSlug(); + if (initialSlug) { + await loadSpecies(initialSlug); + } } 
bootstrap().catch((error) => { speciesList.innerHTML = `<p class="error">Failed to load data: ${escapeHtml(String(error))}</p>`; }); + +window.addEventListener("hashchange", async () => { + const slug = getInitialSpeciesSlug(); + if (slug && slug !== currentSlug) { + await loadSpecies(slug); + } +}); diff --git a/apps/web/bibliography.html b/apps/web/bibliography.html new file mode 100644 index 0000000..0a1e9ad --- /dev/null +++ b/apps/web/bibliography.html @@ -0,0 +1,43 @@ +<!doctype html> +<html lang="en"> + <head> + <meta charset="utf-8"> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <title>EcoSpecies Bibliography + + + + +
+
+

EcoSpecies Atlas

+

Bibliography

+

+ A site-wide bibliography for the EcoSpecies atlas, including imported references and citations added during review. +

+
+ + +

Loading bibliography...

+
+
+ +
+
+
+
+
+
+
diff --git a/apps/web/bibliography.js b/apps/web/bibliography.js
new file mode 100644
index 0000000..6935ec5
--- /dev/null
+++ b/apps/web/bibliography.js
@@ -0,0 +1,230 @@
+function getAppBase() {
+  const { pathname } = window.location;
+  if (pathname === "/" || pathname === "/index.html") {
+    return "";
+  }
+  if (pathname.endsWith("/index.html")) {
+    return pathname.slice(0, -"/index.html".length);
+  }
+  return pathname.endsWith("/") ? pathname.slice(0, -1) : pathname;
+}
+
+const apiBase = getAppBase().replace(/\/bibliography\.html$/, "");
+const bibliographyList = document.querySelector("#bibliography-list");
+const bibliographySearch = document.querySelector("#bibliography-search");
+const bibliographyStatus = document.querySelector("#bibliography-status");
+const bibliographyDownload = document.querySelector("#bibliography-download");
+let currentBibliographyItems = [];
+
+function escapeHtml(value) {
+  return String(value)
+    .replaceAll("&", "&amp;")
+    .replaceAll('"', "&quot;")
+    .replaceAll("<", "&lt;")
+    .replaceAll(">", "&gt;");
+}
+
+function normalizeAbstractForDisplay(value) {
+  const raw = String(value || "").trim();
+  if (!raw) {
+    return "";
+  }
+  const temp = document.createElement("div");
+  temp.innerHTML = raw;
+  return temp.textContent
+    .replace(/^abstract\s*[:.\-]?\s*/i, "")
+    .replace(/\s+/g, " ")
+    .trim();
+}
+
+function parseBibtexFields(draftBibtex) {
+  const fields = {};
+  const text = String(draftBibtex || "");
+  const pattern = /([a-zA-Z_]+)\s*=\s*\{([^}]*)\}/g;
+  let match = pattern.exec(text);
+  while (match) {
+    fields[match[1].toLowerCase()] = match[2].trim();
+    match = pattern.exec(text);
+  }
+  return fields;
+}
+
+function collectBibtexRecords(items) {
+  const seen = new Set();
+  const records = [];
+  for (const item of items || []) {
+    const draftBibtex = String(item && item.draft_bibtex ? item.draft_bibtex : "").trim();
+    if (!draftBibtex || seen.has(draftBibtex)) {
+      continue;
+    }
+    seen.add(draftBibtex);
+    records.push(draftBibtex);
+  }
+  return records;
+}
+
+function downloadBibtexRecords(items, filenameStem) {
+  const records = collectBibtexRecords(items);
+  if (!records.length) {
+    return false;
+  }
+  const blob = new Blob([`${records.join("\n\n")}\n`], { type: "application/x-bibtex;charset=utf-8" });
+  const url = URL.createObjectURL(blob);
+  const link = document.createElement("a");
+  link.href = url;
+  link.download = `${filenameStem}.bib`;
+  document.body.appendChild(link);
+  link.click();
+  document.body.removeChild(link);
+  window.setTimeout(() => URL.revokeObjectURL(url), 0);
+  return true;
+}
+
+function syncDownloadButton(items) {
+  if (!bibliographyDownload) {
+    return;
+  }
+  const recordCount = collectBibtexRecords(items).length;
+  bibliographyDownload.disabled = !recordCount;
+  bibliographyDownload.textContent = recordCount
+    ? `Download BibTeX (${recordCount})`
+    : "Download BibTeX";
+}
+
+function buildCitationText(item) {
+  const fields = parseBibtexFields(item.draft_bibtex || "");
+  if (item.normalized_text) {
+    return escapeHtml(item.normalized_text);
+  }
+  const author = fields.author || "";
+  const year = fields.year || "";
+  const title = fields.title || "";
+  const venue = fields.journal || fields.booktitle || fields.publisher || "";
+  const volume = fields.volume || "";
+  const issue = fields.number || "";
+  const pages = fields.pages || "";
+  const parts = [];
+  const lead = [author, year ?
`(${year})` : ""].filter(Boolean).join(" "); + if (lead) { + parts.push(lead); + } + if (title) { + parts.push(title); + } + const venueBits = [venue, volume ? `${volume}${issue ? `(${issue})` : ""}` : issue ? `(${issue})` : "", pages] + .filter(Boolean) + .join(", "); + if (venueBits) { + parts.push(venueBits); + } + return escapeHtml(parts.join(". ").trim() || item.raw_text || ""); +} + +function renderSpeciesRefs(refs) { + return refs + .map( + (ref) => + `${escapeHtml(ref.common_name || ref.slug)}`, + ) + .join(", "); +} + +function renderAbstractBlock(text) { + const abstract = normalizeAbstractForDisplay(text); + if (!abstract) { + return ""; + } + return ` +
+ + +
+ `; +} + +function attachCitationAbstractToggles(root) { + for (const toggle of root.querySelectorAll(".citation-abstract-toggle")) { + const shell = toggle.parentElement; + const display = shell && shell.querySelector(".citation-abstract-display"); + if (!display) { + continue; + } + toggle.addEventListener("click", () => { + const hidden = display.classList.toggle("hidden"); + toggle.setAttribute("aria-expanded", hidden ? "false" : "true"); + toggle.textContent = hidden ? "Show Abstract" : "Hide Abstract"; + }); + } +} + +function renderBibliography(items) { + bibliographyList.innerHTML = ""; + if (!items.length) { + bibliographyList.innerHTML = `

No bibliography entries match the current search.

`; + return; + } + + for (const item of items) { + const links = [ + item.doi ? `DOI` : "", + item.source_url ? `Source` : "", + item.openalex_id ? `OpenAlex` : "", + ] + .filter(Boolean) + .join(" · "); + + const article = document.createElement("article"); + article.className = "public-citation-entry"; + article.innerHTML = ` +

${buildCitationText(item)}

+ ${renderAbstractBlock(item.abstract_text || "")} +

+ Appears in ${item.species_count} species record${item.species_count === 1 ? "" : "s"} + ${item.legacy_reference_numbers && item.legacy_reference_numbers.length ? ` • Imported references: ${item.legacy_reference_numbers.map((value) => escapeHtml(value)).join(", ")}` : ""} +

+

Species: ${renderSpeciesRefs(item.species_refs || [])}

+ ${links ? `` : ""} + `; + attachCitationAbstractToggles(article); + bibliographyList.appendChild(article); + } +} + +async function loadBibliography(search = "") { + bibliographyStatus.textContent = "Loading bibliography..."; + const query = search ? `?search=${encodeURIComponent(search)}` : ""; + const response = await fetch(`${apiBase}/api/bibliography${query}`); + const data = await response.json(); + if (!response.ok) { + bibliographyList.innerHTML = `

${escapeHtml(data.error || "Unable to load bibliography.")}

`; + bibliographyStatus.textContent = data.error || "Bibliography load failed"; + return; + } + + currentBibliographyItems = data.items || []; + renderBibliography(currentBibliographyItems); + syncDownloadButton(currentBibliographyItems); + bibliographyStatus.textContent = `${data.count || 0} bibliography entr${data.count === 1 ? "y" : "ies"}`; +} + +bibliographySearch.addEventListener("input", async (event) => { + await loadBibliography(event.target.value); +}); + +loadBibliography().catch((error) => { + bibliographyList.innerHTML = `

Failed to load bibliography: ${escapeHtml(String(error))}

`; + bibliographyStatus.textContent = "Bibliography load failed"; +}); + +if (bibliographyDownload) { + bibliographyDownload.addEventListener("click", () => { + const downloaded = downloadBibtexRecords(currentBibliographyItems, "ecospecies-bibliography"); + if (!downloaded) { + bibliographyStatus.textContent = "No BibTeX records are available for download yet."; + } + }); +} diff --git a/apps/web/index.html b/apps/web/index.html index e1f969a..b93b972 100644 --- a/apps/web/index.html +++ b/apps/web/index.html @@ -7,20 +7,31 @@ +
-

Marine Species Knowledge System

-

EcoSpecies

+

Open Biodiversity Reference

+

EcoSpecies Atlas

- A modern follow-on for the legacy EcoSpecies archive, starting with direct ingestion - of historical Species Life History text files. + A modern follow-on for the legacy EcoSpecies archive, built as an open ecology and + biodiversity reference workspace. +

+

+ Use EcoSpecies Atlas for species profiles, habitat evidence, ecological reading, and + citation-aware exploration grounded in the migrated legacy corpus.

-
- - - -

Public access

-
0 @@ -38,6 +49,7 @@

Species

+

- This migration path preserves attribution for Dr. Peter Rubec, Dr. Diane Blackwood, + EcoSpecies Atlas preserves attribution for Dr. Peter Rubec, Dr. Diane Blackwood, Dr. Welsbery R. Elsberry, and the Florida Fish and Wildlife Research Institute context documented in the legacy project materials.

+ diff --git a/apps/web/nginx.conf b/apps/web/nginx.conf index 29d2394..88408f5 100644 --- a/apps/web/nginx.conf +++ b/apps/web/nginx.conf @@ -5,6 +5,10 @@ server { root /usr/share/nginx/html; index index.html; + location = /apps/ecospecies { + return 301 /apps/ecospecies/; + } + location /api/ { proxy_pass http://api:8000/api/; proxy_http_version 1.1; @@ -14,19 +18,46 @@ server { proxy_set_header X-Forwarded-Proto $scheme; } + location /apps/ecospecies/api/ { + rewrite ^/apps/ecospecies/api/(.*)$ /api/$1 break; + proxy_pass http://api:8000; + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + location /healthz { proxy_pass http://api:8000/healthz; proxy_http_version 1.1; proxy_set_header Host $host; } + location /apps/ecospecies/healthz { + proxy_pass http://api:8000/healthz; + proxy_http_version 1.1; + proxy_set_header Host $host; + } + location /readyz { proxy_pass http://api:8000/readyz; proxy_http_version 1.1; proxy_set_header Host $host; } + location /apps/ecospecies/readyz { + proxy_pass http://api:8000/readyz; + proxy_http_version 1.1; + proxy_set_header Host $host; + } + location / { try_files $uri $uri/ /index.html; } + + location /apps/ecospecies/ { + rewrite ^/apps/ecospecies/(.*)$ /$1 break; + try_files $uri $uri/ /index.html; + } } diff --git a/apps/web/styles.css b/apps/web/styles.css index b5aa5cb..1741846 100644 --- a/apps/web/styles.css +++ b/apps/web/styles.css @@ -1,12 +1,12 @@ :root { - --bg: #f4efe6; - --paper: rgba(255, 252, 247, 0.78); - --ink: #16251f; - --muted: #58655f; - --accent: #0f766e; - --accent-2: #bc6c25; - --line: rgba(22, 37, 31, 0.12); - --shadow: 0 24px 70px rgba(24, 35, 30, 0.15); + --bg: #f4f7fb; + --paper: rgba(255, 255, 255, 0.88); + --ink: #182433; + --muted: #5f6b7d; + --accent: #2457a6; + --accent-2: #1f7a5a; + --line: rgba(24, 36, 51, 0.11); + --shadow: 0 24px 70px rgba(33, 52, 84, 0.14); } * { @@ -15,12 +15,83 @@ body { margin: 0; - font-family: Georgia, "Times New Roman", serif; + font-family: "Segoe UI", "Helvetica Neue", Arial, sans-serif; color: var(--ink); background: - radial-gradient(circle at top left, rgba(15, 118, 110, 0.14), transparent 28%), - radial-gradient(circle at top right, rgba(188, 108, 37, 0.16), transparent 24%), - linear-gradient(180deg, #f8f4ec, #efe6d7 70%, #e7dcc9); + radial-gradient(circle at top left, rgba(36, 87, 166, 0.14), transparent 26%), + radial-gradient(circle at top right, rgba(31, 122, 90, 0.12), transparent 24%), + linear-gradient(180deg, #f4f7fb, #e4edf6 72%, #d9e6ef); +} + +.site-header { + width: min(1320px, calc(100vw - 32px)); + margin: 0 auto; + padding-top: 24px; +} + +.site-header-inner { + display: flex; + gap: 18px; + align-items: center; + justify-content: space-between; + padding: 18px 22px; + border-radius: 24px; + backdrop-filter: blur(10px); + background: var(--paper); + border: 1px solid var(--line); + box-shadow: var(--shadow); +} + +.site-brand { + display: flex; + flex-direction: column; + gap: 4px; +} + +.site-brand-mark { + margin: 0; + color: var(--accent); + text-transform: uppercase; + letter-spacing: 0.18em; + font-size: 0.76rem; +} + +.site-brand-link { + color: var(--ink); + font-size: 1.5rem; + font-weight: 700; + text-decoration: none; +} + +.site-brand-summary { + margin: 0; + color: var(--muted); + font-size: 0.94rem; +} + +.site-nav { + display: flex; + flex-wrap: wrap; + gap: 10px; + justify-content: flex-end; +} 
+ +.site-nav a { + display: inline-flex; + align-items: center; + justify-content: center; + border-radius: 999px; + padding: 11px 16px; + text-decoration: none; + color: var(--ink); + border: 1px solid var(--line); + background: rgba(255, 255, 255, 0.72); + transition: transform 160ms ease, border-color 160ms ease; +} + +.site-nav a:hover { + transform: translateY(-1px); + border-color: rgba(15, 118, 110, 0.45); } .page { @@ -42,6 +113,9 @@ body { .hero { padding: 28px; margin-bottom: 20px; + background: + linear-gradient(135deg, rgba(255, 255, 255, 0.95), rgba(234, 244, 240, 0.92)), + var(--paper); } .eyebrow { @@ -56,6 +130,7 @@ h1 { margin: 0; font-size: clamp(2.8rem, 7vw, 5.6rem); line-height: 0.92; + letter-spacing: -0.03em; } .lede { @@ -64,6 +139,12 @@ h1 { font-size: 1.08rem; } +.hero-context { + max-width: 68ch; + color: var(--muted); + line-height: 1.58; +} + .hero-stats { display: flex; gap: 16px; @@ -79,6 +160,15 @@ h1 { margin-top: 18px; } +.auth-panel-row { + margin-top: 0; +} + +.contributor-signup { + padding-top: 14px; + border-top: 1px solid var(--line); +} + .auth-bar input { min-width: min(360px, 100%); flex: 1; @@ -93,7 +183,7 @@ h1 { min-width: 180px; padding: 14px 16px; border-radius: 18px; - background: rgba(255, 255, 255, 0.6); + background: linear-gradient(180deg, rgba(255, 255, 255, 0.95), rgba(232, 242, 239, 0.92)); border: 1px solid var(--line); } @@ -158,6 +248,16 @@ input[type="search"] { background: rgba(255, 255, 255, 0.9); } +input[type="text"], +input[type="email"], +input[type="password"] { + border: 1px solid var(--line); + border-radius: 18px; + padding: 12px 14px; + font: inherit; + background: rgba(255, 255, 255, 0.92); +} + select, textarea, button { @@ -201,7 +301,7 @@ button { padding: 14px; border-radius: 18px; border: 1px solid var(--line); - background: linear-gradient(180deg, rgba(255, 255, 255, 0.95), rgba(241, 237, 230, 0.95)); + background: linear-gradient(180deg, rgba(255, 255, 255, 0.97), rgba(239, 246, 244, 0.94)); cursor: pointer; transition: transform 160ms ease, border-color 160ms ease; } @@ -213,7 +313,7 @@ button { .species-card-archived { border-style: dashed; - background: linear-gradient(180deg, rgba(247, 241, 231, 0.98), rgba(233, 226, 214, 0.98)); + background: linear-gradient(180deg, rgba(243, 247, 249, 0.98), rgba(227, 236, 242, 0.98)); } .species-name, @@ -273,6 +373,32 @@ button { display: none; } +.match-dialog-shell { + position: fixed; + inset: 0; + z-index: 50; +} + +.match-dialog-backdrop { + position: absolute; + inset: 0; + background: rgba(12, 20, 18, 0.46); +} + +.match-dialog-card { + position: relative; + z-index: 1; + width: min(1180px, calc(100vw - 32px)); + max-height: calc(100vh - 40px); + overflow: auto; + margin: 20px auto; + padding: 18px; + border-radius: 24px; + background: #fbf8f1; + border: 1px solid var(--line); + box-shadow: var(--shadow); +} + .detail-header { padding-bottom: 16px; border-bottom: 1px solid var(--line); @@ -313,6 +439,12 @@ button { margin-top: 18px; } +.workflow-panels { + display: grid; + gap: 16px; + margin-top: 20px; +} + .detail-section { padding: 16px; border-radius: 18px; @@ -329,6 +461,44 @@ button { margin-top: 18px; } +.workflow-panels .editor-panel, +.workflow-panels .detail-section { + margin-top: 0; +} + +.collapsible-panel { + padding-top: 14px; +} + +.collapsible-header { + display: flex; + gap: 12px; + align-items: center; + justify-content: space-between; + flex-wrap: wrap; +} + +.collapsible-header h3 { + margin-bottom: 0; +} + +.collapsible-body { + margin-top: 
16px; +} + +.collapsible-panel.collapsed .collapsible-body { + display: none; +} + +.document-panel-header { + display: flex; + gap: 16px; + align-items: flex-start; + justify-content: space-between; + flex-wrap: wrap; + margin-bottom: 14px; +} + .editor-label { display: block; margin: 0 0 8px; @@ -349,6 +519,11 @@ button { font-weight: 700; } +.contributor-age-gate { + margin: 0; + font-weight: 400; +} + .archive-toggle input { width: 18px; height: 18px; @@ -372,6 +547,149 @@ button { gap: 12px; } +.citation-list { + display: grid; + gap: 14px; +} + +.citation-entry { + padding: 14px; + border-radius: 16px; + border: 1px solid var(--line); + background: rgba(255, 255, 255, 0.76); +} + +.citation-entry-meta { + margin: 0 0 10px; + color: var(--muted); + font-size: 0.92rem; +} + +.citation-entry-raw { + margin: 0 0 12px; + line-height: 1.5; +} + +.citation-bibtex, +.citation-bibtex-editor { + font-family: "Courier New", monospace; + font-size: 0.9rem; + line-height: 1.45; +} + +.citation-abstract-shell { + display: grid; + gap: 8px; + margin: 4px 0 10px; +} + +.citation-detail-shell { + display: grid; + gap: 8px; + margin: 4px 0 10px; +} + +.citation-abstract-display { + padding: 10px 12px; + border-radius: 12px; + border: 1px solid var(--line); + background: rgba(15, 118, 110, 0.05); +} + +.citation-detail-display { + padding: 10px 12px; + border-radius: 12px; + border: 1px solid var(--line); + background: rgba(255, 255, 255, 0.78); +} + +.match-dialog-header, +.match-dialog-grid, +.match-candidate-header, +.match-candidates, +.match-candidate-card, +.match-seed, +.match-table { + display: grid; + gap: 12px; +} + +.match-dialog-header { + grid-template-columns: minmax(0, 1fr) auto; + align-items: start; +} + +.match-dialog-grid { + grid-template-columns: minmax(260px, 0.9fr) minmax(0, 1.6fr); + margin-top: 16px; +} + +.match-candidate-card { + padding: 14px; + border-radius: 16px; + border: 1px solid var(--line); + background: rgba(255, 255, 255, 0.84); +} + +.match-candidate-header { + grid-template-columns: minmax(0, 1fr) auto; + align-items: baseline; +} + +.match-score { + font-weight: 700; + color: var(--accent); +} + +.match-table { + border: 1px solid var(--line); + border-radius: 14px; + overflow: hidden; +} + +.match-row { + display: grid; + grid-template-columns: 120px 110px minmax(0, 1fr) minmax(0, 1fr); + gap: 10px; + padding: 10px 12px; + border-top: 1px solid var(--line); + font-size: 0.92rem; +} + +.match-row:first-child { + border-top: 0; +} + +.match-row-head { + background: rgba(15, 118, 110, 0.08); + font-weight: 700; +} + +.match-label { + color: var(--muted); + font-weight: 700; +} + +.match-status { + text-transform: uppercase; + letter-spacing: 0.04em; + font-size: 0.78rem; +} + +.match-status-exact { + color: var(--accent); +} + +.match-status-partial, +.match-status-seed-missing, +.match-status-candidate-missing { + color: var(--accent-2); +} + +.match-status-conflict { + color: #a12626; +} + .audit-entry { padding: 14px; border-radius: 16px; @@ -394,6 +712,62 @@ button { line-height: 1.45; } +.document-editor, +.document-preview { + font-family: "Courier New", monospace; + font-size: 0.92rem; + line-height: 1.5; +} + +.document-editor { + min-height: 420px; + margin-bottom: 14px; + white-space: pre; + overflow: auto; +} + +.document-preview-shell { + border: 1px solid var(--line); + border-radius: 18px; + background: rgba(255, 255, 255, 0.72); + overflow: hidden; +} + +.document-preview-shell summary { + cursor: pointer; + padding: 12px 16px; + font-weight: 700; + 
color: var(--accent); +} + +.document-preview { + padding: 0 16px 16px; +} + +.document-preview-empty { + color: var(--muted); +} + +.document-preview-list { + margin: 0; + padding-left: 22px; +} + +.document-preview-list li + li { + margin-top: 8px; +} + +.document-preview-metadata { + margin: 0 0 14px; + padding: 0; + list-style: none; + color: var(--muted); +} + +.document-preview-metadata li + li { + margin-top: 6px; +} + .diagnostic-list { margin: 0; padding-left: 18px; @@ -403,6 +777,100 @@ button { margin-top: 8px; } +.structured-node { + display: grid; + gap: 12px; + background: linear-gradient(180deg, rgba(255, 255, 255, 0.84), rgba(242, 247, 252, 0.88)); +} + +.structured-node + .structured-node { + margin-top: 4px; +} + +.structured-node h3, +.structured-node h4, +.structured-node h5, +.structured-node h6 { + line-height: 1.18; + letter-spacing: -0.01em; +} + +.structured-node-body { + margin: 0; + line-height: 1.58; + color: var(--ink); +} + +.structured-node-children { + display: grid; + gap: 12px; + padding: 4px 0 0 18px; + border-left: 2px solid rgba(36, 87, 166, 0.12); +} + +.public-citation-list { + display: grid; + gap: 14px; +} + +.public-bibliography-actions { + display: flex; + gap: 12px; + align-items: center; + flex-wrap: wrap; +} + +.public-bibliography-note { + margin: 0; + color: var(--muted); + font-size: 0.92rem; +} + +.public-citation-entry { + display: grid; + gap: 8px; + padding: 14px; + border-radius: 16px; + border: 1px solid var(--line); + background: rgba(255, 255, 255, 0.76); +} + +.public-citation-text, +.public-citation-meta, +.public-citation-links, +.public-citation-abstract { + margin: 0; +} + +.public-citation-text { + line-height: 1.56; +} + +.public-citation-meta, +.public-citation-links { + color: var(--muted); + font-size: 0.92rem; +} + +.public-citation-links a { + color: var(--accent); +} + +.public-citation-abstract { + padding-top: 2px; + color: var(--muted); + line-height: 1.58; +} + +.legacy-source { + max-height: 28rem; + overflow: auto; + padding: 14px; + border-radius: 16px; + border: 1px solid var(--line); + background: rgba(255, 255, 255, 0.76); +} + pre { margin: 0; white-space: pre-wrap; @@ -417,6 +885,15 @@ pre { } @media (max-width: 960px) { + .site-header-inner { + flex-direction: column; + align-items: stretch; + } + + .site-nav { + justify-content: flex-start; + } + .workspace { grid-template-columns: 1fr; } @@ -424,4 +901,12 @@ pre { .species-list { max-height: 40vh; } + + .match-dialog-grid { + grid-template-columns: 1fr; + } + + .match-row { + grid-template-columns: 1fr; + } } diff --git a/docker-compose.yml b/docker-compose.yml index 28889c9..52200ff 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,6 @@ services: db: + container_name: ecospecies-db image: postgres:16-alpine environment: POSTGRES_DB: ecospecies @@ -17,6 +18,7 @@ services: - postgres_data:/var/lib/postgresql/data importer: + container_name: ecospecies-importer image: python:3.12-slim depends_on: db: @@ -30,11 +32,12 @@ services: command: ["/bin/sh", "-lc", "./scripts/bootstrap-python-env.sh && ./scripts/run-import.sh"] volumes: - .:/workspace - - ../01-legacy-code-and-data:/legacy-data:ro + - ${ECOSPECIES_LEGACY_DATA_DIR:-../legacy-corpus}:/legacy-data:ro - python_venv:/workspace/.docker/venv - pip_cache:/root/.cache/pip api: + container_name: ecospecies-api image: python:3.12-slim restart: unless-stopped depends_on: @@ -56,11 +59,12 @@ services: - "${ECOSPECIES_API_PORT:-8000}:8000" volumes: - .:/workspace - - 
../01-legacy-code-and-data:/legacy-data:ro
+      - ${ECOSPECIES_LEGACY_DATA_DIR:-../legacy-corpus}:/legacy-data:ro
       - python_venv:/workspace/.docker/venv
       - pip_cache:/root/.cache/pip
 
   web:
+    container_name: ecospecies-web
     image: nginx:1.27-alpine
     restart: unless-stopped
     depends_on:
diff --git a/docs/citegeist-review-notes.md b/docs/citegeist-review-notes.md
new file mode 100644
index 0000000..981d5d9
--- /dev/null
+++ b/docs/citegeist-review-notes.md
@@ -0,0 +1,110 @@
+# CiteGeist Review Notes
+
+These notes capture parser issues seen while integrating CiteGeist-style extraction into EcoSpecies.
+
+## Report-style references
+
+Observed failure shape:
+
+- references like `Daniell, W.C. 1872. Letters referring ... Comm. Rept. U.S. Comm. Fish & Fish. 2: 387-390.`
+- extracted `title` may contain the full raw bibliography string
+- abbreviated venue names such as `Comm. Rept.` are not separated cleanly from the title
+
+Suggested upstream change in `citegeist.extract`:
+
+- add a report-style parser path after year detection
+- prefer sentence-boundary venue detection before naive keyword splits so words like `report` inside a real title do not trigger an early cut
+- support abbreviation-heavy venue starters such as:
+  - `comm. rept.`
+  - `rept.`
+  - `proc.`
+  - `occas. pap.`
+  - `bulletin`
+  - `bull.`
+  - `memoir`
+- strip trailing volume/page blobs like `2: 387-390` from the venue field
+- when a first parse leaves a partial venue stub such as `Occas`, reparse the full raw reference line and prefer the fuller repaired venue/title split
+
+## Placeholder title merge behavior
+
+Observed failure shape:
+
+- a raw bibliography string may survive as `title` even after DOI/title resolution finds a better title
+
+Suggested upstream change in `citegeist.resolve.merge_entries_with_conflicts`:
+
+- treat titles that look like raw bibliography strings as placeholders
+- example heuristic:
+  - starts with `Surname, ... YEAR.`
+  - unusually long for a title
+  - contains a resolved shorter title as a substring after punctuation normalization
+
+## Legacy note deduplication
+
+Observed failure shape:
+
+- note fragments like `ecospecies_reference_number = {160}` can be appended more than once downstream when re-merging enriched metadata
+
+Suggested upstream change:
+
+- when joining note fragments, split on `;`, normalize whitespace, and dedupe per fragment rather than per whole note string
+
+## Unresolved entries should still refresh local parses
+
+Observed failure shape:
+
+- parser improvements may correctly rebuild `title`, venue, `volume`, `number`, and `pages`
+- but if no remote metadata source matches, the stored draft BibTeX can remain unchanged unless unresolved enrichment also writes the refreshed local seed back out
+
+Suggested upstream change:
+
+- unresolved enrichment should still return the rebuilt local draft entry
+- keep `citation_key`, normalized text, and draft BibTeX synchronized with the current local parser even when resolver status remains `unresolved`
+
+## Returned metadata not carried through
+
+Observed concern:
+
+- resolver/source payloads may include bibliographic details such as:
+  - `volume`
+  - `issue` / BibTeX `number`
+  - `page` / BibTeX `pages`
+- these should be preserved into the BibTeX entry whenever available
+
+Current note:
+
+- CiteGeist Crossref mapping already includes `volume`, `number`, and `pages`
+- verify that all resolver paths, storage round-trips, and exports preserve those fields consistently
+- OpenAlex/DataCite mappings should also be checked for analogous bibliographic fields in `biblio` / attribute payloads
+
+## False-positive title-search acceptance
+
+Observed failure shape:
+
+- title search can return a thematically related but bibliographically different work
+- downstream acceptance may keep some seed fields while adopting conflicting DOI/title/volume/pages from the returned match
+- this is especially risky for historical references with sparse or abbreviated venue names
+
+Suggested upstream change in `citegeist.resolve` and any title-search ranking path:
+
+- do not fall back to the first search hit when no strong title match exists
+- prefer exact or near-exact title matches only
+- reject a candidate when structured seed metadata conflicts on strong fields such as:
+  - `year`
+  - venue / journal
+  - `volume`
+  - `number`
+  - `pages`
+- treat those fields as match-validation inputs, not just merge-time metadata
+
+## OpenAlex null-source handling
+
+Observed failure shape:
+
+- some OpenAlex works have `primary_location` present but `source: null`
+- downstream mapping can crash if it assumes `source` is always a dictionary
+
+Suggested upstream change:
+
+- treat null `source` payloads as empty dictionaries
+- continue mapping title, year, DOI, and `biblio` fields even when venue/source is missing
diff --git a/docs/dc-orig.yml b/docs/dc-orig.yml
new file mode 100644
index 0000000..0f0c9b0
--- /dev/null
+++ b/docs/dc-orig.yml
@@ -0,0 +1,89 @@
+services:
+  db:
+    image: postgres:16-alpine
+    restart: unless-stopped
+    environment:
+      POSTGRES_DB: ${ECOSPECIES_DB_NAME:-ecospecies}
+      POSTGRES_USER: ${ECOSPECIES_DB_USER:-ecospecies}
+      POSTGRES_PASSWORD: ${ECOSPECIES_DB_PASSWORD:?set ECOSPECIES_DB_PASSWORD}
+      PGDATA: /var/lib/postgresql/data/pgdata
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U ${ECOSPECIES_DB_USER:-ecospecies} -d ${ECOSPECIES_DB_NAME:-ecospecies}"]
+      interval: 5s
+      timeout: 5s
+      retries: 10
+    volumes:
+      - postgres_data:/var/lib/postgresql/data
+
+  importer:
+    image: python:3.12-slim
+    restart: "no"
+    depends_on:
+      db:
+        condition: service_healthy
+    working_dir: /workspace
+    environment:
+      ECOSPECIES_DATA_DIR: ${ECOSPECIES_DATA_DIR:-/legacy-data/InputFiles - TXT}
+      ECOSPECIES_DATABASE_URL: postgresql+psycopg://${ECOSPECIES_DB_USER:-ecospecies}:${ECOSPECIES_DB_PASSWORD}@db:5432/${ECOSPECIES_DB_NAME:-ecospecies}
+      ECOSPECIES_VENV_DIR: /workspace/.docker/venv
+      PYTHONPATH: /workspace/apps/api/src
+    command: ["/bin/sh", "-lc", "./scripts/bootstrap-python-env.sh && ./scripts/run-import.sh"]
+    volumes:
+      - ..:/workspace
+      - ${ECOSPECIES_LEGACY_DATA_DIR:-../path-to-legacy-corpus}:/legacy-data:ro
+      - python_venv:/workspace/.docker/venv
+      - pip_cache:/root/.cache/pip
+
+  api:
+    image: python:3.12-slim
+    restart: unless-stopped
+    depends_on:
+      db:
+        condition: service_healthy
+      importer:
+        condition: service_completed_successfully
+    working_dir: /workspace
+    environment:
+      ECOSPECIES_DATA_DIR: ${ECOSPECIES_DATA_DIR:-/legacy-data/InputFiles - TXT}
+      ECOSPECIES_DATABASE_URL: postgresql+psycopg://${ECOSPECIES_DB_USER:-ecospecies}:${ECOSPECIES_DB_PASSWORD}@db:5432/${ECOSPECIES_DB_NAME:-ecospecies}
+      ECOSPECIES_HOST: 0.0.0.0
+      ECOSPECIES_PORT: "8000"
+      ECOSPECIES_AUTH_TOKENS: ${ECOSPECIES_AUTH_TOKENS:-}
+      ECOSPECIES_VENV_DIR: /workspace/.docker/venv
+      PYTHONPATH: /workspace/apps/api/src
+    command: ["/bin/sh", "-lc", "./scripts/bootstrap-python-env.sh && ./scripts/run-api.sh"]
+    volumes:
+      - ..:/workspace
+      - ${ECOSPECIES_LEGACY_DATA_DIR:-../path-to-legacy-corpus}:/legacy-data:ro
+      - python_venv:/workspace/.docker/venv
+      - pip_cache:/root/.cache/pip
+
+  web:
+    image: nginx:1.27-alpine
+    restart: unless-stopped
+    depends_on:
+      api:
+        condition: service_started
+    labels:
+      - "traefik.enable=true"
+      - "traefik.docker.network=${TRAEFIK_NETWORK:-traefik-network}"
+      - "traefik.http.routers.ecospecies-atlas.rule=Host(`${ECOSPECIES_HOSTNAME:?set ECOSPECIES_HOSTNAME}`)"
+      - "traefik.http.routers.ecospecies-atlas.entrypoints=${TRAEFIK_ENTRYPOINTS:-websecure}"
+      - "traefik.http.routers.ecospecies-atlas.tls.certresolver=${TRAEFIK_CERTRESOLVER:-myresolver}"
+      - "traefik.http.services.ecospecies-atlas.loadbalancer.server.port=80"
+    volumes:
+      - ../apps/web:/usr/share/nginx/html:ro
+      - ../apps/web/nginx.conf:/etc/nginx/conf.d/default.conf:ro
+    networks:
+      - default
+      - traefik-network
+
+volumes:
+  postgres_data:
+  python_venv:
+  pip_cache:
+
+networks:
+  traefik-network:
+    external: true
+    name: ${TRAEFIK_NETWORK:-traefik-network}
diff --git a/docs/docker-compose-traefik.env.example b/docs/docker-compose-traefik.env.example
new file mode 100644
index 0000000..c9c13b7
--- /dev/null
+++ b/docs/docker-compose-traefik.env.example
@@ -0,0 +1,20 @@
+# Required
+ECOSPECIES_HOSTNAME=example.org
+ECOSPECIES_BASE_PATH=/apps/ecospecies
+ECOSPECIES_DB_PASSWORD=replace-with-strong-password
+
+# Optional database settings
+ECOSPECIES_DB_NAME=ecospecies
+ECOSPECIES_DB_USER=ecospecies
+
+# Optional application settings
+ECOSPECIES_AUTH_TOKENS=
+ECOSPECIES_DATA_DIR=/workspace/input-data/InputFiles
+
+# Optional host path to the legacy corpus if it is not at ../path-to-legacy-corpus
+ECOSPECIES_LEGACY_DATA_DIR=../path-to-legacy-corpus
+
+# Optional Traefik settings
+TRAEFIK_NETWORK=traefik-network
+TRAEFIK_ENTRYPOINTS=websecure
+TRAEFIK_CERTRESOLVER=myresolver
diff --git a/docs/docker-compose-traefik.yml b/docs/docker-compose-traefik.yml
new file mode 100644
index 0000000..70f425e
--- /dev/null
+++ b/docs/docker-compose-traefik.yml
@@ -0,0 +1,93 @@
+services:
+  db:
+    container_name: ecospecies-db
+    image: postgres:16-alpine
+    restart: unless-stopped
+    environment:
+      POSTGRES_DB: ${ECOSPECIES_DB_NAME:-ecospecies}
+      POSTGRES_USER: ${ECOSPECIES_DB_USER:-ecospecies}
+      POSTGRES_PASSWORD: ${ECOSPECIES_DB_PASSWORD:?set ECOSPECIES_DB_PASSWORD}
+      PGDATA: /var/lib/postgresql/data/pgdata
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U ${ECOSPECIES_DB_USER:-ecospecies} -d ${ECOSPECIES_DB_NAME:-ecospecies}"]
+      interval: 5s
+      timeout: 5s
+      retries: 10
+    volumes:
+      - postgres_data:/var/lib/postgresql/data
+
+  importer:
+    container_name: ecospecies-importer
+    image: python:3.12-slim
+    restart: "no"
+    depends_on:
+      db:
+        condition: service_healthy
+    working_dir: /workspace
+    environment:
+      ECOSPECIES_DATA_DIR: ${ECOSPECIES_DATA_DIR:-/workspace/input-data/InputFiles}
+      ECOSPECIES_DATABASE_URL: postgresql+psycopg://${ECOSPECIES_DB_USER:-ecospecies}:${ECOSPECIES_DB_PASSWORD}@db:5432/${ECOSPECIES_DB_NAME:-ecospecies}
+      ECOSPECIES_VENV_DIR: /workspace/.docker/venv
+      PYTHONPATH: /workspace/apps/api/src
+    command: ["/bin/sh", "-lc", "./scripts/bootstrap-python-env.sh && ./scripts/run-import.sh"]
+    volumes:
+      - ..:/workspace
+      - ${ECOSPECIES_LEGACY_DATA_DIR:-../path-to-legacy-corpus}:/legacy-data:ro
+      - python_venv:/workspace/.docker/venv
+      - pip_cache:/root/.cache/pip
+
+  api:
+    container_name: ecospecies-api
+    image: python:3.12-slim
+    restart: unless-stopped
+    depends_on:
+      db:
+        condition: service_healthy
+      importer:
+        condition: service_completed_successfully
+    working_dir: /workspace
+    environment:
+      ECOSPECIES_DATA_DIR: ${ECOSPECIES_DATA_DIR:-/workspace/input-data/InputFiles}
+      ECOSPECIES_DATABASE_URL: postgresql+psycopg://${ECOSPECIES_DB_USER:-ecospecies}:${ECOSPECIES_DB_PASSWORD}@db:5432/${ECOSPECIES_DB_NAME:-ecospecies}
+      ECOSPECIES_HOST: 0.0.0.0
+      ECOSPECIES_PORT: "8000"
+      ECOSPECIES_AUTH_TOKENS: ${ECOSPECIES_AUTH_TOKENS:-}
+      ECOSPECIES_VENV_DIR: /workspace/.docker/venv
+      PYTHONPATH: /workspace/apps/api/src
+    command: ["/bin/sh", "-lc", "./scripts/bootstrap-python-env.sh && ./scripts/run-api.sh"]
+    volumes:
+      - ..:/workspace
+      - ${ECOSPECIES_LEGACY_DATA_DIR:-../path-to-legacy-corpus}:/legacy-data:ro
+      - python_venv:/workspace/.docker/venv
+      - pip_cache:/root/.cache/pip
+
+  web:
+    container_name: ecospecies-web
+    image: nginx:1.27-alpine
+    restart: unless-stopped
+    depends_on:
+      api:
+        condition: service_started
+    labels:
+      - "traefik.enable=true"
+      - "traefik.docker.network=${TRAEFIK_NETWORK:-traefik-network}"
+      - "traefik.http.routers.ecospecies-atlas.rule=Host(`${ECOSPECIES_HOSTNAME:?set ECOSPECIES_HOSTNAME}`) && PathPrefix(`${ECOSPECIES_BASE_PATH:-/}`)"
+      - "traefik.http.routers.ecospecies-atlas.entrypoints=${TRAEFIK_ENTRYPOINTS:-websecure}"
+      - "traefik.http.routers.ecospecies-atlas.tls.certresolver=${TRAEFIK_CERTRESOLVER:-myresolver}"
+      - "traefik.http.services.ecospecies-atlas.loadbalancer.server.port=80"
+    volumes:
+      - ../apps/web:/usr/share/nginx/html:ro
+      - ../apps/web/nginx.conf:/etc/nginx/conf.d/default.conf:ro
+    networks:
+      - default
+      - traefik-network
+
+volumes:
+  postgres_data:
+  python_venv:
+  pip_cache:
+
+networks:
+  traefik-network:
+    external: true
+    name: ${TRAEFIK_NETWORK:-traefik-network}
diff --git a/docs/postgres-backup.md b/docs/postgres-backup.md
new file mode 100644
index 0000000..2187ee9
--- /dev/null
+++ b/docs/postgres-backup.md
@@ -0,0 +1,48 @@
+# PostgreSQL Backup Notes
+
+This note applies to deployments that use the PostgreSQL volume defined by the Compose stack, including the Traefik deployment variant.
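+
+The sections below list what to protect and show the logical dump workflow. As a scheduling illustration only, a nightly cron entry could invoke the wrapper script; the 02:30 time, the `/opt/ecospecies` checkout path, and the backup/log destinations are assumptions, not repo conventions:
+
+```bash
+# Hypothetical crontab entry: nightly logical dump with a dated filename.
+# Percent signs are escaped because cron treats a bare % as a line separator.
+30 2 * * * cd /opt/ecospecies && ./scripts/backup-postgres.sh /var/backups/ecospecies/ecospecies-$(date +\%F).sql >> /var/log/ecospecies-backup.log 2>&1
+```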
+ +## What Needs Backup + +At minimum, back up: + +- the PostgreSQL data volume +- the deployment env file that contains the database credentials + +For the Traefik deployment variant, that usually means: + +- the Docker volume `postgres_data` +- `docs/docker-compose-traefik.env` + +## Logical Backup + +From the repository root, create a SQL dump with: + +```bash +./scripts/backup-postgres.sh +``` + +To write to a specific file: + +```bash +./scripts/backup-postgres.sh /path/to/ecospecies-backup.sql +``` + +## Restore From Logical Backup + +Restore a SQL dump with: + +```bash +./scripts/restore-postgres.sh /path/to/ecospecies-backup.sql +``` + +## Volume-Level Backup + +If the host backup system can snapshot Docker volumes safely, include the PostgreSQL volume in that schedule. A volume snapshot is useful for full recovery, but a logical dump is still recommended for portability and validation. + +## Operational Guidance + +- Run backups on a schedule instead of relying on ad hoc dumps. +- Test restore procedures before relying on the backup policy. +- Keep backup artifacts outside the live Docker host when possible. +- The backup and restore scripts default to `docs/docker-compose-traefik.env` and `docs/docker-compose-traefik.yml`, but both can be overridden with `ECOSPECIES_ENV_FILE` and `ECOSPECIES_COMPOSE_FILE`. diff --git a/docs/roadmap.md b/docs/roadmap.md index 7d25664..cbdaa49 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -1,5 +1,22 @@ # EcoSpecies Modernization Roadmap +## Current Status + +As of 2026-03-27, the repo is no longer at the pure planning stage. The following pieces are already implemented and working in the live stack: + +- Docker Compose deployment with explicit `ecospecies-...` container names +- path-based hosting support for `/apps/ecospecies` +- in-repo-only source directory resolution with safe path validation +- legacy SLH ingest into PostgreSQL-backed species, sections, citations, audit, and document records +- editor/admin workflows for draft, review, publish, archive, and audit history +- contributor registration and draft-authoring workflow with token-based access +- structured Markdown document storage and editor/API round-trip +- persisted taxon identifier scaffolding with legacy identifiers separated from future-facing external identifiers +- citation extraction, review, enrichment, batch enrichment, candidate matching, and reviewed-candidate selection/addition +- citation persistence back into the structured Markdown source of truth + +The roadmap below has been updated to reflect that actual state. + ## Target Product Create a Docker Compose-based, open-source EcoSpecies successor that: @@ -31,48 +48,91 @@ Create a Docker Compose-based, open-source EcoSpecies successor that: ### Phase 0: Discovery and migration planning +Status: completed + - Inventory legacy assets and user-facing capabilities. - Capture the replacement architecture and ingestion strategy. - Define acknowledgements, provenance, and licensing boundaries. ### Phase 1: Ingestion foundation +Status: substantially complete, with parser refinement ongoing + - Parse legacy `.txt` SLH inputs into structured JSON records. -- Normalize common metadata: title, scientific name, common name, FLELMR code, headings, references. +- Normalize common metadata: title, scientific name, common name, FLELMR/EcoSpecies code, headings, references. - Create ingest diagnostics to flag malformed files and missing metadata. 
+- Continue parser refinement for legacy edge cases in headings, citations, and historical bibliography formats. ### Phase 2: Public read experience +Status: implemented baseline + - Species listing and search. - Species detail view with section navigation. - Provenance and acknowledgement display. - Summary metrics on corpus coverage. +- Path-based deployment under `/apps/ecospecies`. -### Phase 3: Structured persistence +### Phase 3: Structured persistence and editorial workflow -- Move parsed content into PostgreSQL. -- Add editor-safe import jobs and audit metadata. -- Preserve raw source alongside normalized records. -- Establish authentication and role-based access for editor and admin workflows. -- Add persisted editorial workflow state for draft, review, and published records. -- Make document sections individually addressable for editor review and revision, with audit history for section-level changes. +Status: implemented baseline, with editor UX still maturing -### Phase 4: Linkages and visualization +- PostgreSQL-backed persistence for species, sections, citations, documents, taxon identifiers, and audit history. +- Editor-safe import jobs and audit metadata. +- Raw-source preservation alongside normalized records. +- Authentication and role-based access for admin/editor/contributor workflows. +- Persisted editorial workflow state for draft, review, published, and archived records. +- Structured Markdown document storage and round-trip editing. +- Citation review, enrichment, candidate selection, and reviewed-candidate addition. +- Contributor draft creation and owner-scoped editing. + +### Phase 4: Standards-aware identity and bibliography + +Status: partially implemented + +- Preserve legacy local identifiers as provenance. +- Persist taxon identifiers separately from legacy identifiers. +- Expose `legacy_identifiers`, `taxon_identifiers`, and `primary_taxon_*` API fields. +- Persist structured citation records with DOI/OpenAlex/DataCite-style enrichment fields. +- Continue toward multi-authority identifier review, richer citation entities, and CiteGeist-backed bibliography expansion. + +### Phase 5: Editor ergonomics and advanced review + +Status: in progress + +- Structured Markdown editor is live. +- Citation match-review dialog is live. +- Remaining work: + - CodeMirror-based Markdown editor with folding + - inline parser diagnostics in the editor + - richer citation diff/review affordances + - clearer document-node and citation provenance in the UI + +### Phase 6: Linkages and visualization + +Status: not started - Model predator/prey, habitat, and ecological association edges. - Add graph endpoints and species-relationship views. - Support public-friendly visual explanations and expert filters. -### Phase 5: Reports and export +### Phase 7: Reports and export -- Recreate legacy-like text/RTF export. -- Add machine-readable export formats such as JSON and Markdown. -- Support FLELMR-oriented authoring/export profiles. +Status: partially implemented -### Phase 6: Assisted research workflows +- JSON and Markdown exports exist through the API/document model. +- Structured Markdown is now the primary human-readable editor/export format. +- Remaining work: + - recreate legacy-like text/RTF export + - support export profiles for legacy compatibility and standards-forward outputs + - improve citation/bibliography export fidelity + +### Phase 8: Assisted research workflows + +Status: planned - Add local-LLM-assisted extraction and drafting in a human-review loop. 
-- Integrate bibliography tooling for citation consolidation. +- Integrate bibliography tooling for citation consolidation and topic expansion. - Support candidate-species intake for records not yet in the historical corpus. - Restrict assisted drafting and publication actions to authenticated editorial roles. @@ -84,6 +144,9 @@ Initial core entities: - `source_document` - `document_section` - `citation` +- `taxon_identifier` +- `citation_identifier` +- `bibliography_topic` - `taxon` - `linkage` - `media_asset` @@ -95,6 +158,7 @@ Key design rules: - retain provenance and import timestamps - separate public published records from draft/editor states - make sections addressable for citation and graph linking +- prefer a canonical document AST over direct projection from free-form source text ## LLM Extension Strategy @@ -103,6 +167,8 @@ Use local models only for assistive tasks, never silent publication: - extracting candidate structured fields from new SLH text - suggesting missing headings or linkage labels - clustering similar citations +- resolving bibliography entries toward DOI/OpenAlex/DataCite where available +- treating local legacy codes as provenance, not canonical identifiers - drafting summaries for editor review Guardrails: @@ -111,16 +177,19 @@ Guardrails: - all generated content is marked as draft - every automated extraction stores source spans where possible -## Development Roadmap +## Near-Term Priorities -1. Implement a thin ingestion API over the legacy text corpus. -2. Build a responsive browser UI for listing and viewing species. -3. Add a persistent PostgreSQL-backed ingest store. -4. Introduce export and visualization services. -5. Add editorial workflows and local-LLM assistance. +1. Add CodeMirror-based folding and structure-aware editing to the Markdown document editor. +2. Expand taxon identifier review workflows for WoRMS, GBIF, Catalogue of Life, and related authorities. +3. Deepen citation quality controls, including better parsed-field visibility and stricter/manual review loops where resolver confidence is weak. +4. Add CiteGeist-style topic expansion and bibliography-suggestion review for under-cited species. +5. Improve document export fidelity so reviewed citations and standards-based identifiers are clearly represented in Markdown and downstream exports. +6. Begin the first ecological-linkage data model and API endpoints once citation/identifier workflows stabilize. ## Definition Of Done For The Initial Milestone - `docker compose up` starts a working API and frontend. -- The system can enumerate the legacy corpus and show parsed species detail for at least one real SLH file. -- Project docs describe the migration approach, target architecture, and next phases. +- The system can enumerate the legacy corpus and show parsed species detail for real SLH files. +- Editors can curate structured Markdown documents and citations through authenticated workflows. +- Contributors can register, create drafts, and edit only their own submissions. +- Project docs describe both the implemented modernization state and the next phases. diff --git a/docs/standards-migration-plan.md b/docs/standards-migration-plan.md new file mode 100644 index 0000000..99dbe6a --- /dev/null +++ b/docs/standards-migration-plan.md @@ -0,0 +1,315 @@ +# EcoSpecies Standards Migration Plan + +## Problem + +The current EcoSpecies ingest and document model still treats legacy local fields such as `FLELMR code` / `species_code` as if they were primary identifiers. 
That is useful for historical provenance, but it is the wrong long-term center of gravity for a broader, modern biodiversity knowledge system. + +The same problem exists for citations: + +- legacy plaintext reference blocks are treated as local document text, +- citation identity is weak or missing, +- bibliography growth is tied to what happened to appear in the historical SLH file. + +The new system should preserve legacy local identifiers and references, but it should not be structurally bound to them. + +## Direction + +Treat legacy local codes and freeform references as import-era artifacts, not canonical future-facing identifiers. + +Going forward, EcoSpecies should prefer broadly recognized identifiers and registries: + +- taxonomic name authority and taxon identifiers: + - Catalogue of Life IDs and release DOIs + - GBIF taxon keys + - WoRMS AphiaIDs for marine taxa + - ITIS TSNs where relevant + - optional NCBI Taxonomy IDs for research interoperability +- literature and dataset identifiers: + - DOI as the primary publication/dataset identifier + - ISBN/ISSN where DOI is absent + - OpenAlex IDs and DataCite metadata as enrichment layers +- contributor identity: + - email-based local contributor accounts now + - optional ORCID linkage later for editor and contributor identity + +The system should be marine-forward because that matches the historical corpus, but not marine-exclusive. Identifier strategy should therefore be authority-aware rather than tied to a single domain-specific registry. + +## Authority Selection Strategy + +Choose the primary taxon authority by best-fit coverage, not by a single global rule. + +- marine taxa: + - prefer WoRMS AphiaID as primary when confidently matched + - retain GBIF and Catalogue of Life as crosswalks +- non-marine or mixed-domain taxa: + - prefer Catalogue of Life or GBIF as primary, depending on match quality and coverage + - retain ITIS and other relevant identifiers as crosswalks +- unresolved or conflicting cases: + - store all candidate identifiers + - require editorial review before a primary identifier is asserted + +This keeps the project ready for terrestrial expansion without discarding the value of WoRMS for the present corpus. + +## Important Taxonomic Note + +PhyloCode is relevant for clade naming, not as a general-purpose replacement for species-level registry IDs. It should not become the primary EcoSpecies species identifier layer. It may be useful later for clade-aware ontology and higher-level phylogenetic naming, but not as the main substitute for local `species_code` values. + +## Core Design Rules + +1. Legacy local identifiers remain preserved exactly as imported. +2. Canonical taxon identity becomes multi-authority, not single-local-code. +3. Citations become first-class structured entities, not just text inside a section. +4. Bibliographies can be extended by topic and citation graph, not only by source-document inheritance. +5. Exports keep provenance visible so readers can distinguish legacy source metadata from normalized external identifiers. + +## Schema Changes + +### Species metadata + +Retain `flelmr_code` for provenance, but demote it to a legacy metadata field. 
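+
+One way to realize that demotion is a one-time backfill that copies each imported code into the identifier layer as provenance. A minimal sketch; the table and column names are assumptions for discussion, not the settled schema:
+
+```python
+# Hypothetical backfill: mirror each legacy FLELMR code into the
+# provenance-oriented identifier layer. Table and column names are
+# illustrative, not the settled schema.
+COPY_LEGACY_CODES = """
+    INSERT INTO taxon_identifier
+        (species_id, authority, identifier, label, is_primary)
+    SELECT id, 'legacy-ecospecies', flelmr_code, 'FLELMR', FALSE
+    FROM species
+    WHERE flelmr_code IS NOT NULL
+"""
+
+
+def backfill_legacy_identifiers(conn) -> None:
+    """Run once during migration; pair with a uniqueness guard to stay re-runnable."""
+    with conn.cursor() as cur:
+        cur.execute(COPY_LEGACY_CODES)
+    conn.commit()
+```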
+ +Add a taxon-identity layer: + +- `taxon_name_usage` +- `taxon_identifier` +- `taxon_authority` +- `taxon_match_review` + +Suggested fields: + +- `taxon_identifier.authority` +- `taxon_identifier.identifier` +- `taxon_identifier.rank` +- `taxon_identifier.label` +- `taxon_identifier.is_primary` +- `taxon_identifier.source_url` +- `taxon_identifier.asserted_by` +- `taxon_identifier.match_confidence` +- `taxon_identifier.review_status` + +Examples: + +- `authority = "worms", identifier = "159059", label = "AphiaID"` +- `authority = "gbif", identifier = "2290910", label = "taxonKey"` +- `authority = "col", identifier = "5T7L7", label = "taxonID"` +- `authority = "itis", identifier = "161989", label = "TSN"` +- `authority = "legacy-ecospecies", identifier = "5192", label = "FLELMR"` + +### Citation model + +Move from section text to structured bibliography entities: + +- `citation` +- `citation_identifier` +- `citation_relation` +- `species_citation` +- `document_node_citation` +- `bibliography_topic` + +Suggested citation identifier types: + +- DOI +- ISBN +- ISSN +- PMID +- arXiv +- OpenAlex +- URL + +## Markdown / AST Changes + +Update the constrained Markdown profile so metadata stops implying that `species_code` is canonical. + +Replace the current front matter recommendation: + +```md +species_code: 5192 +``` + +with a provenance-oriented shape: + +```md +legacy_identifiers: + - authority: legacy-ecospecies + identifier: 5192 + label: FLELMR +taxon_identifiers: + - authority: worms + identifier: 159059 + label: AphiaID + primary: true + - authority: gbif + identifier: 2290910 + label: taxonKey +``` + +Also add explicit bibliography sections: + +```md +## References + +- id: doi:10.1000/example + text: Smith, J. 2024. Example paper... + relation: cites + +## Suggested Reading + +- topic: estuarine ecology +``` + +The AST should preserve: + +- legacy identifiers +- normalized taxon identifiers +- structured references +- topic links used for bibliography expansion + +## Import Pipeline Changes + +### Species identity + +Import should produce: + +1. raw imported name fields, +2. legacy local identifiers, +3. unresolved candidate taxon identifiers, +4. optional matched external identifiers, +5. a review state for unresolved or conflicting authority matches. + +Do not block ingest if no external authority match exists. Store the unresolved state explicitly. + +Primary identifier assignment should be determined by: + +1. domain fit of the authority +2. confidence of the match +3. editorial review status +4. future ability to crosswalk to other authorities + +### Citations + +Split citation processing into stages: + +1. detect bibliography/reference sections in the imported SLH text, +2. extract plaintext reference strings, +3. convert plaintext references into draft structured entries, +4. enrich identifiers and metadata, +5. assign accepted citations back to species and document nodes, +6. optionally expand bibliography by topic and citation graph. + +## CiteGeist Integration + +`../CiteGeist` is a strong fit for this migration. + +Observed capabilities in that repo already cover much of what EcoSpecies needs: + +- extracting references from plaintext, +- converting rough references into draft structured entries, +- DOI/Crossref/DataCite/OpenAlex enrichment, +- citation graph expansion, +- topic-based bibliography expansion, +- duplicate clustering and canonicalization. + +### Recommended integration boundary + +Do not embed CiteGeist logic directly into the EcoSpecies parser. 
+
+Instead:
+
+1. EcoSpecies exports candidate plaintext references and topic phrases.
+2. CiteGeist processes and enriches them into structured bibliography data.
+3. EcoSpecies imports reviewed citation outputs into its own `citation` tables.
+
+### First integration targets
+
+- species-level bibliography cleanup from `References` sections
+- DOI resolution and identifier assignment
+- duplicate detection across species bibliographies
+- topic expansion for subject areas such as habitat, trophic ecology, reproduction, invasive biology, and fisheries context
+
+### Later integration targets
+
+- node-level citation attachment
+- bibliography review UI
+- suggested-reading generation per species
+- topic-seeded bibliography augmentation for under-cited species drafts
+
+## API Changes
+
+Add standards-aware endpoints:
+
+- `/api/species/<slug>/identifiers`
+- `/api/species/<slug>/citations`
+- `/api/species/<slug>/bibliography/topics`
+- `/api/editor/species/<slug>/identifier-review`
+- `/api/editor/species/<slug>/citation-review`
+
+Do not remove legacy fields immediately. Keep `flelmr_code` in payloads for compatibility while introducing:
+
+- `legacy_identifiers`
+- `taxon_identifiers`
+- `primary_taxon_identifier`
+
+## UI Changes
+
+The species detail page should distinguish:
+
+- scientific name
+- primary external taxon identifier
+- legacy local identifiers
+- bibliography
+- suggested reading
+
+Editors should see:
+
+- unresolved authority matches
+- conflicting taxon IDs
+- citation enrichment candidates
+- duplicate-reference clusters
+
+Contributors should only author content and draft references; identifier normalization and bibliography publication remain editorial functions.
+
+## Migration Phases
+
+### Phase A: Demote legacy code
+
+- Rename internal presentation from “species code” to “legacy identifier”.
+- Keep `flelmr_code` only as legacy provenance.
+- Add `legacy_identifiers` to Markdown export and AST.
+
+### Phase B: Add external taxon identifiers
+
+- Create taxon-identifier tables and API payloads.
+- Add editor review workflows for selecting a primary authority identifier.
+- Default marine taxa review toward WoRMS where available.
+- Default broader cross-domain review toward Catalogue of Life and GBIF where WoRMS is not the right authority.
+- Keep the model open to terrestrial species from the beginning rather than treating them as out-of-scope exceptions.
+
+### Phase C: Structured bibliography
+
+- Create citation tables.
+- Extract plaintext references from imported documents.
+- Store draft citations separately from accepted citations.
+
+### Phase D: CiteGeist bridge
+
+- Define import/export format between EcoSpecies and CiteGeist.
+- Run draft-reference normalization and DOI enrichment.
+- Import reviewed structured citations back into EcoSpecies.
+
+### Phase E: Topic-aware bibliography growth
+
+- Store species topic phrases.
+- Use CiteGeist topic expansion for bibliography augmentation.
+- Keep added citations flagged by source type:
+  - imported
+  - resolved
+  - topic-expanded
+  - editor-added
+
+## Immediate Next Steps
+
+1. Update the Markdown profile to replace `species_code` with `legacy_identifiers` plus `taxon_identifiers`.
+2. Add `legacy_identifiers` and `taxon_identifiers` to the AST/document model.
+3. Introduce taxon identifier tables in the PostgreSQL schema.
+4. Define a minimal EcoSpecies-to-CiteGeist interchange format for plaintext references and topic phrases, as sketched below.
+5. Add editor-facing citation review before attempting automatic bibliography publication.
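+
+A first sketch of that interchange record, assuming one JSON object per species so the export stays streamable and easy to review. The field names are placeholders for discussion, not a settled contract:
+
+```python
+import json
+
+# Hypothetical EcoSpecies -> CiteGeist export record. Field names are
+# placeholders; the reference text reuses an example from this plan.
+export_record = {
+    "species_slug": "american-oyster",  # assumed slug form
+    "plaintext_references": [
+        "Ahmed, M. 1975. Speciation in living oysters. "
+        "Advances in Marine Biology 13:357-397.",
+    ],
+    "topic_phrases": ["estuarine ecology", "trophic ecology"],
+}
+print(json.dumps(export_record))
+```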
diff --git a/docs/structured-markdown-plan.md b/docs/structured-markdown-plan.md new file mode 100644 index 0000000..40ccaa3 --- /dev/null +++ b/docs/structured-markdown-plan.md @@ -0,0 +1,338 @@ +# Structured Markdown Document Plan + +## Goal + +Replace the current flat, parser-heavy free-form text handling with a document model that is: + +- human-readable in plaintext +- editable in the browser with hierarchy folding +- permissive-license friendly +- suitable for first-pass conversion from legacy SLH text files +- suitable as the primary export format for a species life history +- able to project cleanly into a flexible database model with greater hierarchical depth + +## Recommendation + +Adopt a constrained Markdown-based authoring format as the primary human-facing document format, backed by an internal hierarchical document AST and a relational projection layer in PostgreSQL. + +Use this three-layer model: + +1. Source and export format: constrained EcoSpecies Markdown +2. Canonical application representation: hierarchical AST +3. Database representation: relational projection for querying, indexing, publishing, and editorial workflows + +This avoids treating raw free-form text as both the storage format and the parser input. + +## Why Markdown Instead Of Org + +Markdown is the better fit for this codebase and licensing requirement because: + +- it is familiar to most users +- it is easier to constrain than Org +- it maps naturally to hierarchical headings +- it works well with CodeMirror folding +- it does not require adopting GPL or AGPL editor code + +Org-style authoring remains conceptually attractive, but embedding Org-specific tooling such as organice would introduce copyleft code, which is not aligned with a permissive-only implementation strategy. + +## EcoSpecies Markdown Profile + +The format should be Markdown-like, but intentionally narrower than unrestricted Markdown. + +### Metadata + +Use YAML front matter for canonical metadata fields: + +```md +--- +title: American Oyster +common_name: American Oyster +scientific_name: Crassostrea virginica +legacy_identifiers: + - authority: legacy-ecospecies + identifier: 5192 + label: FLELMR +taxon_identifiers: + - authority: worms + identifier: 159059 + label: AphiaID + primary: true +source_file: American Oyster SLH NOAA SEA.txt +publication_status: published +--- +``` + +Recommended canonical fields: + +- `title` +- `common_name` +- `scientific_name` +- `legacy_identifiers` +- `taxon_identifiers` +- `primary_taxon_authority` +- `source_file` +- `publication_status` +- `source_format` +- `legacy_import_id` + +### Hierarchy + +Use headings as the sole structure-bearing primitive. + +Example: + +```md +--- +title: American Oyster +common_name: American Oyster +scientific_name: Crassostrea virginica +legacy_identifiers: + - authority: legacy-ecospecies + identifier: 5192 + label: FLELMR +--- + +## Summary +Short editor-reviewed abstract. + +## Habitat + +### Type +Estuarine. + +### Substrate +Hard bottom, shell, mud flats, and other suitable settlement surfaces. + +## Reproduction + +### Season +Spawning occurs from spring through fall in much of the Gulf. +``` + +Rules: + +- Heading depth is meaningful. +- Skip-level headings should be rejected or normalized. +- Body text belongs to the nearest preceding heading. +- `#` level is optional if the document title already exists in front matter. +- Tables, lists, and citations are allowed only where explicitly supported. +- Arbitrary embedded HTML should be disallowed. 
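+
+The skip-level rule above is cheap to enforce mechanically. A minimal sketch, assuming headings are already extracted as `(depth, title)` pairs in document order; the function name and message format are illustrative:
+
+```python
+def check_heading_depths(headings: list[tuple[int, str]]) -> list[str]:
+    """Flag skip-level headings, given (depth, title) pairs in document order."""
+    problems: list[str] = []
+    previous_depth = 1  # the front-matter title acts as the implicit level-1 node
+    for depth, title in headings:
+        if depth > previous_depth + 1:
+            problems.append(
+                f"heading '{title}' jumps from depth {previous_depth} to {depth}"
+            )
+        previous_depth = depth
+    return problems
+
+
+# '## Habitat' followed directly by '#### Substrate' skips depth 3 and is flagged.
+print(check_heading_depths([(2, "Habitat"), (4, "Substrate")]))
+```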
+ +### Citations + +Keep citations readable in Markdown but structured enough to parse. + +Preferred first-pass shape: + +```md +## Citations + +- [7] Ahmed, M. 1975. Speciation in living oysters. Advances in Marine Biology 13:357-397. +- [15] Andrews, J.D. 1979. Pelecypoda: Ostreidae. Reproduction of Marine Invertebrates... +``` + +This is intentionally simpler than trying to infer citations from arbitrary prose. + +## Canonical AST + +Markdown should not be the sole internal representation. Parse it into an AST that preserves hierarchy explicitly. + +Example conceptual shape: + +```json +{ + "metadata": { + "title": "American Oyster", + "common_name": "American Oyster", + "scientific_name": "Crassostrea virginica", + "legacy_identifiers": [ + { + "authority": "legacy-ecospecies", + "identifier": "5192", + "label": "FLELMR" + } + ] + }, + "nodes": [ + { + "id": "n1", + "type": "section", + "depth": 2, + "title": "Summary", + "body": "Short editor-reviewed abstract.", + "children": [] + }, + { + "id": "n2", + "type": "section", + "depth": 2, + "title": "Habitat", + "body": "", + "children": [ + { + "id": "n3", + "type": "section", + "depth": 3, + "title": "Type", + "body": "Estuarine.", + "children": [] + } + ] + } + ] +} +``` + +Required AST properties: + +- arbitrary hierarchical depth +- stable node identifiers +- separate metadata from body structure +- support for editor audit and provenance +- support for extracting source spans from imported legacy text when available + +## Database Direction + +The current flat `document_section` model should evolve into a general document tree. + +Suggested core tables: + +- `species_document` +- `species_document_node` +- `species_document_node_revision` +- `species_document_metadata` +- `citation` +- `species_document_export` + +Suggested `species_document_node` fields: + +- `id` +- `document_id` +- `parent_id` +- `position` +- `depth` +- `node_type` +- `title` +- `body_markdown` +- `body_plaintext` +- `source_heading` +- `source_span_start` +- `source_span_end` + +This enables: + +- greater hierarchical depth +- stable editor operations on subtrees +- future insertion of machine-extracted nested content +- simplified export back to Markdown + +## Import Flow + +The legacy text parser should no longer attempt to infer the final database structure directly. + +Instead: + +1. Parse raw legacy text into a best-effort intermediate tree. +2. Normalize extracted metadata. +3. Emit constrained Markdown. +4. Parse constrained Markdown into AST. +5. Persist AST and project relationally. +6. Record diagnostics on uncertain conversions. + +This changes the parser’s role from “infer final structure perfectly” to “produce a reviewable first draft”. + +## Editor Flow + +The web editor should operate primarily on the Markdown representation, with a structured parse running on save or preview. + +Recommended behavior: + +- fold by heading depth in CodeMirror +- validate front matter and heading structure +- preview rendered sections +- show parser diagnostics inline +- save both Markdown source and parsed AST + +The editor should reject or flag: + +- invalid front matter +- duplicate canonical metadata keys +- heading depth jumps +- malformed citation entries in structured sections + +## Export Policy + +Markdown should be the primary export format for a species life history. 
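+
+A minimal sketch of the projection from that node shape back to constrained Markdown, assuming only section nodes; metadata emission, citations, and error handling are omitted:
+
+```python
+def nodes_to_markdown(nodes: list[dict]) -> list[str]:
+    """Flatten section nodes from the AST sketch above into Markdown blocks."""
+    blocks: list[str] = []
+    for node in nodes:
+        blocks.append("#" * node["depth"] + " " + node["title"])
+        if node.get("body"):
+            blocks.append(node["body"])
+        blocks.extend(nodes_to_markdown(node.get("children", [])))
+    return blocks
+
+
+# Joining with blank lines yields the constrained Markdown body:
+# markdown = "\n\n".join(nodes_to_markdown(ast["nodes"]))
+```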
+ +Export targets: + +- constrained Markdown for editorial interchange +- JSON AST for machine workflows +- derived relational/API payloads for the application +- optional report-oriented exports later + +The export path should be: + +- database document tree -> canonical AST -> constrained Markdown + +This ensures the exported plaintext remains stable and human-readable. + +## Migration Strategy + +### Stage 1: Introduce the document model + +- add AST schema and persistence tables +- keep existing section-based reads working +- build Markdown import/export helpers + +### Stage 2: Convert current parser output + +- map current parsed sections into Markdown drafts +- preserve existing metadata and diagnostics +- store generated Markdown alongside current records + +### Stage 3: Introduce Markdown editor + +- add CodeMirror-based editor with heading folding +- add validation for front matter and heading structure +- add round-trip save through AST + +### Stage 4: Move public reads to the new document model + +- generate current API responses from the hierarchical document tree +- keep compatibility shims for legacy flat sections where needed + +### Stage 5: Expand structured extraction + +- add deeper parsing for habitat, reproduction, citations, and linkages +- add richer projections from AST to relational tables + +## Immediate Implementation Tasks + +Recommended first engineering tasks: + +1. Define the constrained Markdown grammar and validation rules. +2. Design the AST schema and PostgreSQL tables. +3. Add Markdown import/export utilities in the API service. +4. Prototype a CodeMirror editor with heading folding. +5. Add a migration command that converts current species records into Markdown drafts. +6. Preserve current endpoints while introducing the document-tree backing model. + +## Non-Goals For The First Pass + +- full unrestricted Markdown feature support +- WYSIWYG editing +- arbitrary embedded HTML +- perfect citation parsing from all legacy free text +- replacing every existing API shape immediately + +## Decision Summary + +The planned direction is: + +- constrained Markdown as the editable and exportable document format +- internal AST as the canonical application representation +- relational projection for queryable application state +- CodeMirror-based browser editing with heading folding + +This is the most practical path toward human-editable hierarchy, permissive-only implementation, cleaner parsing, and deeper long-term document structure. diff --git a/docs/traefik-deploy.md b/docs/traefik-deploy.md new file mode 100644 index 0000000..647bf4f --- /dev/null +++ b/docs/traefik-deploy.md @@ -0,0 +1,79 @@ +# Traefik Deployment Notes + +This note applies to the reverse-proxy deployment variant in `docs/docker-compose-traefik.yml`. 
+ +## Start The Stack + +From the repository root: + +```bash +cp docs/docker-compose-traefik.env.example docs/docker-compose-traefik.env +# edit docs/docker-compose-traefik.env +docker compose \ + --env-file docs/docker-compose-traefik.env \ + -f docs/docker-compose-traefik.yml \ + up -d +``` + +## Common Failure Modes + +### Traefik cannot reach the web container + +Check: + +- the external Docker network named by `TRAEFIK_NETWORK` exists +- the Traefik instance is attached to that same Docker network +- the hostname in `ECOSPECIES_HOSTNAME` matches the Traefik router rule you expect +- the path in `ECOSPECIES_BASE_PATH` matches the published application prefix, for example `/apps/ecospecies` + +### The site opens but the API fails + +Check: + +- the `api` service is healthy and running +- the `web` service is using the repo's `apps/web/nginx.conf` +- the `api` service finished waiting for `importer` +- the request path is under `ECOSPECIES_BASE_PATH` if you are publishing the app below a domain root + +### Importer fails on startup + +Check: + +- `ECOSPECIES_LEGACY_DATA_DIR` points to a real host path +- that path contains `InputFiles - TXT` +- the mount is readable by Docker on the target host + +### Database does not initialize + +Check: + +- `ECOSPECIES_DB_PASSWORD` is set +- the PostgreSQL volume is writable +- an old incompatible volume is not being reused unintentionally + +### Editor login works but no editor state is available + +Check: + +- `ECOSPECIES_AUTH_TOKENS` is set on the `api` service +- the token you entered matches the configured value exactly + +## Operational Notes + +- This deployment variant intentionally exposes only the `web` container to Traefik. +- The `api`, `db`, and `importer` services stay on the internal Compose network. +- The `importer` runs before the API starts and seeds or synchronizes the dataset. +- The web container serves both the domain root and `/apps/ecospecies/`, but the Traefik router should target the intended public path. + +## Apache Front Door + +If Apache is the public front door for the hostname in `ECOSPECIES_HOSTNAME`, it must proxy the configured `ECOSPECIES_BASE_PATH` onward. Otherwise Apache can return its own `Not Found` page before the EcoSpecies stack sees the request. + +Example Apache directives: + +```apache +ProxyPass /apps/ecospecies http://127.0.0.1:80/apps/ecospecies +ProxyPassReverse /apps/ecospecies http://127.0.0.1:80/apps/ecospecies +``` + +Point the backend address at the actual Traefik listener on the host if it is not `127.0.0.1:80`, and adjust the published path if `ECOSPECIES_BASE_PATH` is different. 
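+
+When a request dies somewhere in the Apache, Traefik, nginx chain, probing both the UI and the API from outside narrows it down quickly. A minimal sketch; the hostname, prefix, and the public species-list path are deployment-specific assumptions:
+
+```python
+import urllib.request
+
+BASE = "https://example.org/apps/ecospecies"  # assumed host and prefix
+
+# Both paths should return 200 once every hop agrees on the prefix; a 404
+# from Apache's default page usually means the ProxyPass above is missing
+# or pointing at the wrong listener.
+for path in ("/", "/api/species"):
+    with urllib.request.urlopen(BASE + path, timeout=10) as response:
+        print(path, response.status)
+```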
diff --git a/scripts/backfill-citations.py b/scripts/backfill-citations.py new file mode 100644 index 0000000..9b29cd3 --- /dev/null +++ b/scripts/backfill-citations.py @@ -0,0 +1,185 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +from pathlib import Path + +from ecospecies_api.repository import ( + get_editor_species_citations, + get_editor_species_list, + update_species_citation_enrichment, +) + + +def should_backfill(citation: dict[str, object], include_accepted: bool) -> bool: + review_status = str(citation.get("review_status", "")).strip().lower() + source_type = str(citation.get("source_type", "")).strip().lower() + enrichment_status = str(citation.get("enrichment_status", "")).strip().lower() + normalized_text = str(citation.get("normalized_text", "")).strip() + abstract_text = str(citation.get("abstract_text", "")).strip() + + if not include_accepted and review_status == "accepted": + return False + if source_type in {"editor_selected_candidate", "editor_added_candidate"} and not include_accepted: + return False + + return ( + source_type in {"document_extract", "editor_review", ""} + or enrichment_status in {"pending", "unresolved", "error", ""} + or not normalized_text + or not abstract_text + ) + + +def reorder_species_with_cursor( + species_items: list[dict[str, object]], + state_file: Path | None, +) -> list[dict[str, object]]: + if not state_file or not species_items: + return species_items + + try: + last_slug = state_file.read_text(encoding="utf-8").strip() + except FileNotFoundError: + return species_items + + if not last_slug: + return species_items + + for index, item in enumerate(species_items): + if str(item.get("slug", "")).strip() == last_slug: + return species_items[index + 1 :] + species_items[: index + 1] + return species_items + + +def write_cursor(state_file: Path | None, slug: str) -> None: + if not state_file or not slug: + return + state_file.parent.mkdir(parents=True, exist_ok=True) + state_file.write_text(f"{slug}\n", encoding="utf-8") + + +def main() -> int: + parser = argparse.ArgumentParser(description="Backfill EcoSpecies citation enrichment.") + parser.add_argument("--slug", help="Limit the backfill to a single species slug.") + parser.add_argument("--username", default="citation-backfill", help="Audit username to record.") + parser.add_argument( + "--include-accepted", + action="store_true", + help="Also rerun accepted/editor-curated citations.", + ) + parser.add_argument( + "--max-species", + type=int, + default=0, + help="Stop after this many species with eligible citations. 0 means no limit.", + ) + parser.add_argument( + "--max-citations", + type=int, + default=0, + help="Stop after this many citations overall. 
0 means no limit.", + ) + parser.add_argument( + "--state-file", + help="Optional cursor file used to rotate scheduled runs through the species list.", + ) + args = parser.parse_args() + + state_file = Path(args.state_file).expanduser() if args.state_file else None + species_items = ( + [item for item in get_editor_species_list() if item["slug"] == args.slug] + if args.slug + else get_editor_species_list() + ) + if not args.slug: + species_items = reorder_species_with_cursor(species_items, state_file) + + if args.slug and not species_items: + print(f"Species not found: {args.slug}") + return 1 + + species_count = 0 + citation_count = 0 + changed_count = 0 + resolved_count = 0 + unresolved_count = 0 + error_count = 0 + last_seen_slug = "" + + for species in species_items: + if args.max_species and species_count >= args.max_species: + break + slug = str(species["slug"]) + last_seen_slug = slug + citation_payload = get_editor_species_citations(slug) + if citation_payload is None: + continue + + eligible = [ + citation + for citation in citation_payload["citations"] + if should_backfill(citation, include_accepted=args.include_accepted) + ] + if not eligible: + continue + + species_count += 1 + print(f"[{slug}] backfilling {len(eligible)} citation(s)", flush=True) + + for citation in eligible: + if args.max_citations and citation_count >= args.max_citations: + write_cursor(state_file, last_seen_slug) + print("citation limit reached; stopping early", flush=True) + print( + "summary:" + f" species={species_count}" + f" citations={citation_count}" + f" changed={changed_count}" + f" resolved={resolved_count}" + f" unresolved={unresolved_count}" + f" errors={error_count}", + flush=True, + ) + return 0 + citation_count += 1 + result = update_species_citation_enrichment( + slug=slug, + citation_id=int(citation["id"]), + username=args.username, + ) + if result is None: + print(f" - citation {citation['id']}: skipped (not found)", flush=True) + continue + + changed_fields = result.get("changed_fields", {}) + status = str(result["citation"].get("enrichment_status", "")).strip().lower() + if changed_fields: + changed_count += 1 + if status == "resolved": + resolved_count += 1 + elif status == "unresolved": + unresolved_count += 1 + elif status == "error": + error_count += 1 + print( + f" - citation {citation['id']}: {status or 'unknown'}" + + (f" ({len(changed_fields)} field changes)" if changed_fields else "") + , flush=True) + + write_cursor(state_file, last_seen_slug) + print( + "summary:" + f" species={species_count}" + f" citations={citation_count}" + f" changed={changed_count}" + f" resolved={resolved_count}" + f" unresolved={unresolved_count}" + f" errors={error_count}", + flush=True, + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/backup-postgres.sh b/scripts/backup-postgres.sh new file mode 100644 index 0000000..77fac31 --- /dev/null +++ b/scripts/backup-postgres.sh @@ -0,0 +1,28 @@ +#!/bin/sh +set -eu + +ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)" +ENV_FILE="${ECOSPECIES_ENV_FILE:-$ROOT_DIR/docs/docker-compose-traefik.env}" +COMPOSE_FILE="${ECOSPECIES_COMPOSE_FILE:-$ROOT_DIR/docs/docker-compose-traefik.yml}" +OUTPUT_FILE="${1:-$ROOT_DIR/ecospecies-backup.sql}" + +if [ ! -f "$ENV_FILE" ]; then + echo "Missing env file: $ENV_FILE" >&2 + exit 1 +fi + +set -a +. 
"$ENV_FILE" +set +a + +DB_USER="${ECOSPECIES_DB_USER:-ecospecies}" +DB_NAME="${ECOSPECIES_DB_NAME:-ecospecies}" + +docker compose \ + --env-file "$ENV_FILE" \ + -f "$COMPOSE_FILE" \ + exec -T db \ + pg_dump -U "$DB_USER" "$DB_NAME" \ + > "$OUTPUT_FILE" + +printf 'Backup written to %s\n' "$OUTPUT_FILE" diff --git a/scripts/restore-postgres.sh b/scripts/restore-postgres.sh new file mode 100644 index 0000000..1f86814 --- /dev/null +++ b/scripts/restore-postgres.sh @@ -0,0 +1,37 @@ +#!/bin/sh +set -eu + +ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)" +ENV_FILE="${ECOSPECIES_ENV_FILE:-$ROOT_DIR/docs/docker-compose-traefik.env}" +COMPOSE_FILE="${ECOSPECIES_COMPOSE_FILE:-$ROOT_DIR/docs/docker-compose-traefik.yml}" +INPUT_FILE="${1:-}" + +if [ -z "$INPUT_FILE" ]; then + echo "Usage: $0 " >&2 + exit 1 +fi + +if [ ! -f "$ENV_FILE" ]; then + echo "Missing env file: $ENV_FILE" >&2 + exit 1 +fi + +if [ ! -f "$INPUT_FILE" ]; then + echo "Missing backup file: $INPUT_FILE" >&2 + exit 1 +fi + +set -a +. "$ENV_FILE" +set +a + +DB_USER="${ECOSPECIES_DB_USER:-ecospecies}" +DB_NAME="${ECOSPECIES_DB_NAME:-ecospecies}" + +cat "$INPUT_FILE" | docker compose \ + --env-file "$ENV_FILE" \ + -f "$COMPOSE_FILE" \ + exec -T db \ + psql -U "$DB_USER" "$DB_NAME" + +printf 'Restore completed from %s\n' "$INPUT_FILE" diff --git a/scripts/run-citation-backfill.sh b/scripts/run-citation-backfill.sh new file mode 100644 index 0000000..dc21570 --- /dev/null +++ b/scripts/run-citation-backfill.sh @@ -0,0 +1,21 @@ +#!/bin/sh +set -eu + +ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)" +LOG_DIR="${ECOSPECIES_BACKFILL_LOG_DIR:-$ROOT_DIR/var/logs}" +STATE_FILE="${ECOSPECIES_BACKFILL_STATE_FILE:-$ROOT_DIR/var/citation-backfill.cursor}" +LOCK_DIR="${ECOSPECIES_BACKFILL_LOCK_DIR:-$ROOT_DIR/var/citation-backfill.lock}" +MAX_SPECIES="${ECOSPECIES_BACKFILL_MAX_SPECIES:-3}" + +mkdir -p "$LOG_DIR" +mkdir -p "$ROOT_DIR/var" + +if ! mkdir "$LOCK_DIR" 2>/dev/null; then + echo "citation backfill already running; skipping" + exit 0 +fi + +trap 'rmdir "$LOCK_DIR"' EXIT INT TERM + +exec docker exec ecospecies-api /bin/sh -lc \ + "PYTHONPATH=/workspace/apps/api/src /workspace/.docker/venv/bin/python -u /workspace/scripts/backfill-citations.py --username citation-backfill --max-species ${MAX_SPECIES} --state-file ${STATE_FILE}"