From a6b04a995acd38ed571fb097acd5be06acf5da58 Mon Sep 17 00:00:00 2001 From: welsberr Date: Thu, 26 Mar 2026 00:54:55 -0400 Subject: [PATCH] Initial commit --- .forgejo/workflows/ci.yml.template | 48 ++ .github/pull_request_template.md | 14 + .github/workflows/ci.yml | 46 ++ .gitignore | 236 +--------- CONTRIBUTING.md | 37 ++ README.md | 119 ++++- apps/README.md | 18 + apps/api/requirements.txt | 2 + apps/api/src/ecospecies_api/__init__.py | 1 + apps/api/src/ecospecies_api/app.py | 433 ++++++++++++++++++ apps/api/src/ecospecies_api/auth.py | 81 ++++ apps/api/src/ecospecies_api/cli.py | 14 + apps/api/src/ecospecies_api/db.py | 24 + apps/api/src/ecospecies_api/models.py | 79 ++++ apps/api/src/ecospecies_api/parser.py | 224 ++++++++++ apps/api/src/ecospecies_api/repository.py | 508 ++++++++++++++++++++++ apps/api/test_repository.py | 21 + apps/api/tests/test_repository.py | 307 +++++++++++++ apps/web/app.js | 349 +++++++++++++++ apps/web/index.html | 109 +++++ apps/web/nginx.conf | 32 ++ apps/web/styles.css | 427 ++++++++++++++++++ docker-compose.yml | 78 ++++ docs/flelmr-authoring.md | 44 ++ docs/forgejo-activation.md | 38 ++ docs/legacy-survey.md | 101 +++++ docs/roadmap.md | 126 ++++++ package-lock.json | 76 ++++ package.json | 11 + playwright.config.js | 22 + scripts/bootstrap-python-env.sh | 13 + scripts/check-api-tests.sh | 7 + scripts/check-ui-smoke.sh | 10 + scripts/check-ui-stack-smoke.sh | 10 + scripts/run-api.sh | 9 + scripts/run-import.sh | 9 + tests/ui/editor-ui.spec.js | 46 ++ tests/ui/live-stack.spec.js | 46 ++ tests/ui/server.js | 240 ++++++++++ 39 files changed, 3786 insertions(+), 229 deletions(-) create mode 100644 .forgejo/workflows/ci.yml.template create mode 100644 .github/pull_request_template.md create mode 100644 .github/workflows/ci.yml create mode 100644 CONTRIBUTING.md create mode 100644 apps/README.md create mode 100644 apps/api/requirements.txt create mode 100644 apps/api/src/ecospecies_api/__init__.py create mode 100644 apps/api/src/ecospecies_api/app.py create mode 100644 apps/api/src/ecospecies_api/auth.py create mode 100644 apps/api/src/ecospecies_api/cli.py create mode 100644 apps/api/src/ecospecies_api/db.py create mode 100644 apps/api/src/ecospecies_api/models.py create mode 100644 apps/api/src/ecospecies_api/parser.py create mode 100644 apps/api/src/ecospecies_api/repository.py create mode 100644 apps/api/test_repository.py create mode 100644 apps/api/tests/test_repository.py create mode 100644 apps/web/app.js create mode 100644 apps/web/index.html create mode 100644 apps/web/nginx.conf create mode 100644 apps/web/styles.css create mode 100644 docker-compose.yml create mode 100644 docs/flelmr-authoring.md create mode 100644 docs/forgejo-activation.md create mode 100644 docs/legacy-survey.md create mode 100644 docs/roadmap.md create mode 100644 package-lock.json create mode 100644 package.json create mode 100644 playwright.config.js create mode 100644 scripts/bootstrap-python-env.sh create mode 100644 scripts/check-api-tests.sh create mode 100644 scripts/check-ui-smoke.sh create mode 100644 scripts/check-ui-stack-smoke.sh create mode 100644 scripts/run-api.sh create mode 100644 scripts/run-import.sh create mode 100644 tests/ui/editor-ui.spec.js create mode 100644 tests/ui/live-stack.spec.js create mode 100644 tests/ui/server.js diff --git a/.forgejo/workflows/ci.yml.template b/.forgejo/workflows/ci.yml.template new file mode 100644 index 0000000..e152696 --- /dev/null +++ b/.forgejo/workflows/ci.yml.template @@ -0,0 +1,48 @@ +name: CI + +on: + push: + 
pull_request: + +jobs: + api-tests: + # Replace `docker` with the runner label used by the target Forgejo instance. + runs-on: docker + steps: + # Replace these `uses:` references if the target Forgejo instance does not + # allow GitHub-hosted actions directly. + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install API dependencies + run: pip install -r apps/api/requirements.txt + + - name: Run repository tests + run: ./scripts/check-api-tests.sh + + ui-smoke: + # Replace `docker` with the runner label used by the target Forgejo instance. + runs-on: docker + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Node + uses: actions/setup-node@v4 + with: + node-version: "20" + cache: "npm" + + - name: Install UI test dependencies + run: npm ci + + - name: Install Playwright browser + run: npx playwright install --with-deps chromium + + - name: Run stub UI smoke test + run: ./scripts/check-ui-smoke.sh diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..11f847e --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,14 @@ +## Summary + +- + +## Verification + +- [ ] `cd apps/api && python -m unittest -v` +- [ ] `npm run test:ui` +- [ ] `npm run test:ui:stack` if editor/archive behavior or real-stack wiring changed + +## Notes + +- Risk areas: +- Follow-up work: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..e48ec0f --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,46 @@ +name: CI + +on: + push: + branches: + - "**" + pull_request: + +jobs: + api-tests: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install API dependencies + run: pip install -r apps/api/requirements.txt + + - name: Run repository tests + run: ./scripts/check-api-tests.sh + + ui-smoke: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Node + uses: actions/setup-node@v4 + with: + node-version: "20" + cache: "npm" + + - name: Install UI test dependencies + run: npm ci + + - name: Install Playwright browser + run: npx playwright install --with-deps chromium + + - name: Run stub UI smoke test + run: ./scripts/check-ui-smoke.sh diff --git a/.gitignore b/.gitignore index 80d0605..6401844 100644 --- a/.gitignore +++ b/.gitignore @@ -1,229 +1,9 @@ -# ---> Python -# Byte-compiled / optimized / DLL files +.docker/pip-cache/ +.docker/venv/ +var/postgres/ +var/sqlite/ __pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. 
-*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. -# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# pdm -# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. -# https://pdm.fming.dev/#use-with-ide -.pdm.toml - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
-#.idea/ - -# ---> Emacs -# -*- mode: gitignore; -*- -*~ -\#*\# -/.emacs.desktop -/.emacs.desktop.lock -*.elc -auto-save-list -tramp -.\#* - -# Org-mode -.org-id-locations -*_archive - -# flymake-mode -*_flymake.* - -# eshell files -/eshell/history -/eshell/lastdir - -# elpa packages -/elpa/ - -# reftex files -*.rel - -# AUCTeX auto folder -/auto/ - -# cask packages -.cask/ -dist/ - -# Flycheck -flycheck_*.el - -# server auth directory -/server/ - -# projectiles files -.projectile - -# directory configuration -.dir-locals.el - -# network security -/network-security.data - - -# ---> Rust -# Generated by Cargo -# will have compiled files and executables -debug/ -target/ - -# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries -# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html -Cargo.lock - -# These are backup files generated by rustfmt -**/*.rs.bk - -# MSVC Windows builds of rustc generate these, which store debugging information -*.pdb - +*.pyc +node_modules/ +test-results/ +playwright-report/ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..497ee2c --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,37 @@ +# Contributing + +## Expectations + +Changes should preserve the current verification baseline: + +- repository-layer API tests must pass +- the stubbed browser smoke test must pass for UI/editor changes +- the live-stack browser smoke test should be run for changes that affect real editor/archive workflows when the Compose stack is available + +## Local Checks + +Run API tests: + +```bash +./scripts/check-api-tests.sh +``` + +Run the stubbed browser smoke test: + +```bash +./scripts/check-ui-smoke.sh +``` + +Run the real-stack browser smoke test: + +```bash +./scripts/check-ui-stack-smoke.sh +``` + +## Notes + +- CI currently runs the API tests and the stubbed browser smoke test. +- The live-stack smoke test is intentionally separate because it depends on a running Compose stack and editor auth configuration. +- If a change affects editorial persistence, archive behavior, or section editing, include the relevant verification command output or a concise summary in the change request. +- Repository hosts can call the scripts in `scripts/` directly instead of duplicating command wiring in host-specific pipeline definitions. +- Forgejo can use the existing `.github/workflows` CI definition through its documented fallback behavior. If a Forgejo-native pipeline is needed, start from `.forgejo/workflows/ci.yml.template`, then follow `docs/forgejo-activation.md` before activating it. diff --git a/README.md b/README.md index 8f409d3..1c731e8 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,120 @@ # EcoSpecies-Atlas -EcoSpecies-Atlas is a web app that provides species life history information to a broader audience. \ No newline at end of file +Modern open-source follow-on to the legacy EcoSpecies application, built to ingest historical Species Life History materials and evolve into a maintainable public research platform. 
+
+## Repository layout
+
+- `apps/api`: Python API and import logic
+- `apps/web`: public web UI served by nginx
+- `docs`: migration survey and roadmap
+- `scripts`: Compose runtime scripts for bootstrapping the container-managed Python environment
+- Docker named volume `python_venv`: container-managed Python virtual environment
+- Docker named volume `pip_cache`: pip cache for container bootstrapping
+- Docker named volume `postgres_data`: PostgreSQL data directory
+- `var/sqlite`: host fallback for local non-Compose verification
+
+## Runtime model
+
+Docker Compose owns all runtime dependencies:
+
+- PostgreSQL runs in a container with a Docker-managed named volume
+- Python services run in `python:3.12-slim`
+- the Python virtual environment is created in a Docker-managed volume mounted at `/workspace/.docker/venv`
+- dependencies are installed from `apps/api/requirements.txt` inside that virtual environment
+- the legacy corpus is mounted read-only from `../01-legacy-code-and-data`
+
+No host Python packages are required for the Compose workflow.
+
+## Start the stack
+
+```bash
+cd EcoSpecies-Atlas
+docker compose up
+```
+
+Endpoints:
+
+- web UI: `http://localhost:8080`
+- API: `http://localhost:8000`
+- PostgreSQL: `localhost:5432`
+- liveness: `/healthz`
+- readiness: `/readyz`
+- auth session: `/api/auth/session`
+- editor status: `/api/editor/status` (requires `editor` or `admin`)
+- editor species list: `/api/editor/species` (requires `editor` or `admin`)
+- editor workflow detail/update: `/api/editor/species/<slug>/workflow` (requires `editor` or `admin`)
+- editor species detail: `/api/editor/species/<slug>` (requires `editor` or `admin`)
+- editor editorial update: `/api/editor/species/<slug>/editorial` (requires `editor` or `admin`)
+- editor section detail/update: `/api/editor/species/<slug>/sections/<position>` (requires `editor` or `admin`)
+- editor audit history: `/api/editor/species/<slug>/audit` (requires `editor` or `admin`)
+
+If those host ports are already in use, override them when starting Compose, for example:
+
+```bash
+ECOSPECIES_API_PORT=18000 ECOSPECIES_WEB_PORT=18080 docker compose up
+```
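+
+Once the stack is up, a short Python client can confirm that the health and editor endpoints respond as documented. This is a minimal, non-authoritative sketch: it assumes the default port mapping above, and `editor-token` is a placeholder that must match a token configured in `ECOSPECIES_AUTH_TOKENS`, not a shipped credential.
+
+```python
+import json
+import urllib.request
+
+BASE = "http://localhost:8000"
+TOKEN = "editor-token"  # placeholder; replace with a configured token
+
+
+def get(path: str, token: str | None = None) -> dict:
+    request = urllib.request.Request(BASE + path)
+    if token:
+        # The API also accepts the same token via an X-EcoSpecies-Token header.
+        request.add_header("Authorization", f"Bearer {token}")
+    # Note: urlopen raises HTTPError for non-2xx responses,
+    # e.g. a degraded /readyz returning 503.
+    with urllib.request.urlopen(request) as response:
+        return json.load(response)
+
+
+print(get("/readyz"))                    # readiness: database and data state
+print(get("/api/auth/session", TOKEN))   # which user and role the token resolves to
+print(get("/api/editor/status", TOKEN))  # requires the editor or admin role
+```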
+
+## Host-visible state
+
+Source-controlled content and the SQLite fallback are visible on the host:
+
+- source code and docs in this repo
+- the optional SQLite fallback database in `var/sqlite`
+
+PostgreSQL data, the Python virtual environment, and the pip cache live in Docker named volumes and are not host-visible (see Notes below).
+
+## Automated checks
+
+Repository-host CI runs the repository-layer tests and the stubbed browser smoke test on pushes and change requests.
+Contributor workflow guidance is in `CONTRIBUTING.md`.
+
+When hosted on Forgejo, the current GitHub-compatible workflow layout can still be used. Forgejo Actions looks for workflows in `.forgejo/workflows` and, if that directory is absent, falls back to `.github/workflows`. A Forgejo-native template is provided at `.forgejo/workflows/ci.yml.template`; copy it to `.forgejo/workflows/ci.yml` only after adapting the runner label and action source policy for the target instance. The activation checklist is in `docs/forgejo-activation.md`.
+
+Run the repository-layer test suite with:
+
+```bash
+./scripts/check-api-tests.sh
+```
+
+Run the browser-level editor smoke test with:
+
+```bash
+./scripts/check-ui-smoke.sh
+```
+
+Run the browser-level smoke test against the real Compose stack with:
+
+```bash
+./scripts/check-ui-stack-smoke.sh
+```
+
+## Notes
+
+- The importer seeds PostgreSQL from the legacy text corpus before the API starts and now synchronizes by slug instead of truncating the full dataset.
+- Species missing from a later import payload are archived instead of deleted. Public endpoints hide archived records; editor endpoints can still inspect them.
+- The editor species list supports `active`, `all`, and `archived` client-side filtering so archived records remain manageable in the UI.
+- Editors can also archive or unarchive species explicitly from the editorial controls, with audit history recorded alongside other editorial changes.
+- The API also supports a host-local SQLite fallback for direct verification when `ECOSPECIES_DATABASE_URL` is unset.
+- PostgreSQL, the Python virtualenv, and the pip cache use Docker named volumes because bind-mounted runtime state is not reliable on CIFS-backed workspaces like this one.
+- Initial editor auth uses `ECOSPECIES_AUTH_TOKENS` in the format `token:username:role[,token2:username2:role2]`, where `role` is `viewer`, `editor`, or `admin`.
+- Editorial workflow state is persisted per species with `draft`, `review`, and `published` statuses. Public endpoints return only `published` records; editor endpoints can inspect and update all records.
+- Editors can curate top-level metadata and section content from the web UI, and every editorial or section change is recorded in per-species audit history.
+- Summary authoring guidance for future FLELMR-compatible records is in `docs/flelmr-authoring.md`.
+- Legacy survey and roadmap artifacts are in `docs/`.
+
+## Governance and operations
+
+The repository host, such as GitHub or Forgejo, is used for source control, change requests, code review, and CI checks. It is not part of the application runtime.
+
+EcoSpecies-Atlas itself runs through Docker Compose, the Python API, nginx, and PostgreSQL. Import jobs, editor workflows, and browser access all depend on the application stack, not on the repository host.
+
+In practice, the repository host is responsible for change management:
+
+- storing the code, docs, workflow definitions, and test harnesses
+- running CI checks on pushes and change requests
+- supporting review and merge workflows
+
+The application stack, in turn, is responsible for operations:
+
+- serving the web UI and API
+- persisting editorial and import state
+- running imports and editor workflows
diff --git a/apps/README.md b/apps/README.md
new file mode 100644
index 0000000..9e1a838
--- /dev/null
+++ b/apps/README.md
@@ -0,0 +1,18 @@
+# Application Notes
+
+## API
+
+The API lives in `apps/api/src/ecospecies_api` and supports two storage modes:
+
+- PostgreSQL through Docker Compose
+- SQLite fallback for host-local verification
+
+The importer writes persisted `species`, `document_section`, and `ingest_diagnostic` records before the API serves traffic.
+
+## Web
+
+The web app is static and served by nginx, which proxies `/api/*` and `/healthz` to the API service inside the Compose network.
+
+## Dependency handling
+
+Compose bootstraps a Python virtual environment in a Docker-managed named volume mounted at `/workspace/.docker/venv` and installs `apps/api/requirements.txt` into that environment. This keeps runtime dependencies isolated from the host Python installation; the top-level README explains why named volumes are used here instead of bind mounts.
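+
+For host-local verification without Compose, the importer can be driven directly. A minimal sketch, assuming `apps/api/src` is on `sys.path`, the legacy corpus is reachable at the `ECOSPECIES_DATA_DIR` default, and `ECOSPECIES_DATABASE_URL` is unset so the SQLite fallback under `var/sqlite` is used (this mirrors `ecospecies_api.cli`):
+
+```python
+from ecospecies_api.app import make_payload
+from ecospecies_api.repository import import_species_payload
+
+payload = make_payload()         # parse the legacy .txt corpus into plain dicts
+import_species_payload(payload)  # create the schema if needed, then sync by slug
+print(f"Imported {len(payload)} species records.")
+```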
diff --git a/apps/api/requirements.txt b/apps/api/requirements.txt new file mode 100644 index 0000000..1764cb9 --- /dev/null +++ b/apps/api/requirements.txt @@ -0,0 +1,2 @@ +SQLAlchemy>=2.0,<3.0 +psycopg[binary]>=3.2,<4.0 diff --git a/apps/api/src/ecospecies_api/__init__.py b/apps/api/src/ecospecies_api/__init__.py new file mode 100644 index 0000000..88f0993 --- /dev/null +++ b/apps/api/src/ecospecies_api/__init__.py @@ -0,0 +1 @@ +"""EcoSpecies API package.""" diff --git a/apps/api/src/ecospecies_api/app.py b/apps/api/src/ecospecies_api/app.py new file mode 100644 index 0000000..f8c91ed --- /dev/null +++ b/apps/api/src/ecospecies_api/app.py @@ -0,0 +1,433 @@ +from __future__ import annotations + +import json +import os +import time +from http import HTTPStatus +from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer +from urllib.parse import parse_qs, urlparse + +from ecospecies_api.auth import ( + AuthSession, + auth_is_configured, + resolve_auth_session, + role_satisfies, +) +from ecospecies_api.parser import get_default_data_dir, load_species_records +from ecospecies_api.repository import ( + get_editor_species_detail, + get_editor_species_list, + get_editor_species_workflow, + get_species_by_slug, + list_species_audit, + get_readiness_status, + get_summary_metrics, + has_species_data, + import_species_payload, + list_diagnostics, + list_species, + update_species_section, + update_species_editorial, +) + + +def make_payload() -> list[dict[str, object]]: + records = load_species_records(get_default_data_dir()) + payload: list[dict[str, object]] = [] + for record in records: + payload.append( + { + "slug": record.slug, + "source_file": record.source_file, + "title": record.title, + "common_name": record.common_name, + "scientific_name": record.scientific_name, + "flelmr_code": record.flelmr_code, + "summary": record.summary, + "section_count": record.section_count, + "diagnostics": [ + { + "level": diagnostic.level, + "code": diagnostic.code, + "message": diagnostic.message, + } + for diagnostic in record.diagnostics + ], + "sections": [ + {"heading": section.heading, "content": section.content} + for section in record.sections + ], + } + ) + return payload + + +class EcoSpeciesHandler(BaseHTTPRequestHandler): + server_version = "EcoSpeciesHTTP/0.1" + + def do_GET(self) -> None: + parsed = urlparse(self.path) + path = parsed.path + query = parse_qs(parsed.query) + session = resolve_auth_session(self.headers) + + if path == "/healthz": + self.write_json( + { + "status": "ok", + "check": "liveness", + "process": "running", + } + ) + return + + if path == "/readyz": + readiness = get_readiness_status() + status = HTTPStatus.OK if readiness["ready"] else HTTPStatus.SERVICE_UNAVAILABLE + self.write_json( + { + "status": "ok" if readiness["ready"] else "degraded", + "check": "readiness", + **readiness, + }, + status=status, + ) + return + + if path == "/api/auth/session": + self.write_json( + { + "authenticated": session is not None, + "auth_configured": auth_is_configured(), + "user": ( + {"username": session.username, "role": session.role} + if session is not None + else None + ), + } + ) + return + + if path == "/api/editor/status": + if not self.require_role(session, "editor"): + return + self.write_json( + { + "status": "ok", + "editor_access": True, + "user": {"username": session.username, "role": session.role}, + "capabilities": [ + "draft_ingest_review", + "editorial_curation", + "future_report_authoring", + ], + } + ) + return + + if path == "/api/editor/species": + if not 
self.require_role(session, "editor"): + return + search = query.get("search", [""])[0].strip().lower() + items = get_editor_species_list(search) + compact = [ + { + "slug": item["slug"], + "title": item["title"], + "common_name": item["common_name"], + "publication_status": item["publication_status"], + "is_archived": item["is_archived"], + "last_modified_by": item["last_modified_by"], + "diagnostic_count": len(item["diagnostics"]), + } + for item in items + ] + self.write_json({"items": compact, "count": len(compact)}) + return + + if path.startswith("/api/editor/species/") and path.endswith("/audit"): + if not self.require_role(session, "editor"): + return + slug = path[len("/api/editor/species/") : -len("/audit")].strip("/") + items = list_species_audit(slug) + if items is None: + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + return + self.write_json({"items": items, "count": len(items)}) + return + + if path.startswith("/api/editor/species/") and "/sections/" in path: + if not self.require_role(session, "editor"): + return + slug, _, tail = path[len("/api/editor/species/") :].partition("/sections/") + try: + section_position = int(tail.strip("/")) + except ValueError: + self.write_json({"error": "Invalid section position"}, status=HTTPStatus.BAD_REQUEST) + return + item = get_editor_species_detail(slug.strip("/")) + if item is None: + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + return + for section in item["sections"]: + if section["position"] == section_position: + self.write_json(section) + return + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + return + + if path.startswith("/api/editor/species/") and not path.endswith("/workflow") and not path.endswith("/editorial") and not path.endswith("/audit"): + if not self.require_role(session, "editor"): + return + slug = path[len("/api/editor/species/") :].strip("/") + item = get_editor_species_detail(slug) + if item is None: + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + return + self.write_json(item) + return + + if path.startswith("/api/editor/species/") and path.endswith("/workflow"): + if not self.require_role(session, "editor"): + return + slug = path[len("/api/editor/species/") : -len("/workflow")].strip("/") + item = get_editor_species_workflow(slug) + if item is None: + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + return + self.write_json(item) + return + + if path == "/api/insights/summary": + metrics = get_summary_metrics() + self.write_json( + { + "species_count": metrics["species_count"], + "section_count": metrics["section_count"], + "diagnostic_counts": metrics["diagnostic_counts"], + "source_directory": get_default_data_dir(), + } + ) + return + + if path == "/api/insights/diagnostics": + flagged = list_diagnostics() + self.write_json({"items": flagged, "count": len(flagged)}) + return + + if path == "/api/species": + search = query.get("search", [""])[0].strip().lower() + species = list_species(search) + compact = [ + { + "slug": item["slug"], + "title": item["title"], + "common_name": item["common_name"], + "scientific_name": item["scientific_name"], + "flelmr_code": item["flelmr_code"], + "summary": item["summary"], + "section_count": item["section_count"], + "diagnostic_count": len(item["diagnostics"]), + } + for item in species + ] + self.write_json({"items": compact, "count": len(compact)}) + return + + if path.startswith("/api/species/"): + slug = path.rsplit("/", 1)[-1] + item = 
get_species_by_slug(slug) + if item is not None: + self.write_json(item) + return + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + return + + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + + def do_POST(self) -> None: + parsed = urlparse(self.path) + path = parsed.path + session = resolve_auth_session(self.headers) + + if path.startswith("/api/editor/species/") and path.endswith("/workflow"): + if not self.require_role(session, "editor"): + return + + payload = self.read_json_body() + if payload is None: + return + + slug = path[len("/api/editor/species/") : -len("/workflow")].strip("/") + try: + result = update_species_editorial( + slug=slug, + publication_status=payload.get("publication_status"), + summary=None, + editor_notes=payload.get("editor_notes"), + is_archived=payload.get("is_archived"), + username=session.username, + ) + except ValueError as exc: + self.write_json({"error": str(exc)}, status=HTTPStatus.BAD_REQUEST) + return + + if result is None: + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + return + + self.write_json({"status": "ok", **result}) + return + + if path.startswith("/api/editor/species/") and path.endswith("/editorial"): + if not self.require_role(session, "editor"): + return + + payload = self.read_json_body() + if payload is None: + return + + slug = path[len("/api/editor/species/") : -len("/editorial")].strip("/") + try: + result = update_species_editorial( + slug=slug, + publication_status=payload.get("publication_status"), + summary=payload.get("summary"), + editor_notes=payload.get("editor_notes"), + is_archived=payload.get("is_archived"), + username=session.username, + ) + except ValueError as exc: + self.write_json({"error": str(exc)}, status=HTTPStatus.BAD_REQUEST) + return + + if result is None: + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + return + + self.write_json({"status": "ok", **result}) + return + + if path.startswith("/api/editor/species/") and "/sections/" in path: + if not self.require_role(session, "editor"): + return + + payload = self.read_json_body() + if payload is None: + return + + slug, _, tail = path[len("/api/editor/species/") :].partition("/sections/") + try: + section_position = int(tail.strip("/")) + except ValueError: + self.write_json({"error": "Invalid section position"}, status=HTTPStatus.BAD_REQUEST) + return + + content = payload.get("content") + if not isinstance(content, str): + self.write_json({"error": "content must be a string"}, status=HTTPStatus.BAD_REQUEST) + return + + result = update_species_section( + slug=slug.strip("/"), + section_position=section_position, + content=content, + username=session.username, + ) + if result is None: + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + return + + self.write_json({"status": "ok", **result}) + return + + self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND) + + def log_message(self, format: str, *args: object) -> None: + return + + def require_role(self, session: AuthSession | None, required_role: str) -> bool: + if session is None: + self.write_json( + { + "error": "Authentication required", + "required_role": required_role, + "auth_configured": auth_is_configured(), + }, + status=HTTPStatus.UNAUTHORIZED, + ) + return False + + if not role_satisfies(session.role, required_role): + self.write_json( + { + "error": "Insufficient role", + "required_role": required_role, + "user_role": session.role, + "username": session.username, + }, + 
status=HTTPStatus.FORBIDDEN, + ) + return False + + return True + + def read_json_body(self) -> dict[str, object] | None: + content_length = self.headers.get("Content-Length", "0").strip() + try: + length = int(content_length or "0") + except ValueError: + self.write_json({"error": "Invalid Content-Length"}, status=HTTPStatus.BAD_REQUEST) + return None + + raw_body = self.rfile.read(length) if 0 < length else b"{}" + try: + parsed = json.loads(raw_body.decode("utf-8")) + except json.JSONDecodeError: + self.write_json({"error": "Invalid JSON body"}, status=HTTPStatus.BAD_REQUEST) + return None + + if not isinstance(parsed, dict): + self.write_json({"error": "JSON body must be an object"}, status=HTTPStatus.BAD_REQUEST) + return None + + return parsed + + def write_json(self, payload: dict[str, object], status: HTTPStatus = HTTPStatus.OK) -> None: + body = json.dumps(payload, ensure_ascii=True).encode("utf-8") + self.send_response(status.value) + self.send_header("Content-Type", "application/json; charset=utf-8") + self.send_header("Content-Length", str(len(body))) + self.send_header("Access-Control-Allow-Origin", "*") + if status == HTTPStatus.UNAUTHORIZED: + self.send_header("WWW-Authenticate", 'Bearer realm="EcoSpecies-Atlas"') + self.end_headers() + self.wfile.write(body) + + +def main() -> None: + host = os.environ.get("ECOSPECIES_HOST", "127.0.0.1") + port = int(os.environ.get("ECOSPECIES_PORT", "8000")) + + last_error: Exception | None = None + for attempt in range(1, 16): + try: + if not has_species_data(): + import_species_payload(make_payload()) + last_error = None + break + except Exception as exc: # pragma: no cover - startup retry path + last_error = exc + print(f"Startup dependency check failed on attempt {attempt}: {exc}") + time.sleep(2) + + if last_error is not None: + raise last_error + + server = ThreadingHTTPServer((host, port), EcoSpeciesHandler) + print(f"EcoSpecies API listening on http://{host}:{port}") + server.serve_forever() + + +if __name__ == "__main__": + main() diff --git a/apps/api/src/ecospecies_api/auth.py b/apps/api/src/ecospecies_api/auth.py new file mode 100644 index 0000000..9d7f86c --- /dev/null +++ b/apps/api/src/ecospecies_api/auth.py @@ -0,0 +1,81 @@ +from __future__ import annotations + +import os +from dataclasses import dataclass +from typing import Mapping + + +ROLE_ORDER = { + "viewer": 1, + "editor": 2, + "admin": 3, +} + + +@dataclass(frozen=True) +class AuthSession: + token: str + username: str + role: str + + +def _normalize_role(role: str) -> str: + normalized = role.strip().lower() + if normalized not in ROLE_ORDER: + raise ValueError(f"Unsupported role: {role}") + return normalized + + +def _parse_token_entry(entry: str) -> tuple[str, AuthSession]: + parts = [part.strip() for part in entry.split(":")] + if len(parts) != 3: + raise ValueError( + "ECOSPECIES_AUTH_TOKENS entries must use the format token:username:role" + ) + + token, username, role = parts + if not token or not username: + raise ValueError("Auth token and username must be non-empty") + + return token, AuthSession(token=token, username=username, role=_normalize_role(role)) + + +def get_token_registry() -> dict[str, AuthSession]: + configured = os.environ.get("ECOSPECIES_AUTH_TOKENS", "").strip() + if not configured: + return {} + + registry: dict[str, AuthSession] = {} + for raw_entry in configured.split(","): + entry = raw_entry.strip() + if not entry: + continue + token, session = _parse_token_entry(entry) + registry[token] = session + return registry + + +def 
get_bearer_token(headers: Mapping[str, str]) -> str | None: + auth_header = headers.get("Authorization", "").strip() + if auth_header.lower().startswith("bearer "): + token = auth_header[7:].strip() + return token or None + + token = headers.get("X-EcoSpecies-Token", "").strip() + return token or None + + +def resolve_auth_session(headers: Mapping[str, str]) -> AuthSession | None: + registry = get_token_registry() + token = get_bearer_token(headers) + if not token: + return None + return registry.get(token) + + +def auth_is_configured() -> bool: + return bool(get_token_registry()) + + +def role_satisfies(role: str, required_role: str) -> bool: + return ROLE_ORDER[_normalize_role(role)] >= ROLE_ORDER[_normalize_role(required_role)] diff --git a/apps/api/src/ecospecies_api/cli.py b/apps/api/src/ecospecies_api/cli.py new file mode 100644 index 0000000..d1b7503 --- /dev/null +++ b/apps/api/src/ecospecies_api/cli.py @@ -0,0 +1,14 @@ +from __future__ import annotations + +from ecospecies_api.app import make_payload +from ecospecies_api.repository import import_species_payload + + +def main() -> None: + payload = make_payload() + import_species_payload(payload) + print(f"Imported {len(payload)} species records.") + + +if __name__ == "__main__": + main() diff --git a/apps/api/src/ecospecies_api/db.py b/apps/api/src/ecospecies_api/db.py new file mode 100644 index 0000000..46bd7b0 --- /dev/null +++ b/apps/api/src/ecospecies_api/db.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +import os +from pathlib import Path + +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker + + +def get_database_url() -> str: + configured = os.environ.get("ECOSPECIES_DATABASE_URL") + if configured: + return configured + + default_path = Path(__file__).resolve().parents[4] / "var" / "sqlite" / "ecospecies.db" + default_path.parent.mkdir(parents=True, exist_ok=True) + return f"sqlite:///{default_path}" + + +def create_db_engine(): + return create_engine(get_database_url(), future=True) + + +SessionLocal = sessionmaker(bind=create_db_engine(), autoflush=False, autocommit=False, future=True) diff --git a/apps/api/src/ecospecies_api/models.py b/apps/api/src/ecospecies_api/models.py new file mode 100644 index 0000000..89248cd --- /dev/null +++ b/apps/api/src/ecospecies_api/models.py @@ -0,0 +1,79 @@ +from __future__ import annotations + +from sqlalchemy import Boolean, ForeignKey, Integer, String, Text +from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship + + +class Base(DeclarativeBase): + pass + + +class Species(Base): + __tablename__ = "species" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + slug: Mapped[str] = mapped_column(String(255), unique=True, index=True) + source_file: Mapped[str] = mapped_column(String(255)) + title: Mapped[str] = mapped_column(String(500)) + common_name: Mapped[str] = mapped_column(String(255), default="") + scientific_name: Mapped[str] = mapped_column(String(255), default="") + flelmr_code: Mapped[str] = mapped_column(String(64), default="") + summary: Mapped[str] = mapped_column(Text, default="") + section_count: Mapped[int] = mapped_column(Integer, default=0) + publication_status: Mapped[str] = mapped_column(String(32), default="published", index=True) + is_archived: Mapped[bool] = mapped_column(Boolean, default=False, index=True) + editor_notes: Mapped[str] = mapped_column(Text, default="") + last_modified_by: Mapped[str] = mapped_column(String(255), default="system-import") + + sections: 
Mapped[list["DocumentSection"]] = relationship( + back_populates="species", + cascade="all, delete-orphan", + order_by="DocumentSection.position", + ) + diagnostics: Mapped[list["IngestDiagnosticRecord"]] = relationship( + back_populates="species", + cascade="all, delete-orphan", + order_by="IngestDiagnosticRecord.id", + ) + audit_entries: Mapped[list["SpeciesAuditLog"]] = relationship( + back_populates="species", + cascade="all, delete-orphan", + order_by="SpeciesAuditLog.id.desc()", + ) + + +class DocumentSection(Base): + __tablename__ = "document_section" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + species_id: Mapped[int] = mapped_column(ForeignKey("species.id", ondelete="CASCADE"), index=True) + position: Mapped[int] = mapped_column(Integer) + heading: Mapped[str] = mapped_column(String(255)) + content: Mapped[str] = mapped_column(Text) + + species: Mapped[Species] = relationship(back_populates="sections") + + +class IngestDiagnosticRecord(Base): + __tablename__ = "ingest_diagnostic" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + species_id: Mapped[int] = mapped_column(ForeignKey("species.id", ondelete="CASCADE"), index=True) + level: Mapped[str] = mapped_column(String(32)) + code: Mapped[str] = mapped_column(String(128), index=True) + message: Mapped[str] = mapped_column(Text) + + species: Mapped[Species] = relationship(back_populates="diagnostics") + + +class SpeciesAuditLog(Base): + __tablename__ = "species_audit_log" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + species_id: Mapped[int] = mapped_column(ForeignKey("species.id", ondelete="CASCADE"), index=True) + changed_by: Mapped[str] = mapped_column(String(255)) + changed_at: Mapped[str] = mapped_column(String(64), index=True) + action: Mapped[str] = mapped_column(String(64)) + details_json: Mapped[str] = mapped_column(Text) + + species: Mapped[Species] = relationship(back_populates="audit_entries") diff --git a/apps/api/src/ecospecies_api/parser.py b/apps/api/src/ecospecies_api/parser.py new file mode 100644 index 0000000..1a02e42 --- /dev/null +++ b/apps/api/src/ecospecies_api/parser.py @@ -0,0 +1,224 @@ +from __future__ import annotations + +import os +import re +from dataclasses import dataclass +from pathlib import Path + + +SECTION_PATTERN = re.compile(r"^[A-Z][A-Z\s/&()-]{2,}$") +FIELD_PATTERN = re.compile(r"^(?P[A-Za-z/ _-]+):\s*(?P.*)$") +SUMMARY_MARKER_PATTERN = re.compile(r"^(summary(?:/abstract)?|abstract|executive summary):?\s*$", re.IGNORECASE) + + +@dataclass +class Section: + heading: str + content: str + + +@dataclass +class IngestDiagnostic: + level: str + code: str + message: str + + +@dataclass +class SpeciesRecord: + slug: str + source_file: str + title: str + common_name: str + scientific_name: str + flelmr_code: str + summary: str + section_count: int + sections: list[Section] + diagnostics: list[IngestDiagnostic] + + +def slugify(value: str) -> str: + cleaned = re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-") + return cleaned or "unknown-species" + + +def normalize_text(raw_text: str) -> str: + text = raw_text.replace("\ufeff", "") + text = text.replace("\r\n", "\n").replace("\r", "\n") + return text + + +def normalize_whitespace(value: str) -> str: + return re.sub(r"\s+", " ", value).strip() + + +def split_sections(lines: list[str]) -> list[Section]: + sections: list[Section] = [] + current_heading = "HEADER" + current_lines: list[str] = [] + + for raw_line in lines: + line = raw_line.rstrip() + stripped = line.strip() + if 
SECTION_PATTERN.fullmatch(stripped): + if current_lines: + sections.append( + Section( + heading=current_heading, + content="\n".join(current_lines).strip(), + ) + ) + current_heading = stripped + current_lines = [] + continue + current_lines.append(line) + + if current_lines: + sections.append( + Section( + heading=current_heading, + content="\n".join(current_lines).strip(), + ) + ) + + return [section for section in sections if section.content] + + +def extract_metadata(lines: list[str]) -> dict[str, str]: + metadata: dict[str, str] = {} + + for line in lines[:80]: + match = FIELD_PATTERN.match(line.strip()) + if not match: + continue + key = match.group("key").strip().lower() + value = match.group("value").strip() + metadata[key] = value + + # Legacy files vary between "FLELMR", "FLELMR Code", and similar labels. + if key.startswith("flelmr"): + metadata["flelmr"] = value + + return metadata + + +def extract_summary(lines: list[str], sections: list[Section]) -> str: + if not sections: + return "" + + header_content = "" + for section in sections: + if section.heading == "HEADER": + header_content = section.content + break + if not header_content: + return "" + + header_lines = [line.rstrip() for line in header_content.splitlines()] + for index, raw_line in enumerate(header_lines): + if not SUMMARY_MARKER_PATTERN.fullmatch(raw_line.strip()): + continue + + summary_lines: list[str] = [] + for candidate in header_lines[index + 1 :]: + stripped = candidate.strip() + if not stripped: + if summary_lines: + summary_lines.append("") + continue + if SECTION_PATTERN.fullmatch(stripped): + break + if stripped.startswith("[") and not summary_lines: + break + summary_lines.append(stripped) + + compact_lines = [line for line in summary_lines if line] + if compact_lines: + return normalize_whitespace(" ".join(compact_lines)) + return "" + + return "" + + +def parse_species_file(path: Path) -> SpeciesRecord: + lines = normalize_text(path.read_text(encoding="utf-8", errors="replace")).split("\n") + metadata = extract_metadata(lines) + sections = split_sections(lines) + diagnostics: list[IngestDiagnostic] = [] + + title = metadata.get("title", path.stem) + common_name = metadata.get("common name", path.stem.replace(" SLH", "")) + scientific_name = metadata.get("scientific name", "") + flelmr_code = metadata.get("flelmr", "") + + summary = extract_summary(lines, sections) + + if not scientific_name: + diagnostics.append( + IngestDiagnostic( + level="warning", + code="missing_scientific_name", + message="Scientific name could not be extracted from the source file.", + ) + ) + if not flelmr_code: + diagnostics.append( + IngestDiagnostic( + level="warning", + code="missing_flelmr_code", + message="FLELMR code could not be extracted from the source file.", + ) + ) + if len(sections) < 3: + diagnostics.append( + IngestDiagnostic( + level="warning", + code="low_section_count", + message="Very few top-level sections were detected; headings may need parser refinement.", + ) + ) + if not summary: + diagnostics.append( + IngestDiagnostic( + level="warning", + code="missing_summary", + message="No summary text was extracted from the source file.", + ) + ) + + slug_base = common_name or title or path.stem + return SpeciesRecord( + slug=slugify(slug_base), + source_file=path.name, + title=title, + common_name=common_name, + scientific_name=scientific_name, + flelmr_code=flelmr_code, + summary=summary, + section_count=len(sections), + sections=sections, + diagnostics=diagnostics, + ) + + +def 
load_species_records(data_dir: str) -> list[SpeciesRecord]: + base = Path(data_dir) + if not base.exists(): + return [] + + records: list[SpeciesRecord] = [] + for path in sorted(base.glob("*.txt")): + records.append(parse_species_file(path)) + return records + + +def get_default_data_dir() -> str: + return os.environ.get( + "ECOSPECIES_DATA_DIR", + str( + Path(__file__).resolve().parents[4].parent + / "01-legacy-code-and-data" + / "InputFiles - TXT" + ), + ) diff --git a/apps/api/src/ecospecies_api/repository.py b/apps/api/src/ecospecies_api/repository.py new file mode 100644 index 0000000..1be8d43 --- /dev/null +++ b/apps/api/src/ecospecies_api/repository.py @@ -0,0 +1,508 @@ +from __future__ import annotations + +from collections import Counter +from datetime import datetime, timezone +import json + +from sqlalchemy import inspect, select, text +from sqlalchemy.exc import SQLAlchemyError + +from ecospecies_api.db import SessionLocal, create_db_engine +from ecospecies_api.models import Base, DocumentSection, IngestDiagnosticRecord, Species, SpeciesAuditLog + +WORKFLOW_STATUSES = {"draft", "review", "published"} +SYSTEM_IMPORT_USER = "system-import" + + +def ensure_schema() -> None: + engine = create_db_engine() + Base.metadata.create_all(engine) + inspector = inspect(engine) + species_columns = {column["name"] for column in inspector.get_columns("species")} + statements: list[str] = [] + if "publication_status" not in species_columns: + statements.append("ALTER TABLE species ADD COLUMN publication_status VARCHAR(32) DEFAULT 'published'") + if "is_archived" not in species_columns: + statements.append("ALTER TABLE species ADD COLUMN is_archived BOOLEAN DEFAULT FALSE") + if "editor_notes" not in species_columns: + statements.append("ALTER TABLE species ADD COLUMN editor_notes TEXT DEFAULT ''") + if "last_modified_by" not in species_columns: + statements.append("ALTER TABLE species ADD COLUMN last_modified_by VARCHAR(255) DEFAULT 'system-import'") + if statements: + with engine.begin() as connection: + for statement in statements: + connection.execute(text(statement)) + connection.execute( + text( + "UPDATE species SET publication_status = COALESCE(publication_status, 'published'), " + "is_archived = COALESCE(is_archived, FALSE), " + "editor_notes = COALESCE(editor_notes, ''), " + "last_modified_by = COALESCE(last_modified_by, 'system-import')" + ) + ) + + +def import_species_payload(payload: list[dict[str, object]]) -> None: + ensure_schema() + with SessionLocal() as session: + existing_species = { + item.slug: item for item in session.scalars(select(Species)).all() + } + incoming_slugs: set[str] = set() + for item in payload: + slug = item["slug"] + incoming_slugs.add(slug) + species = existing_species.get(slug) + if species is None: + species = Species( + slug=slug, + source_file=item["source_file"], + title=item["title"], + common_name=item["common_name"], + scientific_name=item["scientific_name"], + flelmr_code=item["flelmr_code"], + summary=item["summary"], + section_count=item["section_count"], + publication_status="published", + is_archived=False, + editor_notes="", + last_modified_by=SYSTEM_IMPORT_USER, + ) + session.add(species) + session.flush() + + _ = species.sections + _ = species.diagnostics + _ = species.audit_entries + + editorial_fields, section_positions = _get_editor_preservation_state(species) + + species.source_file = item["source_file"] + species.title = item["title"] + species.common_name = item["common_name"] + species.scientific_name = item["scientific_name"] + 
species.flelmr_code = item["flelmr_code"] + species.section_count = item["section_count"] + if species.is_archived: + species.is_archived = False + session.add( + SpeciesAuditLog( + species_id=species.id, + changed_by=SYSTEM_IMPORT_USER, + changed_at=datetime.now(timezone.utc).isoformat(), + action="import_restore", + details_json=json.dumps( + {"is_archived": {"from": True, "to": False}}, + ensure_ascii=True, + ), + ) + ) + + if "summary" not in editorial_fields: + species.summary = item["summary"] + if species.last_modified_by == "": + species.last_modified_by = SYSTEM_IMPORT_USER + + existing_sections = {section.position: section for section in species.sections} + incoming_positions: set[int] = set() + for position, section_payload in enumerate(item["sections"], start=1): + incoming_positions.add(position) + section = existing_sections.get(position) + if section is None: + section = DocumentSection( + species_id=species.id, + position=position, + heading=section_payload["heading"], + content=section_payload["content"], + ) + session.add(section) + continue + + section.heading = section_payload["heading"] + if position not in section_positions: + section.content = section_payload["content"] + session.add(section) + + for position, section in existing_sections.items(): + if position not in incoming_positions: + session.delete(section) + + for diagnostic in list(species.diagnostics): + session.delete(diagnostic) + + for diagnostic in item["diagnostics"]: + if diagnostic["code"] == "missing_summary": + continue + session.add( + IngestDiagnosticRecord( + species_id=species.id, + level=diagnostic["level"], + code=diagnostic["code"], + message=diagnostic["message"], + ) + ) + + session.add(species) + + for slug, species in existing_species.items(): + if slug in incoming_slugs: + continue + if not species.is_archived: + species.is_archived = True + session.add( + SpeciesAuditLog( + species_id=species.id, + changed_by=SYSTEM_IMPORT_USER, + changed_at=datetime.now(timezone.utc).isoformat(), + action="import_archive", + details_json=json.dumps( + {"is_archived": {"from": False, "to": True}}, + ensure_ascii=True, + ), + ) + ) + session.add(species) + + session.commit() + + +def _get_editor_preservation_state(species: Species) -> tuple[set[str], set[int]]: + editorial_fields: set[str] = set() + section_positions: set[int] = set() + + for entry in species.audit_entries: + try: + details = json.loads(entry.details_json) + except json.JSONDecodeError: + continue + + if entry.action == "editorial_update": + editorial_fields.update(details.keys()) + elif entry.action == "section_update": + section_position = details.get("section_position") + if isinstance(section_position, int): + section_positions.add(section_position) + + return editorial_fields, section_positions + + +def has_species_data() -> bool: + ensure_schema() + with SessionLocal() as session: + species = session.scalar(select(Species.id).limit(1)) + return species is not None + + +def get_readiness_status() -> dict[str, object]: + try: + ensure_schema() + with SessionLocal() as session: + species_count = session.query(Species).count() + return { + "ready": True, + "database": "ok", + "species_count": species_count, + "data_state": "loaded" if species_count > 0 else "empty", + } + except SQLAlchemyError as exc: + return { + "ready": False, + "database": "error", + "species_count": None, + "data_state": "unavailable", + "error": str(exc), + } + + +def _species_to_payload(species: Species, include_sections: bool = True) -> dict[str, object]: + 
return { + "slug": species.slug, + "source_file": species.source_file, + "title": species.title, + "common_name": species.common_name, + "scientific_name": species.scientific_name, + "flelmr_code": species.flelmr_code, + "summary": species.summary, + "section_count": species.section_count, + "publication_status": species.publication_status, + "is_archived": species.is_archived, + "editor_notes": species.editor_notes, + "last_modified_by": species.last_modified_by, + "diagnostics": [ + {"level": diagnostic.level, "code": diagnostic.code, "message": diagnostic.message} + for diagnostic in species.diagnostics + ], + "sections": ( + [ + { + "id": section.id, + "position": section.position, + "heading": section.heading, + "content": section.content, + } + for section in species.sections + ] + if include_sections + else [] + ), + } + + +def list_species( + search: str = "", + include_unpublished: bool = False, + include_archived: bool = False, +) -> list[dict[str, object]]: + ensure_schema() + with SessionLocal() as session: + query = select(Species).order_by(Species.common_name, Species.title) + species = list(session.scalars(query)) + payload = [_species_to_payload(item, include_sections=False) for item in species] + if not include_archived: + payload = [item for item in payload if not item["is_archived"]] + if not include_unpublished: + payload = [item for item in payload if item["publication_status"] == "published"] + if search: + needle = search.lower() + payload = [ + item + for item in payload + if needle in item["common_name"].lower() + or needle in item["scientific_name"].lower() + or needle in item["title"].lower() + ] + return payload + + +def get_species_by_slug( + slug: str, + include_unpublished: bool = False, + include_archived: bool = False, +) -> dict[str, object] | None: + ensure_schema() + with SessionLocal() as session: + species = session.scalar(select(Species).where(Species.slug == slug)) + if species is None: + return None + if not include_archived and species.is_archived: + return None + if not include_unpublished and species.publication_status != "published": + return None + _ = species.sections + _ = species.diagnostics + return _species_to_payload(species, include_sections=True) + + +def get_summary_metrics() -> dict[str, object]: + species = list_species() + section_total = sum(item["section_count"] for item in species) + counter = Counter() + for item in species: + for diagnostic in item["diagnostics"]: + counter[diagnostic["code"]] += 1 + return { + "species_count": len(species), + "section_count": section_total, + "diagnostic_counts": dict(counter), + } + + +def list_diagnostics() -> list[dict[str, object]]: + species = list_species() + return [ + { + "slug": item["slug"], + "common_name": item["common_name"], + "source_file": item["source_file"], + "diagnostics": item["diagnostics"], + } + for item in species + if item["diagnostics"] + ] + + +def get_editor_species_list(search: str = "") -> list[dict[str, object]]: + return list_species(search=search, include_unpublished=True, include_archived=True) + + +def get_editor_species_workflow(slug: str) -> dict[str, object] | None: + item = get_species_by_slug(slug, include_unpublished=True, include_archived=True) + if item is None: + return None + return { + "slug": item["slug"], + "title": item["title"], + "common_name": item["common_name"], + "publication_status": item["publication_status"], + "is_archived": item["is_archived"], + "editor_notes": item["editor_notes"], + "last_modified_by": item["last_modified_by"], + 
"diagnostic_count": len(item["diagnostics"]), + } + + +def get_editor_species_detail(slug: str) -> dict[str, object] | None: + return get_species_by_slug(slug, include_unpublished=True, include_archived=True) + + +def list_species_audit(slug: str) -> list[dict[str, object]] | None: + ensure_schema() + with SessionLocal() as session: + species = session.scalar(select(Species).where(Species.slug == slug)) + if species is None: + return None + _ = species.audit_entries + return [ + { + "id": entry.id, + "changed_by": entry.changed_by, + "changed_at": entry.changed_at, + "action": entry.action, + "details": json.loads(entry.details_json), + } + for entry in species.audit_entries + ] + + +def update_species_editorial( + slug: str, + publication_status: str | None, + summary: str | None, + editor_notes: str | None, + is_archived: bool | None, + username: str, +) -> dict[str, object] | None: + ensure_schema() + with SessionLocal() as session: + species = session.scalar(select(Species).where(Species.slug == slug)) + if species is None: + return None + before = { + "publication_status": species.publication_status, + "summary": species.summary, + "editor_notes": species.editor_notes, + "is_archived": species.is_archived, + } + + if publication_status is not None: + normalized = publication_status.strip().lower() + if normalized not in WORKFLOW_STATUSES: + raise ValueError( + f"Unsupported publication_status: {publication_status}. " + f"Expected one of {sorted(WORKFLOW_STATUSES)}" + ) + species.publication_status = normalized + + if summary is not None: + species.summary = summary.strip() + + if editor_notes is not None: + species.editor_notes = editor_notes.strip() + + if is_archived is not None: + species.is_archived = is_archived + + after = { + "publication_status": species.publication_status, + "summary": species.summary, + "editor_notes": species.editor_notes, + "is_archived": species.is_archived, + } + changed_fields = { + key: {"from": before[key], "to": after[key]} + for key in before + if before[key] != after[key] + } + + if changed_fields: + species.last_modified_by = username + session.add( + SpeciesAuditLog( + species_id=species.id, + changed_by=username, + changed_at=datetime.now(timezone.utc).isoformat(), + action="editorial_update", + details_json=json.dumps(changed_fields, ensure_ascii=True), + ) + ) + + session.add(species) + session.commit() + session.refresh(species) + + return { + "slug": species.slug, + "summary": species.summary, + "publication_status": species.publication_status, + "editor_notes": species.editor_notes, + "is_archived": species.is_archived, + "last_modified_by": species.last_modified_by, + "changed_fields": changed_fields, + } + + +def update_species_section( + slug: str, + section_position: int, + content: str, + username: str, +) -> dict[str, object] | None: + ensure_schema() + with SessionLocal() as session: + species = session.scalar(select(Species).where(Species.slug == slug)) + if species is None: + return None + + section = session.scalar( + select(DocumentSection).where( + DocumentSection.species_id == species.id, + DocumentSection.position == section_position, + ) + ) + if section is None: + return None + + new_content = content.strip() + changed_fields = {} + if section.content != new_content: + changed_fields["section_content"] = { + "from": section.content, + "to": new_content, + } + + if changed_fields: + section.content = new_content + species.last_modified_by = username + session.add( + SpeciesAuditLog( + species_id=species.id, + 
changed_by=username, + changed_at=datetime.now(timezone.utc).isoformat(), + action="section_update", + details_json=json.dumps( + { + "section_position": section.position, + "section_heading": section.heading, + **changed_fields, + }, + ensure_ascii=True, + ), + ) + ) + + session.add(section) + session.add(species) + session.commit() + session.refresh(section) + + return { + "slug": species.slug, + "section": { + "id": section.id, + "position": section.position, + "heading": section.heading, + "content": section.content, + }, + "last_modified_by": species.last_modified_by, + "changed_fields": changed_fields, + } diff --git a/apps/api/test_repository.py b/apps/api/test_repository.py new file mode 100644 index 0000000..969ad98 --- /dev/null +++ b/apps/api/test_repository.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +import importlib.util +import sys +from pathlib import Path + + +ROOT = Path(__file__).resolve().parent +SRC = ROOT / "src" +if str(SRC) not in sys.path: + sys.path.insert(0, str(SRC)) + +TEST_PATH = ROOT / "tests" / "test_repository.py" +SPEC = importlib.util.spec_from_file_location("ecospecies_api_test_repository", TEST_PATH) +MODULE = importlib.util.module_from_spec(SPEC) +assert SPEC is not None and SPEC.loader is not None +SPEC.loader.exec_module(MODULE) + +for name in dir(MODULE): + if name.startswith("Test") or name.endswith("Tests"): + globals()[name] = getattr(MODULE, name) diff --git a/apps/api/tests/test_repository.py b/apps/api/tests/test_repository.py new file mode 100644 index 0000000..1219234 --- /dev/null +++ b/apps/api/tests/test_repository.py @@ -0,0 +1,307 @@ +from __future__ import annotations + +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker + +from ecospecies_api import repository + + +SAMPLE_PAYLOAD = [ + { + "slug": "test-shad", + "source_file": "Test Shad.txt", + "title": "Test Shad (Alosa testus)", + "common_name": "Test Shad", + "scientific_name": "Alosa testus", + "flelmr_code": "9999", + "summary": "", + "section_count": 2, + "diagnostics": [ + { + "level": "warning", + "code": "missing_summary", + "message": "Summary/Abstract section is missing.", + }, + { + "level": "warning", + "code": "missing_citations", + "message": "References section not found.", + }, + ], + "sections": [ + {"heading": "HEADER", "content": "Header content"}, + {"heading": "HABITAT", "content": "Habitat content"}, + ], + } +] + +UPDATED_PAYLOAD = [ + { + "slug": "test-shad", + "source_file": "Test Shad v2.txt", + "title": "Test Shad Revised (Alosa testus)", + "common_name": "Test Shad", + "scientific_name": "Alosa testus revised", + "flelmr_code": "1000", + "summary": "Imported replacement summary.", + "section_count": 2, + "diagnostics": [ + { + "level": "warning", + "code": "missing_flelmr_code", + "message": "Replacement diagnostic.", + } + ], + "sections": [ + {"heading": "HEADER", "content": "Replacement header content"}, + {"heading": "HABITAT", "content": "Replacement habitat content"}, + ], + } +] + +DIFFERENT_PAYLOAD = [ + { + "slug": "other-fish", + "source_file": "Other Fish.txt", + "title": "Other Fish (Pisces otherus)", + "common_name": "Other Fish", + "scientific_name": "Pisces otherus", + "flelmr_code": "2000", + "summary": "Other fish summary.", + "section_count": 1, + "diagnostics": [], + "sections": [ + {"heading": "HEADER", "content": "Other fish header"}, + ], + } +] + + +class RepositoryWorkflowTests(unittest.TestCase): 
+ def setUp(self) -> None: + self.tempdir = tempfile.TemporaryDirectory() + db_path = Path(self.tempdir.name) / "test.db" + self.engine = create_engine(f"sqlite:///{db_path}", future=True) + self.session_local = sessionmaker( + bind=self.engine, + autoflush=False, + autocommit=False, + future=True, + ) + self.engine_patch = patch.object(repository, "create_db_engine", return_value=self.engine) + self.session_patch = patch.object(repository, "SessionLocal", self.session_local) + self.engine_patch.start() + self.session_patch.start() + repository.import_species_payload(SAMPLE_PAYLOAD) + + def tearDown(self) -> None: + self.session_patch.stop() + self.engine_patch.stop() + self.engine.dispose() + self.tempdir.cleanup() + + def test_import_filters_missing_summary_diagnostic_from_accepted_dataset(self) -> None: + detail = repository.get_species_by_slug("test-shad") + + self.assertIsNotNone(detail) + self.assertEqual(detail["section_count"], 2) + self.assertEqual([section["position"] for section in detail["sections"]], [1, 2]) + self.assertEqual([item["code"] for item in detail["diagnostics"]], ["missing_citations"]) + + def test_editorial_update_changes_publication_visibility_and_creates_audit(self) -> None: + result = repository.update_species_editorial( + slug="test-shad", + publication_status="draft", + summary="Editor-authored summary.", + editor_notes="Needs another review pass.", + is_archived=None, + username="bob", + ) + + self.assertIsNotNone(result) + self.assertEqual(result["publication_status"], "draft") + self.assertEqual(result["summary"], "Editor-authored summary.") + self.assertEqual(result["last_modified_by"], "bob") + self.assertEqual(repository.get_species_by_slug("test-shad"), None) + + editor_detail = repository.get_editor_species_detail("test-shad") + audit = repository.list_species_audit("test-shad") + + self.assertIsNotNone(editor_detail) + self.assertEqual(editor_detail["publication_status"], "draft") + self.assertEqual(editor_detail["summary"], "Editor-authored summary.") + self.assertEqual(editor_detail["editor_notes"], "Needs another review pass.") + self.assertIsNotNone(audit) + self.assertEqual(audit[0]["action"], "editorial_update") + self.assertEqual(audit[0]["changed_by"], "bob") + self.assertIn("summary", audit[0]["details"]) + self.assertIn("publication_status", audit[0]["details"]) + + def test_section_update_records_section_audit_metadata(self) -> None: + result = repository.update_species_section( + slug="test-shad", + section_position=2, + content="Updated habitat content.", + username="carol", + ) + + self.assertIsNotNone(result) + self.assertEqual(result["section"]["position"], 2) + self.assertEqual(result["section"]["content"], "Updated habitat content.") + self.assertEqual(result["last_modified_by"], "carol") + self.assertEqual(sorted(result["changed_fields"].keys()), ["section_content"]) + + editor_detail = repository.get_editor_species_detail("test-shad") + audit = repository.list_species_audit("test-shad") + + self.assertIsNotNone(editor_detail) + self.assertEqual(editor_detail["sections"][1]["content"], "Updated habitat content.") + self.assertIsNotNone(audit) + self.assertEqual(audit[0]["action"], "section_update") + self.assertEqual(audit[0]["changed_by"], "carol") + self.assertEqual(audit[0]["details"]["section_position"], 2) + self.assertEqual(audit[0]["details"]["section_heading"], "HABITAT") + self.assertEqual( + audit[0]["details"]["section_content"], + {"from": "Habitat content", "to": "Updated habitat content."}, + ) + + def 
test_reimport_preserves_editorial_state_and_audit_history(self) -> None: + repository.update_species_editorial( + slug="test-shad", + publication_status="draft", + summary="Editor-authored summary.", + editor_notes="Needs another review pass.", + is_archived=None, + username="bob", + ) + repository.update_species_section( + slug="test-shad", + section_position=2, + content="Updated habitat content.", + username="carol", + ) + + repository.import_species_payload(UPDATED_PAYLOAD) + + editor_detail = repository.get_editor_species_detail("test-shad") + audit = repository.list_species_audit("test-shad") + + self.assertIsNotNone(editor_detail) + self.assertEqual(editor_detail["source_file"], "Test Shad v2.txt") + self.assertEqual(editor_detail["title"], "Test Shad Revised (Alosa testus)") + self.assertEqual(editor_detail["scientific_name"], "Alosa testus revised") + self.assertEqual(editor_detail["flelmr_code"], "1000") + self.assertEqual(editor_detail["publication_status"], "draft") + self.assertEqual(editor_detail["summary"], "Editor-authored summary.") + self.assertEqual(editor_detail["editor_notes"], "Needs another review pass.") + self.assertEqual(editor_detail["sections"][0]["content"], "Replacement header content") + self.assertEqual(editor_detail["sections"][1]["content"], "Updated habitat content.") + self.assertEqual([item["code"] for item in editor_detail["diagnostics"]], ["missing_flelmr_code"]) + self.assertIsNotNone(audit) + self.assertEqual(len(audit), 2) + self.assertEqual([entry["action"] for entry in audit], ["section_update", "editorial_update"]) + + def test_reimport_updates_summary_when_no_editorial_override_exists(self) -> None: + repository.import_species_payload(UPDATED_PAYLOAD) + + detail = repository.get_species_by_slug("test-shad") + + self.assertIsNotNone(detail) + self.assertEqual(detail["summary"], "Imported replacement summary.") + self.assertEqual(detail["sections"][0]["content"], "Replacement header content") + + def test_editor_can_archive_species_explicitly(self) -> None: + result = repository.update_species_editorial( + slug="test-shad", + publication_status=None, + summary=None, + editor_notes=None, + is_archived=True, + username="dana", + ) + + public_detail = repository.get_species_by_slug("test-shad") + editor_detail = repository.get_editor_species_detail("test-shad") + audit = repository.list_species_audit("test-shad") + + self.assertIsNotNone(result) + self.assertTrue(result["is_archived"]) + self.assertEqual(result["last_modified_by"], "dana") + self.assertIsNone(public_detail) + self.assertIsNotNone(editor_detail) + self.assertTrue(editor_detail["is_archived"]) + self.assertIsNotNone(audit) + self.assertEqual(audit[0]["action"], "editorial_update") + self.assertEqual(audit[0]["details"]["is_archived"], {"from": False, "to": True}) + + def test_editor_can_unarchive_species_explicitly(self) -> None: + repository.update_species_editorial( + slug="test-shad", + publication_status=None, + summary=None, + editor_notes=None, + is_archived=True, + username="dana", + ) + + result = repository.update_species_editorial( + slug="test-shad", + publication_status=None, + summary=None, + editor_notes=None, + is_archived=False, + username="erin", + ) + + public_detail = repository.get_species_by_slug("test-shad") + audit = repository.list_species_audit("test-shad") + + self.assertIsNotNone(result) + self.assertFalse(result["is_archived"]) + self.assertEqual(result["last_modified_by"], "erin") + self.assertIsNotNone(public_detail) + self.assertIsNotNone(audit) + 
self.assertEqual(audit[0]["details"]["is_archived"], {"from": True, "to": False}) + + def test_missing_species_is_archived_instead_of_deleted(self) -> None: + repository.import_species_payload(DIFFERENT_PAYLOAD) + + public_detail = repository.get_species_by_slug("test-shad") + editor_detail = repository.get_editor_species_detail("test-shad") + editor_items = repository.get_editor_species_list() + audit = repository.list_species_audit("test-shad") + + self.assertIsNone(public_detail) + self.assertIsNotNone(editor_detail) + self.assertTrue(editor_detail["is_archived"]) + self.assertEqual([item["slug"] for item in repository.list_species()], ["other-fish"]) + self.assertEqual([item["slug"] for item in editor_items], ["other-fish", "test-shad"]) + self.assertIsNotNone(audit) + self.assertEqual(audit[0]["action"], "import_archive") + self.assertEqual(audit[0]["details"]["is_archived"], {"from": False, "to": True}) + + def test_archived_species_is_restored_when_it_reappears(self) -> None: + repository.import_species_payload(DIFFERENT_PAYLOAD) + repository.import_species_payload(UPDATED_PAYLOAD) + + public_detail = repository.get_species_by_slug("test-shad") + editor_detail = repository.get_editor_species_detail("test-shad") + audit = repository.list_species_audit("test-shad") + + self.assertIsNotNone(public_detail) + self.assertIsNotNone(editor_detail) + self.assertFalse(editor_detail["is_archived"]) + self.assertEqual(public_detail["summary"], "Imported replacement summary.") + self.assertIsNotNone(audit) + self.assertEqual(audit[0]["action"], "import_restore") + self.assertEqual(audit[0]["details"]["is_archived"], {"from": True, "to": False}) + + +if __name__ == "__main__": + unittest.main() diff --git a/apps/web/app.js b/apps/web/app.js new file mode 100644 index 0000000..454eae8 --- /dev/null +++ b/apps/web/app.js @@ -0,0 +1,349 @@ +const apiBase = ""; + +const speciesList = document.querySelector("#species-list"); +const searchInput = document.querySelector("#search"); +const archiveFilterGroup = document.querySelector("#archive-filter-group"); +const detailEmpty = document.querySelector("#detail-empty"); +const detail = document.querySelector("#detail"); +const detailCode = document.querySelector("#detail-code"); +const detailCommonName = document.querySelector("#detail-common-name"); +const detailArchiveBadge = document.querySelector("#detail-archive-badge"); +const detailArchiveNote = document.querySelector("#detail-archive-note"); +const detailScientificName = document.querySelector("#detail-scientific-name"); +const detailSummary = document.querySelector("#detail-summary"); +const detailSections = document.querySelector("#detail-sections"); +const speciesCount = document.querySelector("#species-count"); +const sectionCount = document.querySelector("#section-count"); +const authTokenInput = document.querySelector("#auth-token"); +const authSaveButton = document.querySelector("#auth-save"); +const authClearButton = document.querySelector("#auth-clear"); +const authStatus = document.querySelector("#auth-status"); +const editorPanel = document.querySelector("#editor-panel"); +const editorPublicationStatus = document.querySelector("#editor-publication-status"); +const editorSummary = document.querySelector("#editor-summary"); +const editorNotes = document.querySelector("#editor-notes"); +const editorIsArchived = document.querySelector("#editor-is-archived"); +const editorSaveButton = document.querySelector("#editor-save"); +const editorStatus = document.querySelector("#editor-status"); 
+const auditPanel = document.querySelector("#audit-panel");
+const auditList = document.querySelector("#audit-list");
+
+let currentItems = [];
+let currentSlug = null;
+let currentSession = null;
+let currentArchiveFilter = "active";
+
+function getAuthToken() {
+  return window.localStorage.getItem("ecospecies_auth_token") || "";
+}
+
+function getAuthHeaders() {
+  const token = getAuthToken();
+  return token ? { Authorization: `Bearer ${token}` } : {};
+}
+
+function escapeHtml(value) {
+  return value
+    .replaceAll("&", "&amp;")
+    .replaceAll("<", "&lt;")
+    .replaceAll(">", "&gt;");
+}
+
+async function requestJson(path, options = {}) {
+  const headers = new Headers(options.headers || {});
+  const authHeaders = getAuthHeaders();
+  for (const [key, value] of Object.entries(authHeaders)) {
+    headers.set(key, value);
+  }
+  if (options.body && !headers.has("Content-Type")) {
+    headers.set("Content-Type", "application/json");
+  }
+  const response = await fetch(`${apiBase}${path}`, { ...options, headers });
+  const data = await response.json();
+  return { response, data };
+}
+
+function isEditorSession() {
+  return Boolean(currentSession && currentSession.user && ["editor", "admin"].includes(currentSession.user.role));
+}
+
+function getVisibleItems(items) {
+  if (!isEditorSession()) {
+    return items;
+  }
+  if (currentArchiveFilter === "archived") {
+    return items.filter((item) => item.is_archived);
+  }
+  if (currentArchiveFilter === "all") {
+    return items;
+  }
+  return items.filter((item) => !item.is_archived);
+}
+
+function syncArchiveFilterUi() {
+  archiveFilterGroup.classList.toggle("hidden", !isEditorSession());
+  for (const button of archiveFilterGroup.querySelectorAll("[data-archive-filter]")) {
+    button.classList.toggle("is-active", button.dataset.archiveFilter === currentArchiveFilter);
+  }
+}
+
+async function loadSession() {
+  const { data } = await requestJson("/api/auth/session");
+  currentSession = data;
+  if (!isEditorSession()) {
+    currentArchiveFilter = "active";
+  }
+  authTokenInput.value = getAuthToken();
+  if (data.authenticated) {
+    authStatus.textContent = `${data.user.username} (${data.user.role})`;
+  } else if (data.auth_configured) {
+    authStatus.textContent = "Auth configured, public session";
+  } else {
+    authStatus.textContent = "Public access";
+  }
+  syncArchiveFilterUi();
+}
+
+async function loadSummary() {
+  const { data } = await requestJson("/api/insights/summary");
+  speciesCount.textContent = data.species_count;
+  sectionCount.textContent = data.section_count;
+}
+
+function renderSpecies(items) {
+  speciesList.innerHTML = "";
+  const visibleItems = getVisibleItems(items);
+
+  if (!visibleItems.length) {
+    speciesList.innerHTML = `
+      <p>
+        ${isEditorSession() ? "No species match the current archive filter." : "No species match the current search."}
+      </p>
+    `;
+    return;
+  }
+
+  for (const item of visibleItems) {
+    const button = document.createElement("button");
+    button.className = item.is_archived ? "species-card species-card-archived" : "species-card";
+    button.type = "button";
+    const archivedMeta = item.is_archived ? `<span class="species-state-badge">Archived</span>` : "";
+    button.innerHTML = `
+      <span class="species-name">${escapeHtml(item.common_name || item.title)}</span>
+      <span class="species-meta">${escapeHtml(item.scientific_name || "Scientific name missing")}</span>
+      <span class="species-meta">${escapeHtml(item.publication_status || "published")}${archivedMeta}</span>
+      <span class="species-meta">${item.diagnostic_count ? `${item.diagnostic_count} ingest flags` : "No ingest flags"}</span>
+      <span class="species-snippet">${escapeHtml((item.summary || "No summary extracted yet.").slice(0, 180))}</span>
+    `;
+    button.addEventListener("click", () => loadSpecies(item.slug));
+    speciesList.appendChild(button);
+  }
+}
+
+async function loadSpeciesList(search = "") {
+  const query = search ? `?search=${encodeURIComponent(search)}` : "";
+  const path = isEditorSession() ? `/api/editor/species${query}` : `/api/species${query}`;
+  const { data } = await requestJson(path);
+  currentItems = data.items;
+  syncArchiveFilterUi();
+  renderSpecies(currentItems);
+}
+
+async function loadSpecies(slug) {
+  currentSlug = slug;
+  const path = isEditorSession() ? `/api/editor/species/${slug}` : `/api/species/${slug}`;
+  const { response, data } = await requestJson(path);
+  if (!response.ok) {
+    detailEmpty.classList.remove("hidden");
+    detail.classList.add("hidden");
+    speciesList.innerHTML = `
+      <p class="error">
+        ${escapeHtml(data.error || "Unable to load species.")}
+      </p>
+    `;
+    return;
+  }
+
+  detailEmpty.classList.add("hidden");
+  detail.classList.remove("hidden");
+
+  detailCode.textContent = data.flelmr_code ? `FLELMR ${data.flelmr_code}` : "Legacy source file";
+  detailCommonName.textContent = data.common_name || data.title;
+  detailArchiveBadge.classList.toggle("hidden", !data.is_archived);
+  detailArchiveNote.classList.toggle("hidden", !data.is_archived);
+  detailScientificName.textContent = data.scientific_name || "Scientific name missing in source";
+  detailSummary.textContent = data.summary || "No summary extracted from the current source file.";
+  editorPanel.classList.toggle("hidden", !isEditorSession());
+  auditPanel.classList.toggle("hidden", !isEditorSession());
+  if (isEditorSession()) {
+    editorPublicationStatus.value = data.publication_status || "published";
+    editorSummary.value = data.summary || "";
+    editorNotes.value = data.editor_notes || "";
+    editorIsArchived.checked = Boolean(data.is_archived);
+    editorStatus.textContent = data.last_modified_by
+      ? `Last modified by ${data.last_modified_by}`
+      : "Editor session active";
+    await loadAudit(slug);
+  }
+
+  detailSections.innerHTML = "";
+  if (data.diagnostics.length) {
+    const diagnosticsEl = document.createElement("section");
+    diagnosticsEl.className = "detail-section detail-diagnostics";
+    diagnosticsEl.innerHTML = `
+      <h3>Ingest Diagnostics</h3>
+      <ul class="diagnostic-list">
+        ${data.diagnostics
+          .map(
+            (diagnostic) =>
+              `<li>${escapeHtml(diagnostic.code)}: ${escapeHtml(diagnostic.message)}</li>`,
+          )
+          .join("")}
+      </ul>
+    `;
+    detailSections.appendChild(diagnosticsEl);
+  }
+  for (const section of data.sections) {
+    const sectionEl = document.createElement("section");
+    sectionEl.className = "detail-section";
+    if (isEditorSession()) {
+      sectionEl.innerHTML = `
+        <h3>${escapeHtml(section.heading)}</h3>
+        <textarea class="section-editor" data-section-position="${section.position}">${escapeHtml(section.content)}</textarea>
+        <button type="button" class="section-save" data-section-position="${section.position}">Save Section</button>
+      `;
+    } else {
+      sectionEl.innerHTML = `
+        <h3>${escapeHtml(section.heading)}</h3>
+        <pre>${escapeHtml(section.content)}</pre>
+      `;
+    }
+    detailSections.appendChild(sectionEl);
+  }
+
+  if (isEditorSession()) {
+    for (const button of detailSections.querySelectorAll(".section-save")) {
+      button.addEventListener("click", async (event) => {
+        const position = event.currentTarget.dataset.sectionPosition;
+        const textarea = detailSections.querySelector(`textarea[data-section-position="${position}"]`);
+        await saveSectionContent(Number(position), textarea.value);
+      });
+    }
+  }
+}
+
+function renderAudit(items) {
+  auditList.innerHTML = "";
+  if (!items.length) {
+    auditList.innerHTML = `
+      <p class="audit-meta">No audit entries yet.</p>
+    `;
+    return;
+  }
+
+  for (const item of items) {
+    const entry = document.createElement("article");
+    entry.className = "audit-entry";
+    const detailRows = Object.entries(item.details)
+      .map(([field, values]) => {
+        if (values && typeof values === "object" && "from" in values && "to" in values) {
+          return `<li>${escapeHtml(field)}: ${escapeHtml(String(values.from || ""))} -> ${escapeHtml(String(values.to || ""))}</li>`;
+        }
+        return `<li>${escapeHtml(field)}: ${escapeHtml(String(values ?? ""))}</li>`;
+      })
+      .join("");
+    entry.innerHTML = `
+      <p class="audit-meta">
+        ${escapeHtml(item.changed_by)} • ${escapeHtml(item.changed_at)} • ${escapeHtml(item.action)}
+      </p>
+      <ul>
+        ${detailRows}
+      </ul>
+    `;
+    auditList.appendChild(entry);
+  }
+}
+
+async function loadAudit(slug) {
+  if (!isEditorSession()) {
+    return;
+  }
+  const { response, data } = await requestJson(`/api/editor/species/${slug}/audit`);
+  if (!response.ok) {
+    auditList.innerHTML = `
+      <p class="error">${escapeHtml(data.error || "Unable to load audit history.")}</p>
+    `;
+    return;
+  }
+  renderAudit(data.items);
+}
+
+async function saveEditorialChanges() {
+  if (!currentSlug || !isEditorSession()) {
+    return;
+  }
+  editorStatus.textContent = "Saving...";
+  const { response, data } = await requestJson(`/api/editor/species/${currentSlug}/editorial`, {
+    method: "POST",
+    body: JSON.stringify({
+      publication_status: editorPublicationStatus.value,
+      summary: editorSummary.value,
+      editor_notes: editorNotes.value,
+      is_archived: editorIsArchived.checked,
+    }),
+  });
+  if (!response.ok) {
+    editorStatus.textContent = data.error || "Save failed";
+    return;
+  }
+  editorStatus.textContent = `Saved by ${data.last_modified_by}`;
+  await Promise.all([loadSummary(), loadSpeciesList(searchInput.value), loadSpecies(currentSlug)]);
+}
+
+async function saveSectionContent(sectionPosition, content) {
+  if (!currentSlug || !isEditorSession()) {
+    return;
+  }
+  editorStatus.textContent = `Saving section ${sectionPosition}...`;
+  const { response, data } = await requestJson(`/api/editor/species/${currentSlug}/sections/${sectionPosition}`, {
+    method: "POST",
+    body: JSON.stringify({ content }),
+  });
+  if (!response.ok) {
+    editorStatus.textContent = data.error || "Section save failed";
+    return;
+  }
+  editorStatus.textContent = `Section ${sectionPosition} saved by ${data.last_modified_by}`;
+  await loadSpecies(currentSlug);
+}
+
+searchInput.addEventListener("input", async (event) => {
+  await loadSpeciesList(event.target.value);
+});
+
+for (const button of archiveFilterGroup.querySelectorAll("[data-archive-filter]")) {
+  button.addEventListener("click", () => {
+    currentArchiveFilter = button.dataset.archiveFilter || "active";
+    syncArchiveFilterUi();
+    renderSpecies(currentItems);
+  });
+}
+
+authSaveButton.addEventListener("click", async () => {
+  const token = authTokenInput.value.trim();
+  if (token) {
+    window.localStorage.setItem("ecospecies_auth_token", token);
+  }
+  await loadSession();
+  await loadSpeciesList(searchInput.value);
+  if (currentSlug) {
+    await loadSpecies(currentSlug);
+  }
+});
+
+authClearButton.addEventListener("click", async () => {
+  window.localStorage.removeItem("ecospecies_auth_token");
+  authTokenInput.value = "";
+  await loadSession();
+  await loadSpeciesList(searchInput.value);
+  if (currentSlug) {
+    await loadSpecies(currentSlug);
+  }
});
+
+editorSaveButton.addEventListener("click", saveEditorialChanges);
+
+async function bootstrap() {
+  await loadSession();
+  await Promise.all([loadSummary(), loadSpeciesList()]);
+}
+
+bootstrap().catch((error) => {
+  speciesList.innerHTML = `
+    <p class="error">Failed to load data: ${escapeHtml(String(error))}</p>
+  `;
+});
diff --git a/apps/web/index.html b/apps/web/index.html
new file mode 100644
index 0000000..e1f969a
--- /dev/null
+++ b/apps/web/index.html
@@ -0,0 +1,109 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <title>EcoSpecies</title>
+    <link rel="stylesheet" href="styles.css" />
+  </head>
+  <body>
+    <div class="page">
+      <header class="hero">
+        <p class="eyebrow">Marine Species Knowledge System</p>
+        <h1>EcoSpecies</h1>
+        <p class="lede">
+          A modern follow-on for the legacy EcoSpecies archive, starting with direct ingestion
+          of historical Species Life History text files.
+        </p>
+        <div class="auth-bar">
+          <input id="auth-token" type="text" placeholder="Bearer token for editor access" />
+          <button id="auth-save" type="button">Use Token</button>
+          <button id="auth-clear" type="button" class="secondary-button">Clear</button>
+          <p id="auth-status" class="auth-status">Public access</p>
+        </div>
+        <div class="hero-stats">
+          <div class="stat">
+            <span id="species-count">0</span>
+            <label>Species</label>
+          </div>
+          <div class="stat">
+            <span id="section-count">0</span>
+            <label>Sections</label>
+          </div>
+        </div>
+      </header>
+
+      <main class="workspace">
+        <section class="panel">
+          <div class="panel-header">
+            <h2>Species</h2>
+            <input id="search" type="search" placeholder="Search species" />
+            <div id="archive-filter-group" class="archive-filter-group hidden">
+              <button type="button" class="archive-filter-button is-active" data-archive-filter="active">Active</button>
+              <button type="button" class="archive-filter-button" data-archive-filter="archived">Archived</button>
+              <button type="button" class="archive-filter-button" data-archive-filter="all">All</button>
+            </div>
+          </div>
+          <div id="species-list" class="species-list"></div>
+        </section>
+
+        <section class="panel">
+          <div id="detail-empty" class="empty-state">
+            <div>
+              <h2>Select a species</h2>
+              <p>Browse the migrated legacy corpus and inspect parsed sections from the original SLH files.</p>
+            </div>
+          </div>
+          <div id="detail" class="detail hidden">
+            <div class="detail-header">
+              <p id="detail-code" class="detail-code"></p>
+              <div class="detail-title-row">
+                <h2 id="detail-common-name"></h2>
+                <span id="detail-archive-badge" class="detail-badge hidden">Archived</span>
+              </div>
+              <p id="detail-scientific-name" class="detail-scientific-name"></p>
+              <p id="detail-archive-note" class="detail-archive-note hidden">
+                This species is archived and hidden from public browsing.
+              </p>
+              <p id="detail-summary" class="detail-summary"></p>
+            </div>
+            <div id="editor-panel" class="editor-panel hidden">
+              <label class="editor-label" for="editor-publication-status">Publication Status</label>
+              <select id="editor-publication-status">
+                <option value="draft">Draft</option>
+                <option value="review">Review</option>
+                <option value="published">Published</option>
+              </select>
+              <label class="editor-label" for="editor-summary">Summary</label>
+              <textarea id="editor-summary" rows="4"></textarea>
+              <label class="editor-label" for="editor-notes">Editor Notes</label>
+              <textarea id="editor-notes" rows="4"></textarea>
+              <label class="archive-toggle">
+                <input id="editor-is-archived" type="checkbox" />
+                Archive this species
+              </label>
+              <div class="editor-actions">
+                <button id="editor-save" type="button">Save Editorial Changes</button>
+                <p id="editor-status" class="editor-status"></p>
+              </div>
+            </div>
+            <div id="detail-sections" class="detail-sections"></div>
+            <div id="audit-panel" class="audit-panel hidden">
+              <h3>Audit History</h3>
+              <div id="audit-list" class="audit-list"></div>
+            </div>
+          </div>
+        </section>
+      </main>
+
+      <footer class="footer">
+        <p>
+          This migration path preserves attribution for Dr. Peter Rubec, Dr. Diane Blackwood,
+          Dr. Welsbery R. Elsberry, and the Florida Fish and Wildlife Research Institute context
+          documented in the legacy project materials.
+        </p>
+      </footer>
+    </div>
+    <script src="app.js"></script>
+  </body>
+</html>
diff --git a/apps/web/nginx.conf b/apps/web/nginx.conf
new file mode 100644
index 0000000..29d2394
--- /dev/null
+++ b/apps/web/nginx.conf
@@ -0,0 +1,32 @@
+server {
+    listen 80;
+    server_name _;
+
+    root /usr/share/nginx/html;
+    index index.html;
+
+    location /api/ {
+        proxy_pass http://api:8000/api/;
+        proxy_http_version 1.1;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+    }
+
+    location /healthz {
+        proxy_pass http://api:8000/healthz;
+        proxy_http_version 1.1;
+        proxy_set_header Host $host;
+    }
+
+    location /readyz {
+        proxy_pass http://api:8000/readyz;
+        proxy_http_version 1.1;
+        proxy_set_header Host $host;
+    }
+
+    location / {
+        try_files $uri $uri/ /index.html;
+    }
+}
diff --git a/apps/web/styles.css b/apps/web/styles.css
new file mode 100644
index 0000000..b5aa5cb
--- /dev/null
+++ b/apps/web/styles.css
@@ -0,0 +1,427 @@
+:root {
+  --bg: #f4efe6;
+  --paper: rgba(255, 252, 247, 0.78);
+  --ink: #16251f;
+  --muted: #58655f;
+  --accent: #0f766e;
+  --accent-2: #bc6c25;
+  --line: rgba(22, 37, 31, 0.12);
+  --shadow: 0 24px 70px rgba(24, 35, 30, 0.15);
+}
+
+* {
+  box-sizing: border-box;
+}
+
+body {
+  margin: 0;
+  font-family: Georgia, "Times New Roman", serif;
+  color: var(--ink);
+  background:
+    radial-gradient(circle at top left, rgba(15, 118, 110, 0.14), transparent 28%),
+    radial-gradient(circle at top right, rgba(188, 108, 37, 0.16), transparent 24%),
+    linear-gradient(180deg, #f8f4ec, #efe6d7 70%, #e7dcc9);
+}
+
+.page {
+  width: min(1320px, calc(100vw - 32px));
+  margin: 0 auto;
+  padding: 24px 0 40px;
+}
+
+.hero,
+.panel,
+.footer {
+  backdrop-filter: blur(10px);
+  background: var(--paper);
+  border: 1px solid var(--line);
+  border-radius: 24px;
+  box-shadow: var(--shadow);
+}
+
+.hero {
+  padding: 28px;
+  margin-bottom: 20px;
+}
+
+.eyebrow {
+  margin: 0 0 10px;
+  text-transform: uppercase;
+  letter-spacing: 0.18em;
+  font-size: 0.78rem;
+  color: var(--accent);
+}
+
+h1 {
+  margin: 0;
+  font-size: clamp(2.8rem, 7vw, 5.6rem);
+  line-height: 0.92;
+}
+
+.lede {
+  max-width: 64ch;
+  color: var(--muted);
+  font-size: 1.08rem;
+}
+
+.hero-stats {
+  display: flex;
+  gap: 16px;
+  flex-wrap: wrap;
+  margin-top: 24px;
+}
+
+.auth-bar {
+  display: flex;
+  gap: 10px;
+  flex-wrap: wrap;
+  align-items: center;
+  margin-top: 18px;
+}
+
+.auth-bar input {
+  min-width: min(360px, 100%);
+  flex: 1;
+}
+
+.auth-status {
+  margin: 0;
+  color: var(--muted);
+}
+
+.stat {
+  min-width: 180px;
+  padding: 14px 16px;
+  border-radius: 18px;
+  background: rgba(255, 255, 255, 0.6);
+  border: 1px solid var(--line);
+}
+
+.stat span {
+  display: block;
+  font-size: 2rem;
+  font-weight: 700;
+}
+
+.stat label {
+  color: var(--muted);
+}
+
+.workspace {
+  display: grid;
+  grid-template-columns: 360px minmax(0, 1fr);
+  gap: 20px;
+}
+
+.panel {
+  padding: 18px;
+}
+
+.panel-header {
+  display: flex;
+  flex-direction: column;
+  gap: 12px;
+  margin-bottom: 14px;
+}
+
+.panel-header h2,
+.empty-state h2,
+.detail-header h2 {
+  margin: 0;
+}
+
+.archive-filter-group {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 8px;
+}
+
+.archive-filter-button {
+  padding: 8px 12px;
+  background: rgba(255, 255, 255, 0.72);
+  color: var(--muted);
+  border-color: var(--line);
+}
+
+.archive-filter-button.is-active {
+  background: var(--accent);
+  color: white;
+  border-color: transparent;
+}
+
+input[type="search"] {
+  width: 100%;
+  border: 1px solid var(--line);
+  border-radius: 999px;
+ padding: 12px 16px; + font: inherit; + background: rgba(255, 255, 255, 0.9); +} + +select, +textarea, +button { + font: inherit; +} + +textarea, +select { + width: 100%; + border: 1px solid var(--line); + border-radius: 18px; + padding: 12px 14px; + background: rgba(255, 255, 255, 0.92); +} + +button { + border: 1px solid transparent; + border-radius: 999px; + padding: 11px 16px; + background: var(--accent); + color: white; + cursor: pointer; +} + +.secondary-button { + background: rgba(255, 255, 255, 0.7); + color: var(--ink); + border-color: var(--line); +} + +.species-list { + display: flex; + flex-direction: column; + gap: 12px; + max-height: 70vh; + overflow: auto; +} + +.species-card { + text-align: left; + padding: 14px; + border-radius: 18px; + border: 1px solid var(--line); + background: linear-gradient(180deg, rgba(255, 255, 255, 0.95), rgba(241, 237, 230, 0.95)); + cursor: pointer; + transition: transform 160ms ease, border-color 160ms ease; +} + +.species-card:hover { + transform: translateY(-1px); + border-color: rgba(15, 118, 110, 0.45); +} + +.species-card-archived { + border-style: dashed; + background: linear-gradient(180deg, rgba(247, 241, 231, 0.98), rgba(233, 226, 214, 0.98)); +} + +.species-name, +.species-meta, +.species-snippet { + display: block; +} + +.species-name { + font-size: 1.05rem; + font-weight: 700; +} + +.species-meta, +.species-snippet, +.detail-scientific-name, +.detail-summary, +.footer p, +.error { + color: var(--muted); +} + +.species-snippet { + margin-top: 6px; + font-size: 0.92rem; +} + +.species-state-badge, +.detail-badge { + display: inline-flex; + align-items: center; + gap: 6px; + padding: 3px 10px; + border-radius: 999px; + border: 1px solid rgba(188, 108, 37, 0.32); + background: rgba(188, 108, 37, 0.12); + color: var(--accent-2); + font-size: 0.78rem; + font-weight: 700; + letter-spacing: 0.04em; + text-transform: uppercase; +} + +.species-state-badge { + margin-left: 8px; +} + +.empty-state { + min-height: 320px; + display: grid; + place-items: center; + text-align: center; +} + +.detail.hidden, +.hidden { + display: none; +} + +.detail-header { + padding-bottom: 16px; + border-bottom: 1px solid var(--line); +} + +.detail-title-row { + display: flex; + align-items: center; + gap: 12px; + flex-wrap: wrap; +} + +.detail-code { + margin: 0 0 8px; + text-transform: uppercase; + letter-spacing: 0.14em; + color: var(--accent-2); + font-size: 0.8rem; +} + +.detail-scientific-name { + margin-top: 6px; + font-style: italic; +} + +.detail-archive-note { + margin: 12px 0 0; + padding: 12px 14px; + border-radius: 14px; + background: rgba(188, 108, 37, 0.1); + border: 1px solid rgba(188, 108, 37, 0.2); + color: var(--accent-2); +} + +.detail-sections { + display: grid; + gap: 16px; + margin-top: 18px; +} + +.detail-section { + padding: 16px; + border-radius: 18px; + border: 1px solid var(--line); + background: rgba(255, 255, 255, 0.66); +} + +.detail-section h3 { + margin-top: 0; + color: var(--accent); +} + +.editor-panel { + margin-top: 18px; +} + +.editor-label { + display: block; + margin: 0 0 8px; + font-weight: 700; +} + +.editor-label + select, +.editor-label + textarea { + margin-bottom: 14px; +} + +.archive-toggle { + display: flex; + align-items: center; + gap: 10px; + margin: 0 0 14px; + color: var(--ink); + font-weight: 700; +} + +.archive-toggle input { + width: 18px; + height: 18px; + accent-color: var(--accent-2); +} + +.editor-actions { + display: flex; + gap: 12px; + align-items: center; + flex-wrap: wrap; +} + +.editor-status { + 
margin: 0; + color: var(--muted); +} + +.audit-list { + display: grid; + gap: 12px; +} + +.audit-entry { + padding: 14px; + border-radius: 16px; + border: 1px solid var(--line); + background: rgba(255, 255, 255, 0.72); +} + +.audit-meta { + margin: 0 0 10px; + color: var(--muted); + font-size: 0.92rem; +} + +.section-editor { + min-height: 220px; + margin-bottom: 12px; + white-space: pre-wrap; + font-family: "Courier New", monospace; + font-size: 0.92rem; + line-height: 1.45; +} + +.diagnostic-list { + margin: 0; + padding-left: 18px; +} + +.diagnostic-list li + li { + margin-top: 8px; +} + +pre { + margin: 0; + white-space: pre-wrap; + font-family: "Courier New", monospace; + font-size: 0.92rem; + line-height: 1.45; +} + +.footer { + margin-top: 20px; + padding: 18px 22px; +} + +@media (max-width: 960px) { + .workspace { + grid-template-columns: 1fr; + } + + .species-list { + max-height: 40vh; + } +} diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..28889c9 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,78 @@ +services: + db: + image: postgres:16-alpine + environment: + POSTGRES_DB: ecospecies + POSTGRES_USER: ecospecies + POSTGRES_PASSWORD: ecospecies + PGDATA: /var/lib/postgresql/data/pgdata + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ecospecies -d ecospecies"] + interval: 5s + timeout: 5s + retries: 10 + ports: + - "${ECOSPECIES_DB_PORT:-5432}:5432" + volumes: + - postgres_data:/var/lib/postgresql/data + + importer: + image: python:3.12-slim + depends_on: + db: + condition: service_healthy + working_dir: /workspace + environment: + ECOSPECIES_DATA_DIR: /legacy-data/InputFiles - TXT + ECOSPECIES_DATABASE_URL: postgresql+psycopg://ecospecies:ecospecies@db:5432/ecospecies + ECOSPECIES_VENV_DIR: /workspace/.docker/venv + PYTHONPATH: /workspace/apps/api/src + command: ["/bin/sh", "-lc", "./scripts/bootstrap-python-env.sh && ./scripts/run-import.sh"] + volumes: + - .:/workspace + - ../01-legacy-code-and-data:/legacy-data:ro + - python_venv:/workspace/.docker/venv + - pip_cache:/root/.cache/pip + + api: + image: python:3.12-slim + restart: unless-stopped + depends_on: + db: + condition: service_healthy + importer: + condition: service_completed_successfully + working_dir: /workspace + environment: + ECOSPECIES_DATA_DIR: /legacy-data/InputFiles - TXT + ECOSPECIES_DATABASE_URL: postgresql+psycopg://ecospecies:ecospecies@db:5432/ecospecies + ECOSPECIES_HOST: 0.0.0.0 + ECOSPECIES_PORT: "8000" + ECOSPECIES_AUTH_TOKENS: ${ECOSPECIES_AUTH_TOKENS:-} + ECOSPECIES_VENV_DIR: /workspace/.docker/venv + PYTHONPATH: /workspace/apps/api/src + command: ["/bin/sh", "-lc", "./scripts/bootstrap-python-env.sh && ./scripts/run-api.sh"] + ports: + - "${ECOSPECIES_API_PORT:-8000}:8000" + volumes: + - .:/workspace + - ../01-legacy-code-and-data:/legacy-data:ro + - python_venv:/workspace/.docker/venv + - pip_cache:/root/.cache/pip + + web: + image: nginx:1.27-alpine + restart: unless-stopped + depends_on: + api: + condition: service_started + ports: + - "${ECOSPECIES_WEB_PORT:-8080}:80" + volumes: + - ./apps/web:/usr/share/nginx/html:ro + - ./apps/web/nginx.conf:/etc/nginx/conf.d/default.conf:ro + +volumes: + postgres_data: + python_venv: + pip_cache: diff --git a/docs/flelmr-authoring.md b/docs/flelmr-authoring.md new file mode 100644 index 0000000..c2c5fd1 --- /dev/null +++ b/docs/flelmr-authoring.md @@ -0,0 +1,44 @@ +# FLELMR Authoring Notes + +## Summary Field + +The legacy outline reserves header slot `1.4` for `Summary/Abstract` or `Executive Summary`, but the 
historical corpus in this repository does not contain populated examples. Future FLELMR-compatible records should treat this as a short abstract for the full species life history.
+
+## Recommended Format
+
+Place the summary in the `HEADER` section immediately after the FLELMR code block:
+
+```text
+Summary/Abstract:
+Short paragraph here.
+```
+
+Accepted marker variants:
+
+- `Summary/Abstract`
+- `Summary/Abstract:`
+- `Summary`
+- `Summary:`
+- `Abstract`
+- `Abstract:`
+- `Executive Summary`
+- `Executive Summary:`
+
+## Intended Meaning
+
+The field should be a concise executive overview of the record, not metadata and not a section outline. It should help a reader understand the species before they read the detailed habitat, trophic, reproductive, and citation sections.
+
+## Suggested Content
+
+Write 3 to 6 sentences that cover:
+
+- species identity and ecological or management importance
+- general geographic range
+- major habitat use or life-stage pattern
+- notable trophic or reproductive traits
+- major conservation, fishery, or data-status note if relevant
+
+## Publishing Rule
+
+- During ingestion or editorial review, a missing summary should be treated as a warning.
+- In accepted datasets, the summary may remain empty if no editor-authored abstract is available.
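+
+## Illustrative Marker Recognition
+
+As a rough sketch of how an ingest step might apply the marker rules above (illustrative only; `SUMMARY_MARKER` and `extract_summary` are hypothetical names, not functions in the current `ecospecies_api.parser` module):
+
+```python
+import re
+
+# Accepted marker variants, alone on a line, with an optional trailing colon.
+# Longer alternatives come first so "Summary/Abstract" wins over "Summary".
+SUMMARY_MARKER = re.compile(
+    r"^(Summary/Abstract|Executive Summary|Summary|Abstract):?\s*$",
+    re.IGNORECASE,
+)
+
+
+def extract_summary(header_lines: list[str]) -> str | None:
+    """Return the paragraph following a summary marker, or None."""
+    for index, line in enumerate(header_lines):
+        if SUMMARY_MARKER.match(line.strip()):
+            paragraph = []
+            for following in header_lines[index + 1:]:
+                if not following.strip():
+                    break  # a blank line ends the summary paragraph
+                paragraph.append(following.strip())
+            return " ".join(paragraph) or None
+    return None
+```
+
+A missing or empty result would then surface as the existing `missing_summary` warning rather than blocking ingest.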
diff --git a/docs/forgejo-activation.md b/docs/forgejo-activation.md
new file mode 100644
index 0000000..76e171f
--- /dev/null
+++ b/docs/forgejo-activation.md
@@ -0,0 +1,38 @@
+# Forgejo CI Activation Checklist
+
+Use this checklist before turning on a Forgejo-native pipeline for `EcoSpecies-Atlas`.
+
+## 1. Confirm Workflow Source
+
+- Decide whether the repository will continue using the fallback `.github/workflows/ci.yml` definition or activate a dedicated `.forgejo/workflows/ci.yml`.
+- If a Forgejo-native workflow is desired, start from `.forgejo/workflows/ci.yml.template`.
+
+## 2. Confirm Runner Labels
+
+- Identify the runner label accepted by the target Forgejo instance.
+- Replace the placeholder `runs-on: docker` values in the template with that label before activation.
+- Verify that the selected runner can execute both Python and Node-based jobs.
+
+## 3. Confirm Action Source Policy
+
+- Check whether the Forgejo instance allows GitHub-hosted actions such as `actions/checkout`, `actions/setup-python`, and `actions/setup-node`.
+- If remote GitHub-hosted actions are not allowed, replace those `uses:` entries with approved internal or mirrored actions.
+
+## 4. Activate The Workflow
+
+1. Copy `.forgejo/workflows/ci.yml.template` to `.forgejo/workflows/ci.yml`.
+2. Update runner labels.
+3. Update action references if required by the instance policy.
+4. Commit the activated workflow.
+
+## 5. Validate CI Behavior
+
+- Open a test change request and confirm the repository-layer API tests run.
+- Confirm the stubbed browser smoke test runs and passes.
+- Verify that failures block merges according to the repository-host policy.
+
+## 6. Optional Hardening
+
+- Require the `CI` workflow before merge.
+- Mirror or pin all action sources used by the Forgejo workflow.
+- Document any instance-specific runner labels or action mirrors in the repository host settings or administrator notes.
diff --git a/docs/legacy-survey.md b/docs/legacy-survey.md
new file mode 100644
index 0000000..a0460f7
--- /dev/null
+++ b/docs/legacy-survey.md
@@ -0,0 +1,101 @@
+# Legacy EcoSpecies Survey
+
+## Scope
+
+This repository is primarily an archive of the legacy EcoSpecies system and its source materials. The current contents are sufficient to begin a structured migration plan and an initial replacement implementation.
+
+## Repository Inventory
+
+- `01-legacy-code-and-data/EcoSpecies_2012_0807_onCurly`: legacy ASP.NET MVC application source.
+- `01-legacy-code-and-data/EcoSpeciesSql_new`: SQL Server database creation and lookup-population scripts.
+- `01-legacy-code-and-data/InputFiles - TXT`: 92 source species life history text files.
+- `01-legacy-code-and-data/OutputFiles - RTF`: 95 generated report outputs.
+- `01-legacy-code-and-data/ecospecies-2/species-life-histories`: 95 paired `.txt` and `.sql` files representing a later export snapshot.
+- `01-legacy-code-and-data/slh-mod-txt2sql`: Python parsing scripts and hand-edited SQL/text files used to ingest SLH content.
+- `01-legacy-code-and-data/TextFilesAboutFLELMR_EcoSpecies`: manuals, contract/report artifacts, information architecture, and historical background.
+- `02-docs`: project notes, species outlines, import notes, and spreadsheets.
+
+## Legacy Capabilities Confirmed
+
+The legacy ASP.NET MVC application exposes these core workflows:
+
+- Public species list with taxonomic sorting and fielded search.
+- Per-species detail pages.
+- Heading and subheading navigation for life-history content.
+- Report generation to `rtf`, `txt`, and "web only" output modes.
+- Public/private visibility controls via `tbl_Slh.PublicView`.
+- XML-template-driven report assembly.
+
+Evidence:
+
+- `HomeController.cs` provides `Home`, `About`, `Glossary`, and `Manual`.
+- `OrganismController.cs` implements listing, filtering, details, node/subnode views, and CRUD.
+- `ReportController.cs` implements report generation and batch export.
+
+## Legacy Architecture Summary
+
+### Application layer
+
+- ASP.NET MVC 3-era application.
+- Entity Framework database-first model (`EcoSpecies.edmx` and generated context/models).
+- Razor views with jQuery/jQuery UI assets.
+
+### Data layer
+
+- SQL Server database `Eco_Species`.
+- 31 SQL scripts in `EcoSpeciesSql_new` for database creation, schema population, lookup tables, admin user creation, and XML template support.
+
+### Content pipeline
+
+- Species Life History text files are semi-structured and heading-driven.
+- Legacy Python parsing scripts (`slhparse.py`, `slhparse_2012_0801.py`) contain domain-specific cleanup and tag recognition logic.
+- Generated outputs include SQL inserts and RTF/text reports.
+
+## Important Migration Observations
+
+### What is reusable
+
+- Raw SLH text corpus.
+- SQL schema as a source of domain concepts and relationship mapping.
+- Parsing logic and tag dictionaries as institutional knowledge.
+- Glossary/manual/about content for continuity.
+- Existing report outputs for regression comparison.
+
+### What should not be copied forward directly
+
+- SQL Server-specific operational assumptions.
+- Legacy publish/deploy practices.
+- MVC 3 / EF database-first scaffolding.
+- Generated binaries and `obj` artifacts.
+
+### Data-model implications
+
+The archive suggests the modern system needs first-class support for:
+
+- species and taxonomic metadata
+- one or more source documents per species
+- hierarchical sections/headings/subheadings
+- citations/references and authoring metadata
+- visibility/publication state
+- report/export templates
+- ecological linkages suitable for graph-style visualization
+
+## Risks and Gaps
+
+- The source text format is inconsistent and sometimes noisy; ingest must tolerate malformed headings and spacing.
+- The legacy system notes that some outlines used 4-5 levels while the implemented site handled only 3 levels. +- The current repository does not include a clean, already-normalized database dump for direct import. +- Image/assets provenance and usage permissions need review during migration. + +## Acknowledgements To Preserve + +The replacement app should preserve credit to: + +- Dr. Peter Rubec for FLELMR-derived source material and species life history content. +- Dr. Diane Blackwood for the original EcoSpecies web application and architecture work. +- Dr. Welsbery R. Elsberry for consultation and Python programming support. +- Florida Fish and Wildlife Research Institute and related public-agency context described in the project materials. + +## Immediate Migration Recommendation + +Use the SLH text corpus as the initial authoritative ingest source, not the legacy MVC app. Treat the SQL schema and parser scripts as reference material for a modern normalized model and for ingest validation. diff --git a/docs/roadmap.md b/docs/roadmap.md new file mode 100644 index 0000000..7d25664 --- /dev/null +++ b/docs/roadmap.md @@ -0,0 +1,126 @@ +# EcoSpecies Modernization Roadmap + +## Target Product + +Create a Docker Compose-based, open-source EcoSpecies successor that: + +- ingests legacy SLH text files and future species submissions +- exposes a stable API for species, sections, citations, and ecological linkages +- provides a responsive public web app +- supports researcher/editor workflows for curation and publishing +- generates exports aligned with legacy reporting needs and future FLELMR-style outputs + +## Recommended Stack + +### Core platform + +- Backend: Python API service +- Primary datastore: PostgreSQL +- Search/indexing: PostgreSQL full-text initially, optional Meilisearch/OpenSearch later +- Frontend: static SPA or React-based client once requirements stabilize +- Deployment/runtime: Docker Compose for development and small-scale deployment + +### Why this stack + +- permissive licenses +- strong support for text ingestion, APIs, and data processing +- easy local development +- clear path from prototype to production + +## Product Capabilities By Phase + +### Phase 0: Discovery and migration planning + +- Inventory legacy assets and user-facing capabilities. +- Capture the replacement architecture and ingestion strategy. +- Define acknowledgements, provenance, and licensing boundaries. + +### Phase 1: Ingestion foundation + +- Parse legacy `.txt` SLH inputs into structured JSON records. +- Normalize common metadata: title, scientific name, common name, FLELMR code, headings, references. +- Create ingest diagnostics to flag malformed files and missing metadata. + +### Phase 2: Public read experience + +- Species listing and search. +- Species detail view with section navigation. +- Provenance and acknowledgement display. +- Summary metrics on corpus coverage. + +### Phase 3: Structured persistence + +- Move parsed content into PostgreSQL. +- Add editor-safe import jobs and audit metadata. +- Preserve raw source alongside normalized records. +- Establish authentication and role-based access for editor and admin workflows. +- Add persisted editorial workflow state for draft, review, and published records. +- Make document sections individually addressable for editor review and revision, with audit history for section-level changes. + +### Phase 4: Linkages and visualization + +- Model predator/prey, habitat, and ecological association edges. 
+- Add graph endpoints and species-relationship views. +- Support public-friendly visual explanations and expert filters. + +### Phase 5: Reports and export + +- Recreate legacy-like text/RTF export. +- Add machine-readable export formats such as JSON and Markdown. +- Support FLELMR-oriented authoring/export profiles. + +### Phase 6: Assisted research workflows + +- Add local-LLM-assisted extraction and drafting in a human-review loop. +- Integrate bibliography tooling for citation consolidation. +- Support candidate-species intake for records not yet in the historical corpus. +- Restrict assisted drafting and publication actions to authenticated editorial roles. + +## Data Model Direction + +Initial core entities: + +- `species` +- `source_document` +- `document_section` +- `citation` +- `taxon` +- `linkage` +- `media_asset` +- `ingest_run` + +Key design rules: + +- preserve raw source text +- retain provenance and import timestamps +- separate public published records from draft/editor states +- make sections addressable for citation and graph linking + +## LLM Extension Strategy + +Use local models only for assistive tasks, never silent publication: + +- extracting candidate structured fields from new SLH text +- suggesting missing headings or linkage labels +- clustering similar citations +- drafting summaries for editor review + +Guardrails: + +- raw text remains authoritative +- all generated content is marked as draft +- every automated extraction stores source spans where possible + +## Development Roadmap + +1. Implement a thin ingestion API over the legacy text corpus. +2. Build a responsive browser UI for listing and viewing species. +3. Add a persistent PostgreSQL-backed ingest store. +4. Introduce export and visualization services. +5. Add editorial workflows and local-LLM assistance. + +## Definition Of Done For The Initial Milestone + +- `docker compose up` starts a working API and frontend. +- The system can enumerate the legacy corpus and show parsed species detail for at least one real SLH file. +- Project docs describe the migration approach, target architecture, and next phases. 
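+
+## Appendix: Data Model Sketch
+
+A minimal sketch of the key design rules above, in the SQLAlchemy style already used by the API (illustrative only; these table and column definitions are assumptions, not the current `ecospecies_api.models` schema):
+
+```python
+from sqlalchemy import ForeignKey, Text
+from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
+
+
+class Base(DeclarativeBase):
+    pass
+
+
+class SourceDocument(Base):
+    """Raw SLH text preserved verbatim, with import provenance."""
+
+    __tablename__ = "source_document"
+
+    id: Mapped[int] = mapped_column(primary_key=True)
+    species_slug: Mapped[str] = mapped_column(index=True)
+    raw_text: Mapped[str] = mapped_column(Text)  # authoritative source text
+    imported_at: Mapped[str]  # ISO timestamp recorded by the ingest run
+
+
+class DocumentSection(Base):
+    """Sections stay individually addressable for citation and linking."""
+
+    __tablename__ = "document_section"
+
+    id: Mapped[int] = mapped_column(primary_key=True)
+    document_id: Mapped[int] = mapped_column(ForeignKey("source_document.id"))
+    position: Mapped[int]
+    heading: Mapped[str]
+    content: Mapped[str] = mapped_column(Text)
+```
+
+Draft/published separation and linkage edges would layer on top of tables like these without altering the preserved raw text.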
diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..9a686c6 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,76 @@ +{ + "name": "ecospecies-atlas", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "ecospecies-atlas", + "devDependencies": { + "@playwright/test": "^1.58.2" + } + }, + "node_modules/@playwright/test": { + "version": "1.58.2", + "resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.58.2.tgz", + "integrity": "sha512-akea+6bHYBBfA9uQqSYmlJXn61cTa+jbO87xVLCWbTqbWadRVmhxlXATaOjOgcBaWU4ePo0wB41KMFv3o35IXA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "playwright": "1.58.2" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/playwright": { + "version": "1.58.2", + "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.58.2.tgz", + "integrity": "sha512-vA30H8Nvkq/cPBnNw4Q8TWz1EJyqgpuinBcHET0YVJVFldr8JDNiU9LaWAE1KqSkRYazuaBhTpB5ZzShOezQ6A==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "playwright-core": "1.58.2" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "fsevents": "2.3.2" + } + }, + "node_modules/playwright-core": { + "version": "1.58.2", + "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.58.2.tgz", + "integrity": "sha512-yZkEtftgwS8CsfYo7nm0KE8jsvm6i/PTgVtB8DL726wNf6H2IMsDuxCpJj59KDaxCtSnrWan2AeDqM7JBaultg==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "playwright-core": "cli.js" + }, + "engines": { + "node": ">=18" + } + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..4f2abb4 --- /dev/null +++ b/package.json @@ -0,0 +1,11 @@ +{ + "name": "ecospecies-atlas", + "private": true, + "scripts": { + "test:ui": "node ./node_modules/@playwright/test/cli.js test", + "test:ui:stack": "PLAYWRIGHT_BASE_URL=http://127.0.0.1:18080 PLAYWRIGHT_LIVE_STACK=1 node ./node_modules/@playwright/test/cli.js test" + }, + "devDependencies": { + "@playwright/test": "^1.58.2" + } +} diff --git a/playwright.config.js b/playwright.config.js new file mode 100644 index 0000000..bf36f38 --- /dev/null +++ b/playwright.config.js @@ -0,0 +1,22 @@ +const { defineConfig } = require("./node_modules/@playwright/test"); + +const isLiveStack = process.env.PLAYWRIGHT_LIVE_STACK === "1"; +const baseURL = process.env.PLAYWRIGHT_BASE_URL || "http://127.0.0.1:4173"; + +module.exports = defineConfig({ + testDir: "./tests/ui", + timeout: 30000, + retries: 0, + use: { + baseURL, + headless: true, + }, + webServer: isLiveStack + ? undefined + : { + command: "node tests/ui/server.js", + url: "http://127.0.0.1:4173", + reuseExistingServer: true, + timeout: 30000, + }, +}); diff --git a/scripts/bootstrap-python-env.sh b/scripts/bootstrap-python-env.sh new file mode 100644 index 0000000..7da0cad --- /dev/null +++ b/scripts/bootstrap-python-env.sh @@ -0,0 +1,13 @@ +#!/bin/sh +set -eu + +ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." 
&& pwd)" +VENV_DIR="${ECOSPECIES_VENV_DIR:-$ROOT_DIR/.docker/venv}" + +mkdir -p "$VENV_DIR" + +if [ ! -x "$VENV_DIR/bin/python" ]; then + python3 -m venv --copies --clear "$VENV_DIR" +fi + +"$VENV_DIR/bin/pip" install --disable-pip-version-check -r "$ROOT_DIR/apps/api/requirements.txt" diff --git a/scripts/check-api-tests.sh b/scripts/check-api-tests.sh new file mode 100644 index 0000000..94400c4 --- /dev/null +++ b/scripts/check-api-tests.sh @@ -0,0 +1,7 @@ +#!/bin/sh +set -eu + +ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)" + +cd "$ROOT_DIR/apps/api" +exec python -m unittest -v diff --git a/scripts/check-ui-smoke.sh b/scripts/check-ui-smoke.sh new file mode 100644 index 0000000..6e3643d --- /dev/null +++ b/scripts/check-ui-smoke.sh @@ -0,0 +1,10 @@ +#!/bin/sh +set -eu + +ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)" + +cd "$ROOT_DIR" +if [ ! -d node_modules ]; then + npm install --no-bin-links +fi +exec npm run test:ui diff --git a/scripts/check-ui-stack-smoke.sh b/scripts/check-ui-stack-smoke.sh new file mode 100644 index 0000000..f6ac1b7 --- /dev/null +++ b/scripts/check-ui-stack-smoke.sh @@ -0,0 +1,10 @@ +#!/bin/sh +set -eu + +ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)" + +cd "$ROOT_DIR" +if [ ! -d node_modules ]; then + npm install --no-bin-links +fi +exec npm run test:ui:stack diff --git a/scripts/run-api.sh b/scripts/run-api.sh new file mode 100644 index 0000000..bd082a8 --- /dev/null +++ b/scripts/run-api.sh @@ -0,0 +1,9 @@ +#!/bin/sh +set -eu + +ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)" +VENV_DIR="${ECOSPECIES_VENV_DIR:-$ROOT_DIR/.docker/venv}" + +export PYTHONPATH="${PYTHONPATH:-$ROOT_DIR/apps/api/src}" + +exec "$VENV_DIR/bin/python" -m ecospecies_api.app diff --git a/scripts/run-import.sh b/scripts/run-import.sh new file mode 100644 index 0000000..bd9b5d1 --- /dev/null +++ b/scripts/run-import.sh @@ -0,0 +1,9 @@ +#!/bin/sh +set -eu + +ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." 
&& pwd)" +VENV_DIR="${ECOSPECIES_VENV_DIR:-$ROOT_DIR/.docker/venv}" + +export PYTHONPATH="${PYTHONPATH:-$ROOT_DIR/apps/api/src}" + +exec "$VENV_DIR/bin/python" -m ecospecies_api.cli diff --git a/tests/ui/editor-ui.spec.js b/tests/ui/editor-ui.spec.js new file mode 100644 index 0000000..4516e4f --- /dev/null +++ b/tests/ui/editor-ui.spec.js @@ -0,0 +1,46 @@ +const { test, expect } = require("../../node_modules/@playwright/test"); + +test.skip(process.env.PLAYWRIGHT_LIVE_STACK === "1", "Stub-only UI smoke test"); + +test.beforeEach(async ({ request }) => { + await request.post("/__reset"); +}); + +test("editor can filter archived records and archive/unarchive from the UI", async ({ page }) => { + const archiveFilters = page.locator("#archive-filter-group"); + const speciesList = page.locator("#species-list"); + + await page.goto("/"); + + await page.getByPlaceholder("Bearer token for editor access").fill("editor-token"); + await page.getByRole("button", { name: "Use Token" }).click(); + + await expect(archiveFilters).toBeVisible(); + await expect(archiveFilters.getByRole("button", { name: "Active", exact: true })).toHaveClass(/is-active/); + await expect(speciesList.getByRole("button", { name: /Active Shad/ })).toBeVisible(); + await expect(speciesList.getByRole("button", { name: /Archived Shad/ })).toHaveCount(0); + + await archiveFilters.getByRole("button", { name: "All", exact: true }).click(); + await expect(speciesList.getByRole("button", { name: /Archived Shad/ })).toBeVisible(); + + await speciesList.getByRole("button", { name: /Active Shad/ }).click(); + await expect(page.getByText("Archive this species")).toBeVisible(); + await expect(page.locator("#editor-is-archived")).not.toBeChecked(); + await page.locator("#editor-is-archived").check(); + await page.getByRole("button", { name: "Save Editorial Changes" }).click(); + + await expect(page.locator("#detail-archive-badge")).toBeVisible(); + await expect(page.locator("#detail-archive-note")).toBeVisible(); + + await archiveFilters.getByRole("button", { name: "Archived", exact: true }).click(); + await expect(speciesList.getByRole("button", { name: /Active Shad/ })).toBeVisible(); + + await speciesList.getByRole("button", { name: /Active Shad/ }).click(); + await expect(page.locator("#editor-is-archived")).toBeChecked(); + await page.locator("#editor-is-archived").uncheck(); + await page.getByRole("button", { name: "Save Editorial Changes" }).click(); + + await archiveFilters.getByRole("button", { name: "Active", exact: true }).click(); + await expect(speciesList.getByRole("button", { name: /Active Shad/ })).toBeVisible(); + await expect(page.locator("#detail-archive-badge")).toBeHidden(); +}); diff --git a/tests/ui/live-stack.spec.js b/tests/ui/live-stack.spec.js new file mode 100644 index 0000000..eb5d1ae --- /dev/null +++ b/tests/ui/live-stack.spec.js @@ -0,0 +1,46 @@ +const { test, expect } = require("../../node_modules/@playwright/test"); + +test.skip(process.env.PLAYWRIGHT_LIVE_STACK !== "1", "Live-stack smoke test only"); + +test("editor can archive and unarchive a real species in the running stack", async ({ page }) => { + const archiveFilters = page.locator("#archive-filter-group"); + const speciesList = page.locator("#species-list"); + const targetName = /Alabama Shad/; + + await page.goto("/"); + + await page.getByPlaceholder("Bearer token for editor access").fill("editor-token"); + await page.getByRole("button", { name: "Use Token" }).click(); + + await expect(archiveFilters).toBeVisible(); + await 
+  await archiveFilters.getByRole("button", { name: "All", exact: true }).click();
+  await page.locator("#search").fill("Alabama");
+  await expect(speciesList.getByRole("button", { name: targetName })).toBeVisible();
+  await speciesList.getByRole("button", { name: targetName }).click();
+
+  await expect(page.locator("#editor-is-archived")).toBeVisible();
+
+  if (await page.locator("#editor-is-archived").isChecked()) {
+    await page.locator("#editor-is-archived").uncheck();
+    await page.getByRole("button", { name: "Save Editorial Changes" }).click();
+    await expect(page.locator("#detail-archive-badge")).toBeHidden();
+  }
+
+  await page.locator("#editor-is-archived").check();
+  await page.getByRole("button", { name: "Save Editorial Changes" }).click();
+  await expect(page.locator("#detail-archive-badge")).toBeVisible();
+  await expect(page.locator("#detail-archive-note")).toBeVisible();
+
+  await archiveFilters.getByRole("button", { name: "Archived", exact: true }).click();
+  await expect(speciesList.getByRole("button", { name: targetName })).toBeVisible();
+
+  await speciesList.getByRole("button", { name: targetName }).click();
+  await expect(page.locator("#editor-is-archived")).toBeChecked();
+  await page.locator("#editor-is-archived").uncheck();
+  await page.getByRole("button", { name: "Save Editorial Changes" }).click();
+
+  await archiveFilters.getByRole("button", { name: "Active", exact: true }).click();
+  await page.locator("#search").fill("Alabama");
+  await expect(speciesList.getByRole("button", { name: targetName })).toBeVisible();
+  await expect(page.locator("#detail-archive-badge")).toBeHidden();
+});
diff --git a/tests/ui/server.js b/tests/ui/server.js
new file mode 100644
index 0000000..7b98d4e
--- /dev/null
+++ b/tests/ui/server.js
@@ -0,0 +1,257 @@
+const http = require("http");
+const fs = require("fs");
+const path = require("path");
+
+const PORT = Number(process.env.PORT || 4173);
+const WEB_ROOT = path.resolve(__dirname, "../../apps/web");
+
+function clone(value) {
+  return JSON.parse(JSON.stringify(value));
+}
+
+const baseSpecies = [
+  {
+    slug: "active-shad",
+    source_file: "Active Shad.txt",
+    title: "Active Shad (Alosa activa)",
+    common_name: "Active Shad",
+    scientific_name: "Alosa activa",
+    flelmr_code: "1001",
+    summary: "An active editor-visible species.",
+    section_count: 2,
+    publication_status: "published",
+    is_archived: false,
+    editor_notes: "",
+    last_modified_by: "system-import",
+    diagnostics: [],
+    sections: [
+      { id: 1, position: 1, heading: "HEADER", content: "Header content" },
+      { id: 2, position: 2, heading: "HABITAT", content: "Habitat content" },
+    ],
+    audit: [],
+  },
+  {
+    slug: "archived-shad",
+    source_file: "Archived Shad.txt",
+    title: "Archived Shad (Alosa archiva)",
+    common_name: "Archived Shad",
+    scientific_name: "Alosa archiva",
+    flelmr_code: "1002",
+    summary: "An archived species.",
+    section_count: 1,
+    publication_status: "published",
+    is_archived: true,
+    editor_notes: "Archived from prior import.",
+    last_modified_by: "system-import",
+    diagnostics: [],
+    sections: [
+      { id: 3, position: 1, heading: "HEADER", content: "Archived header" },
+    ],
+    audit: [
+      {
+        id: 1,
+        changed_by: "system-import",
+        changed_at: "2026-03-26T00:00:00+00:00",
+        action: "import_archive",
+        details: { is_archived: { from: false, to: true } },
+      },
+    ],
+  },
+];
+
+let speciesRecords = clone(baseSpecies);
+
+function resetState() {
+  speciesRecords = clone(baseSpecies);
+}
+
+function getSession(req) {
+  const auth = req.headers.authorization || "";
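+  // Only the hard-coded editor token authenticates; any other Authorization
+  // value falls through to the anonymous (auth-configured) session below.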
(auth === "Bearer editor-token") { + return { + authenticated: true, + auth_configured: true, + user: { username: "editor", role: "editor" }, + }; + } + return { + authenticated: false, + auth_configured: true, + user: null, + }; +} + +function sendJson(res, status, payload) { + const body = JSON.stringify(payload); + res.writeHead(status, { + "Content-Type": "application/json; charset=utf-8", + "Content-Length": Buffer.byteLength(body), + }); + res.end(body); +} + +function sendFile(res, filePath) { + fs.readFile(filePath, (error, content) => { + if (error) { + sendJson(res, 404, { error: "Not found" }); + return; + } + const ext = path.extname(filePath); + const type = + ext === ".html" + ? "text/html; charset=utf-8" + : ext === ".js" + ? "application/javascript; charset=utf-8" + : "text/css; charset=utf-8"; + res.writeHead(200, { "Content-Type": type }); + res.end(content); + }); +} + +function parseBody(req) { + return new Promise((resolve, reject) => { + let raw = ""; + req.on("data", (chunk) => { + raw += chunk; + }); + req.on("end", () => { + try { + resolve(raw ? JSON.parse(raw) : {}); + } catch (error) { + reject(error); + } + }); + req.on("error", reject); + }); +} + +function getEditorList() { + return speciesRecords.map((item) => ({ + slug: item.slug, + title: item.title, + common_name: item.common_name, + publication_status: item.publication_status, + is_archived: item.is_archived, + last_modified_by: item.last_modified_by, + diagnostics: item.diagnostics, + })); +} + +const server = http.createServer(async (req, res) => { + const url = new URL(req.url, `http://${req.headers.host}`); + const pathname = url.pathname; + + if (pathname === "/__reset" && req.method === "POST") { + resetState(); + sendJson(res, 200, { status: "ok" }); + return; + } + + if (pathname === "/api/auth/session" && req.method === "GET") { + sendJson(res, 200, getSession(req)); + return; + } + + if (pathname === "/api/insights/summary" && req.method === "GET") { + sendJson(res, 200, { species_count: 1, section_count: 3, diagnostic_counts: {} }); + return; + } + + if (pathname === "/api/editor/species" && req.method === "GET") { + sendJson(res, 200, { items: getEditorList(), count: speciesRecords.length }); + return; + } + + if (pathname.startsWith("/api/editor/species/") && pathname.endsWith("/audit") && req.method === "GET") { + const slug = pathname.slice("/api/editor/species/".length, -"/audit".length).replace(/\/$/, ""); + const item = speciesRecords.find((record) => record.slug === slug); + if (!item) { + sendJson(res, 404, { error: "Not found" }); + return; + } + sendJson(res, 200, { items: item.audit, count: item.audit.length }); + return; + } + + if (pathname.startsWith("/api/editor/species/") && pathname.endsWith("/editorial") && req.method === "POST") { + const slug = pathname.slice("/api/editor/species/".length, -"/editorial".length).replace(/\/$/, ""); + const item = speciesRecords.find((record) => record.slug === slug); + if (!item) { + sendJson(res, 404, { error: "Not found" }); + return; + } + const payload = await parseBody(req); + const beforeArchived = item.is_archived; + item.publication_status = payload.publication_status || item.publication_status; + item.summary = payload.summary ?? item.summary; + item.editor_notes = payload.editor_notes ?? 
+    item.is_archived = Boolean(payload.is_archived);
+    item.last_modified_by = "editor";
+    item.audit.unshift({
+      id: item.audit.length + 1,
+      changed_by: "editor",
+      changed_at: "2026-03-26T00:01:00+00:00",
+      action: "editorial_update",
+      details: {
+        is_archived: { from: beforeArchived, to: item.is_archived },
+      },
+    });
+    sendJson(res, 200, {
+      status: "ok",
+      slug: item.slug,
+      summary: item.summary,
+      publication_status: item.publication_status,
+      editor_notes: item.editor_notes,
+      is_archived: item.is_archived,
+      last_modified_by: item.last_modified_by,
+      changed_fields: {
+        is_archived: { from: beforeArchived, to: item.is_archived },
+      },
+    });
+    return;
+  }
+
+  if (pathname.startsWith("/api/editor/species/") && req.method === "GET") {
+    const slug = pathname.slice("/api/editor/species/".length).replace(/\/$/, "");
+    const item = speciesRecords.find((record) => record.slug === slug);
+    if (!item) {
+      sendJson(res, 404, { error: "Not found" });
+      return;
+    }
+    sendJson(res, 200, item);
+    return;
+  }
+
+  if (pathname === "/" || pathname === "/index.html") {
+    sendFile(res, path.join(WEB_ROOT, "index.html"));
+    return;
+  }
+
+  if (pathname === "/app.js" || pathname === "/styles.css") {
+    sendFile(res, path.join(WEB_ROOT, pathname.slice(1)));
+    return;
+  }
+
+  sendJson(res, 404, { error: "Not found" });
+});
+
+server.listen(PORT, "127.0.0.1", () => {
+  process.stdout.write(`UI test server listening on ${PORT}\n`);
+});
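+
+// Manual usage sketch (assumes the root Playwright config launches this file
+// through its webServer setting; the port and token are stub defaults):
+//   node tests/ui/server.js
+//   curl -s -X POST http://127.0.0.1:4173/__reset
+//   curl -s -H "Authorization: Bearer editor-token" http://127.0.0.1:4173/api/editor/species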