Initial commit

welsberr 2026-03-26 00:54:55 -04:00
parent 0eec08abf7
commit a6b04a995a
39 changed files with 3786 additions and 229 deletions

.forgejo/workflows/ci.yml.template Normal file

@@ -0,0 +1,48 @@
name: CI
on:
push:
pull_request:
jobs:
api-tests:
# Replace `docker` with the runner label used by the target Forgejo instance.
runs-on: docker
steps:
# Replace these `uses:` references if the target Forgejo instance does not
# allow GitHub-hosted actions directly.
- name: Checkout
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.12"
- name: Install API dependencies
run: pip install -r apps/api/requirements.txt
- name: Run repository tests
run: ./scripts/check-api-tests.sh
ui-smoke:
# Replace `docker` with the runner label used by the target Forgejo instance.
runs-on: docker
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up Node
uses: actions/setup-node@v4
with:
node-version: "20"
cache: "npm"
- name: Install UI test dependencies
run: npm ci
- name: Install Playwright browser
run: npx playwright install --with-deps chromium
- name: Run stub UI smoke test
run: ./scripts/check-ui-smoke.sh

.github/pull_request_template.md vendored Normal file

@@ -0,0 +1,14 @@
## Summary
-
## Verification
- [ ] `cd apps/api && python -m unittest -v`
- [ ] `npm run test:ui`
- [ ] `npm run test:ui:stack` if editor/archive behavior or real-stack wiring changed
## Notes
- Risk areas:
- Follow-up work:

.github/workflows/ci.yml vendored Normal file

@@ -0,0 +1,46 @@
name: CI
on:
push:
branches:
- "**"
pull_request:
jobs:
api-tests:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.12"
- name: Install API dependencies
run: pip install -r apps/api/requirements.txt
- name: Run repository tests
run: ./scripts/check-api-tests.sh
ui-smoke:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up Node
uses: actions/setup-node@v4
with:
node-version: "20"
cache: "npm"
- name: Install UI test dependencies
run: npm ci
- name: Install Playwright browser
run: npx playwright install --with-deps chromium
- name: Run stub UI smoke test
run: ./scripts/check-ui-smoke.sh

.gitignore vendored

@@ -1,229 +1,9 @@
# ---> Python
# Byte-compiled / optimized / DLL files
.docker/pip-cache/
.docker/venv/
var/postgres/
var/sqlite/
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# ---> Emacs
# -*- mode: gitignore; -*-
*~
\#*\#
/.emacs.desktop
/.emacs.desktop.lock
*.elc
auto-save-list
tramp
.\#*
# Org-mode
.org-id-locations
*_archive
# flymake-mode
*_flymake.*
# eshell files
/eshell/history
/eshell/lastdir
# elpa packages
/elpa/
# reftex files
*.rel
# AUCTeX auto folder
/auto/
# cask packages
.cask/
dist/
# Flycheck
flycheck_*.el
# server auth directory
/server/
# projectiles files
.projectile
# directory configuration
.dir-locals.el
# network security
/network-security.data
# ---> Rust
# Generated by Cargo
# will have compiled files and executables
debug/
target/
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock
# These are backup files generated by rustfmt
**/*.rs.bk
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
*.pyc
node_modules/
test-results/
playwright-report/

CONTRIBUTING.md Normal file

@@ -0,0 +1,37 @@
# Contributing
## Expectations
Changes should preserve the current verification baseline:
- repository-layer API tests must pass
- the stubbed browser smoke test must pass for UI/editor changes
- the live-stack browser smoke test should be run for changes that affect real editor/archive workflows when the Compose stack is available
## Local Checks
Run API tests:
```bash
./scripts/check-api-tests.sh
```
Run the stubbed browser smoke test:
```bash
./scripts/check-ui-smoke.sh
```
Run the real-stack browser smoke test:
```bash
./scripts/check-ui-stack-smoke.sh
```
## Notes
- CI currently runs the API tests and the stubbed browser smoke test.
- The live-stack smoke test is intentionally separate because it depends on a running Compose stack and editor auth configuration.
- If a change affects editorial persistence, archive behavior, or section editing, include the relevant verification command output or a concise summary in the change request.
- Repository hosts can call the scripts in `scripts/` directly instead of duplicating command wiring in host-specific pipeline definitions.
- Forgejo can use the existing `.github/workflows` CI definition through its documented fallback behavior. If a Forgejo-native pipeline is needed, start from `.forgejo/workflows/ci.yml.template`, then follow `docs/forgejo-activation.md` before activating it.

README.md

@@ -1,3 +1,120 @@
# EcoSpecies-Atlas
EcoSpecies-Atlas is a web app that provides species life history information to a broad audience.
It is a modern open-source follow-on to the legacy EcoSpecies application, built to ingest historical Species Life History materials and to evolve into a maintainable public research platform.
## Repository layout
- `apps/api`: Python API and import logic
- `apps/web`: public web UI served by nginx
- `docs`: migration survey and roadmap
- `scripts`: Compose runtime scripts for bootstrapping the container-managed Python environment
- Docker named volume `python_venv`: container-managed Python virtual environment
- Docker named volume `pip_cache`: pip cache for container bootstrapping
- Docker named volume `postgres_data`: PostgreSQL data directory
- `var/sqlite`: host fallback for local non-Compose verification
## Runtime model
Docker Compose owns all runtime dependencies:
- PostgreSQL runs in a container with a Docker-managed named volume
- Python services run in `python:3.12-slim`
- the Python virtual environment is created in a Docker-managed volume mounted at `/workspace/.docker/venv`
- dependencies are installed from `apps/api/requirements.txt` inside that virtual environment
- the legacy corpus is mounted read-only from `../01-legacy-code-and-data`
No host Python packages are required for the Compose workflow.
## Start the stack
```bash
cd EcoSpecies-Atlas
docker compose up
```
Endpoints (an authenticated request sketch follows this list):
- web UI: `http://localhost:8080`
- API: `http://localhost:8000`
- PostgreSQL: `localhost:5432`
- liveness: `/healthz`
- readiness: `/readyz`
- auth session: `/api/auth/session`
- editor status: `/api/editor/status` (requires `editor` or `admin`)
- editor species list: `/api/editor/species` (requires `editor` or `admin`)
- editor workflow detail/update: `/api/editor/species/<slug>/workflow` (requires `editor` or `admin`)
- editor species detail: `/api/editor/species/<slug>` (requires `editor` or `admin`)
- editor editorial update: `/api/editor/species/<slug>/editorial` (requires `editor` or `admin`)
- editor section detail/update: `/api/editor/species/<slug>/sections/<position>` (requires `editor` or `admin`)
- editor audit history: `/api/editor/species/<slug>/audit` (requires `editor` or `admin`)
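A minimal request sketch for the authenticated endpoints above, assuming the API is reachable on `localhost:8000` and a token with the `editor` role has been configured through `ECOSPECIES_AUTH_TOKENS`; the token value is a placeholder:
```python
# Query the session endpoint (no auth required) and the editor status
# endpoint (requires editor or admin) using a bearer token.
import json
import urllib.request

BASE = "http://localhost:8000"
TOKEN = "example-token"  # placeholder; use a token from ECOSPECIES_AUTH_TOKENS

def get(path: str) -> dict:
    request = urllib.request.Request(
        f"{BASE}{path}",
        headers={"Authorization": f"Bearer {TOKEN}"},
    )
    with urllib.request.urlopen(request) as response:
        return json.load(response)

print(get("/api/auth/session"))   # reports authenticated user and role
print(get("/api/editor/status"))  # raises HTTPError (401/403) without a valid editor token
```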
If those host ports are already in use, override them when starting Compose, for example:
```bash
ECOSPECIES_API_PORT=18000 ECOSPECIES_WEB_PORT=18080 docker compose up
```
## Host-visible state
Host-visible state is limited to bind mounts:
- source code and docs in this repo
- the optional SQLite fallback database in `var/sqlite`, used for local non-Compose verification
PostgreSQL data, the Python virtual environment, and the pip cache live in Docker named volumes rather than bind mounts (see Notes).
## Automated checks
Repository-host CI runs the repository-layer tests and the stubbed browser smoke test on pushes and change requests.
Contributor workflow guidance is in `CONTRIBUTING.md`.
When hosted on Forgejo, the current GitHub-compatible workflow layout can still be used. Forgejo Actions will look for workflows in `.forgejo/workflows`, and if that directory is absent it will fall back to `.github/workflows`. A Forgejo-native template is provided at `.forgejo/workflows/ci.yml.template`; copy it to `.forgejo/workflows/ci.yml` only after adapting the runner label and action source policy for the target instance. The activation checklist is in `docs/forgejo-activation.md`.
Run the repository-layer test suite with:
```bash
./scripts/check-api-tests.sh
```
Run the browser-level editor smoke test with:
```bash
./scripts/check-ui-smoke.sh
```
Run the browser-level smoke test against the real Compose stack with:
```bash
./scripts/check-ui-stack-smoke.sh
```
## Notes
- The importer seeds PostgreSQL from the legacy text corpus before the API starts and now synchronizes by slug instead of truncating the full dataset.
- Species missing from a later import payload are archived instead of deleted. Public endpoints hide archived records; editor endpoints can still inspect them.
- The editor species list supports `active`, `all`, and `archived` client-side filtering so archived records remain manageable in the UI.
- Editors can also archive or unarchive species explicitly from the editorial controls, with audit history recorded alongside other editorial changes.
- The API also supports a host-local SQLite fallback for direct verification when `ECOSPECIES_DATABASE_URL` is unset.
- PostgreSQL, the Python virtualenv, and the pip cache use Docker named volumes because bind-mounted runtime state is not reliable on CIFS-backed workspaces like this one.
- Initial editor auth uses `ECOSPECIES_AUTH_TOKENS` in the format `token:username:role[,token2:username2:role2]`, where `role` is `viewer`, `editor`, or `admin`; a request sketch follows this list.
- Editorial workflow state is persisted per species with `draft`, `review`, and `published` statuses. Public endpoints return only `published` records; editor endpoints can inspect and update all records.
- Editors can curate top-level metadata and section content from the web UI, and every editorial or section change is recorded in per-species audit history.
- Summary authoring guidance for future FLELMR-compatible records is in `docs/flelmr-authoring.md`.
- Legacy survey and roadmap artifacts are in `docs/`.
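A sketch of the editorial flow described above, assuming the Compose stack is running and the token below was configured with the `editor` role; the slug and token are placeholders:
```python
# POST an editorial update, then archive the record explicitly.
# Both changes are recorded in the per-species audit history.
import json
import urllib.request

BASE = "http://localhost:8000"
TOKEN = "example-token"  # placeholder editor token

def post(path: str, payload: dict) -> dict:
    request = urllib.request.Request(
        f"{BASE}{path}",
        data=json.dumps(payload).encode("utf-8"),
        headers={
            "Authorization": f"Bearer {TOKEN}",
            "Content-Type": "application/json",
        },
        method="POST",
    )
    with urllib.request.urlopen(request) as response:
        return json.load(response)

# Move a record back to draft; public endpoints stop returning it.
print(post("/api/editor/species/example-slug/editorial",
           {"publication_status": "draft", "editor_notes": "Needs review."}))

# Archive the record explicitly; editor endpoints can still inspect it.
print(post("/api/editor/species/example-slug/editorial", {"is_archived": True}))
```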
## Governance And Operations
The repository host, such as GitHub or Forgejo, is used for source control, change requests, code review, and CI checks. It is not part of the application runtime.
EcoSpecies-Atlas itself runs through Docker Compose, the Python API, nginx, and PostgreSQL. Import jobs, editor workflows, and browser access all depend on the application stack, not on the repository host.
In practice, the repository host is responsible for change management:
- storing the code, docs, workflow definitions, and test harnesses
- running CI checks on pushes and change requests
- supporting review and merge workflows
In practice, the application stack is responsible for operations:
- serving the web UI and API
- persisting editorial and import state
- running imports and editor workflows

apps/README.md Normal file

@@ -0,0 +1,18 @@
# Application Notes
## API
The API lives in `apps/api/src/ecospecies_api` and supports two storage modes:
- PostgreSQL through Docker Compose
- SQLite fallback for host-local verification
The importer writes persisted `species`, `document_section`, and `ingest_diagnostic` records before the API serves traffic.
## Web
The web app is static and served by nginx, which proxies `/api/*` and `/healthz` to the API service inside the Compose network.
## Dependency handling
Compose bootstraps a Python virtual environment in a Docker named volume mounted at `/workspace/.docker/venv` and installs `apps/api/requirements.txt` into that environment. This keeps runtime dependencies isolated from the host Python installation; a named volume is used because bind-mounted runtime state is not reliable on CIFS-backed workspaces.
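A short illustration of the storage-mode selection, run from `apps/api` with `src/` on `PYTHONPATH`; the PostgreSQL URL below is a placeholder:
```python
# With ECOSPECIES_DATABASE_URL unset, get_database_url() falls back to the
# SQLite file under var/sqlite; otherwise the configured URL is used as-is.
import os
from ecospecies_api.db import get_database_url

os.environ.pop("ECOSPECIES_DATABASE_URL", None)
print(get_database_url())  # sqlite:///.../var/sqlite/ecospecies.db

os.environ["ECOSPECIES_DATABASE_URL"] = (
    "postgresql+psycopg://eco:example@localhost:5432/ecospecies"  # placeholder URL
)
print(get_database_url())
```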

apps/api/requirements.txt Normal file

@@ -0,0 +1,2 @@
SQLAlchemy>=2.0,<3.0
psycopg[binary]>=3.2,<4.0

apps/api/src/ecospecies_api/__init__.py Normal file

@@ -0,0 +1 @@
"""EcoSpecies API package."""

apps/api/src/ecospecies_api/app.py Normal file

@@ -0,0 +1,433 @@
from __future__ import annotations
import json
import os
import time
from http import HTTPStatus
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from urllib.parse import parse_qs, urlparse
from ecospecies_api.auth import (
AuthSession,
auth_is_configured,
resolve_auth_session,
role_satisfies,
)
from ecospecies_api.parser import get_default_data_dir, load_species_records
from ecospecies_api.repository import (
get_editor_species_detail,
get_editor_species_list,
get_editor_species_workflow,
get_species_by_slug,
list_species_audit,
get_readiness_status,
get_summary_metrics,
has_species_data,
import_species_payload,
list_diagnostics,
list_species,
update_species_section,
update_species_editorial,
)
def make_payload() -> list[dict[str, object]]:
records = load_species_records(get_default_data_dir())
payload: list[dict[str, object]] = []
for record in records:
payload.append(
{
"slug": record.slug,
"source_file": record.source_file,
"title": record.title,
"common_name": record.common_name,
"scientific_name": record.scientific_name,
"flelmr_code": record.flelmr_code,
"summary": record.summary,
"section_count": record.section_count,
"diagnostics": [
{
"level": diagnostic.level,
"code": diagnostic.code,
"message": diagnostic.message,
}
for diagnostic in record.diagnostics
],
"sections": [
{"heading": section.heading, "content": section.content}
for section in record.sections
],
}
)
return payload
class EcoSpeciesHandler(BaseHTTPRequestHandler):
server_version = "EcoSpeciesHTTP/0.1"
def do_GET(self) -> None:
parsed = urlparse(self.path)
path = parsed.path
query = parse_qs(parsed.query)
session = resolve_auth_session(self.headers)
if path == "/healthz":
self.write_json(
{
"status": "ok",
"check": "liveness",
"process": "running",
}
)
return
if path == "/readyz":
readiness = get_readiness_status()
status = HTTPStatus.OK if readiness["ready"] else HTTPStatus.SERVICE_UNAVAILABLE
self.write_json(
{
"status": "ok" if readiness["ready"] else "degraded",
"check": "readiness",
**readiness,
},
status=status,
)
return
if path == "/api/auth/session":
self.write_json(
{
"authenticated": session is not None,
"auth_configured": auth_is_configured(),
"user": (
{"username": session.username, "role": session.role}
if session is not None
else None
),
}
)
return
if path == "/api/editor/status":
if not self.require_role(session, "editor"):
return
self.write_json(
{
"status": "ok",
"editor_access": True,
"user": {"username": session.username, "role": session.role},
"capabilities": [
"draft_ingest_review",
"editorial_curation",
"future_report_authoring",
],
}
)
return
if path == "/api/editor/species":
if not self.require_role(session, "editor"):
return
search = query.get("search", [""])[0].strip().lower()
items = get_editor_species_list(search)
compact = [
{
"slug": item["slug"],
"title": item["title"],
"common_name": item["common_name"],
"publication_status": item["publication_status"],
"is_archived": item["is_archived"],
"last_modified_by": item["last_modified_by"],
"diagnostic_count": len(item["diagnostics"]),
}
for item in items
]
self.write_json({"items": compact, "count": len(compact)})
return
if path.startswith("/api/editor/species/") and path.endswith("/audit"):
if not self.require_role(session, "editor"):
return
slug = path[len("/api/editor/species/") : -len("/audit")].strip("/")
items = list_species_audit(slug)
if items is None:
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
self.write_json({"items": items, "count": len(items)})
return
if path.startswith("/api/editor/species/") and "/sections/" in path:
if not self.require_role(session, "editor"):
return
slug, _, tail = path[len("/api/editor/species/") :].partition("/sections/")
try:
section_position = int(tail.strip("/"))
except ValueError:
self.write_json({"error": "Invalid section position"}, status=HTTPStatus.BAD_REQUEST)
return
item = get_editor_species_detail(slug.strip("/"))
if item is None:
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
for section in item["sections"]:
if section["position"] == section_position:
self.write_json(section)
return
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
if path.startswith("/api/editor/species/") and not path.endswith("/workflow") and not path.endswith("/editorial") and not path.endswith("/audit"):
if not self.require_role(session, "editor"):
return
slug = path[len("/api/editor/species/") :].strip("/")
item = get_editor_species_detail(slug)
if item is None:
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
self.write_json(item)
return
if path.startswith("/api/editor/species/") and path.endswith("/workflow"):
if not self.require_role(session, "editor"):
return
slug = path[len("/api/editor/species/") : -len("/workflow")].strip("/")
item = get_editor_species_workflow(slug)
if item is None:
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
self.write_json(item)
return
if path == "/api/insights/summary":
metrics = get_summary_metrics()
self.write_json(
{
"species_count": metrics["species_count"],
"section_count": metrics["section_count"],
"diagnostic_counts": metrics["diagnostic_counts"],
"source_directory": get_default_data_dir(),
}
)
return
if path == "/api/insights/diagnostics":
flagged = list_diagnostics()
self.write_json({"items": flagged, "count": len(flagged)})
return
if path == "/api/species":
search = query.get("search", [""])[0].strip().lower()
species = list_species(search)
compact = [
{
"slug": item["slug"],
"title": item["title"],
"common_name": item["common_name"],
"scientific_name": item["scientific_name"],
"flelmr_code": item["flelmr_code"],
"summary": item["summary"],
"section_count": item["section_count"],
"diagnostic_count": len(item["diagnostics"]),
}
for item in species
]
self.write_json({"items": compact, "count": len(compact)})
return
if path.startswith("/api/species/"):
slug = path.rsplit("/", 1)[-1]
item = get_species_by_slug(slug)
if item is not None:
self.write_json(item)
return
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
def do_POST(self) -> None:
parsed = urlparse(self.path)
path = parsed.path
session = resolve_auth_session(self.headers)
if path.startswith("/api/editor/species/") and path.endswith("/workflow"):
if not self.require_role(session, "editor"):
return
payload = self.read_json_body()
if payload is None:
return
slug = path[len("/api/editor/species/") : -len("/workflow")].strip("/")
try:
result = update_species_editorial(
slug=slug,
publication_status=payload.get("publication_status"),
summary=None,
editor_notes=payload.get("editor_notes"),
is_archived=payload.get("is_archived"),
username=session.username,
)
except ValueError as exc:
self.write_json({"error": str(exc)}, status=HTTPStatus.BAD_REQUEST)
return
if result is None:
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
self.write_json({"status": "ok", **result})
return
if path.startswith("/api/editor/species/") and path.endswith("/editorial"):
if not self.require_role(session, "editor"):
return
payload = self.read_json_body()
if payload is None:
return
slug = path[len("/api/editor/species/") : -len("/editorial")].strip("/")
try:
result = update_species_editorial(
slug=slug,
publication_status=payload.get("publication_status"),
summary=payload.get("summary"),
editor_notes=payload.get("editor_notes"),
is_archived=payload.get("is_archived"),
username=session.username,
)
except ValueError as exc:
self.write_json({"error": str(exc)}, status=HTTPStatus.BAD_REQUEST)
return
if result is None:
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
self.write_json({"status": "ok", **result})
return
if path.startswith("/api/editor/species/") and "/sections/" in path:
if not self.require_role(session, "editor"):
return
payload = self.read_json_body()
if payload is None:
return
slug, _, tail = path[len("/api/editor/species/") :].partition("/sections/")
try:
section_position = int(tail.strip("/"))
except ValueError:
self.write_json({"error": "Invalid section position"}, status=HTTPStatus.BAD_REQUEST)
return
content = payload.get("content")
if not isinstance(content, str):
self.write_json({"error": "content must be a string"}, status=HTTPStatus.BAD_REQUEST)
return
result = update_species_section(
slug=slug.strip("/"),
section_position=section_position,
content=content,
username=session.username,
)
if result is None:
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
self.write_json({"status": "ok", **result})
return
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
def log_message(self, format: str, *args: object) -> None:
return
def require_role(self, session: AuthSession | None, required_role: str) -> bool:
if session is None:
self.write_json(
{
"error": "Authentication required",
"required_role": required_role,
"auth_configured": auth_is_configured(),
},
status=HTTPStatus.UNAUTHORIZED,
)
return False
if not role_satisfies(session.role, required_role):
self.write_json(
{
"error": "Insufficient role",
"required_role": required_role,
"user_role": session.role,
"username": session.username,
},
status=HTTPStatus.FORBIDDEN,
)
return False
return True
def read_json_body(self) -> dict[str, object] | None:
content_length = self.headers.get("Content-Length", "0").strip()
try:
length = int(content_length or "0")
except ValueError:
self.write_json({"error": "Invalid Content-Length"}, status=HTTPStatus.BAD_REQUEST)
return None
raw_body = self.rfile.read(length) if length > 0 else b"{}"
try:
parsed = json.loads(raw_body.decode("utf-8"))
except json.JSONDecodeError:
self.write_json({"error": "Invalid JSON body"}, status=HTTPStatus.BAD_REQUEST)
return None
if not isinstance(parsed, dict):
self.write_json({"error": "JSON body must be an object"}, status=HTTPStatus.BAD_REQUEST)
return None
return parsed
def write_json(self, payload: dict[str, object], status: HTTPStatus = HTTPStatus.OK) -> None:
body = json.dumps(payload, ensure_ascii=True).encode("utf-8")
self.send_response(status.value)
self.send_header("Content-Type", "application/json; charset=utf-8")
self.send_header("Content-Length", str(len(body)))
self.send_header("Access-Control-Allow-Origin", "*")
if status == HTTPStatus.UNAUTHORIZED:
self.send_header("WWW-Authenticate", 'Bearer realm="EcoSpecies-Atlas"')
self.end_headers()
self.wfile.write(body)
def main() -> None:
host = os.environ.get("ECOSPECIES_HOST", "127.0.0.1")
port = int(os.environ.get("ECOSPECIES_PORT", "8000"))
last_error: Exception | None = None
for attempt in range(1, 16):
try:
if not has_species_data():
import_species_payload(make_payload())
last_error = None
break
except Exception as exc: # pragma: no cover - startup retry path
last_error = exc
print(f"Startup dependency check failed on attempt {attempt}: {exc}")
time.sleep(2)
if last_error is not None:
raise last_error
server = ThreadingHTTPServer((host, port), EcoSpeciesHandler)
print(f"EcoSpecies API listening on http://{host}:{port}")
server.serve_forever()
if __name__ == "__main__":
main()

apps/api/src/ecospecies_api/auth.py Normal file

@@ -0,0 +1,81 @@
from __future__ import annotations
import os
from dataclasses import dataclass
from typing import Mapping
ROLE_ORDER = {
"viewer": 1,
"editor": 2,
"admin": 3,
}
@dataclass(frozen=True)
class AuthSession:
token: str
username: str
role: str
def _normalize_role(role: str) -> str:
normalized = role.strip().lower()
if normalized not in ROLE_ORDER:
raise ValueError(f"Unsupported role: {role}")
return normalized
def _parse_token_entry(entry: str) -> tuple[str, AuthSession]:
parts = [part.strip() for part in entry.split(":")]
if len(parts) != 3:
raise ValueError(
"ECOSPECIES_AUTH_TOKENS entries must use the format token:username:role"
)
token, username, role = parts
if not token or not username:
raise ValueError("Auth token and username must be non-empty")
return token, AuthSession(token=token, username=username, role=_normalize_role(role))
def get_token_registry() -> dict[str, AuthSession]:
configured = os.environ.get("ECOSPECIES_AUTH_TOKENS", "").strip()
if not configured:
return {}
registry: dict[str, AuthSession] = {}
for raw_entry in configured.split(","):
entry = raw_entry.strip()
if not entry:
continue
token, session = _parse_token_entry(entry)
registry[token] = session
return registry
def get_bearer_token(headers: Mapping[str, str]) -> str | None:
auth_header = headers.get("Authorization", "").strip()
if auth_header.lower().startswith("bearer "):
token = auth_header[7:].strip()
return token or None
token = headers.get("X-EcoSpecies-Token", "").strip()
return token or None
def resolve_auth_session(headers: Mapping[str, str]) -> AuthSession | None:
registry = get_token_registry()
token = get_bearer_token(headers)
if not token:
return None
return registry.get(token)
def auth_is_configured() -> bool:
return bool(get_token_registry())
def role_satisfies(role: str, required_role: str) -> bool:
return ROLE_ORDER[_normalize_role(role)] >= ROLE_ORDER[_normalize_role(required_role)]

@@ -0,0 +1,14 @@
from __future__ import annotations
from ecospecies_api.app import make_payload
from ecospecies_api.repository import import_species_payload
def main() -> None:
payload = make_payload()
import_species_payload(payload)
print(f"Imported {len(payload)} species records.")
if __name__ == "__main__":
main()

apps/api/src/ecospecies_api/db.py Normal file

@@ -0,0 +1,24 @@
from __future__ import annotations
import os
from pathlib import Path
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
def get_database_url() -> str:
configured = os.environ.get("ECOSPECIES_DATABASE_URL")
if configured:
return configured
default_path = Path(__file__).resolve().parents[4] / "var" / "sqlite" / "ecospecies.db"
default_path.parent.mkdir(parents=True, exist_ok=True)
return f"sqlite:///{default_path}"
def create_db_engine():
return create_engine(get_database_url(), future=True)
SessionLocal = sessionmaker(bind=create_db_engine(), autoflush=False, autocommit=False, future=True)

apps/api/src/ecospecies_api/models.py Normal file

@@ -0,0 +1,79 @@
from __future__ import annotations
from sqlalchemy import Boolean, ForeignKey, Integer, String, Text
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
class Base(DeclarativeBase):
pass
class Species(Base):
__tablename__ = "species"
id: Mapped[int] = mapped_column(Integer, primary_key=True)
slug: Mapped[str] = mapped_column(String(255), unique=True, index=True)
source_file: Mapped[str] = mapped_column(String(255))
title: Mapped[str] = mapped_column(String(500))
common_name: Mapped[str] = mapped_column(String(255), default="")
scientific_name: Mapped[str] = mapped_column(String(255), default="")
flelmr_code: Mapped[str] = mapped_column(String(64), default="")
summary: Mapped[str] = mapped_column(Text, default="")
section_count: Mapped[int] = mapped_column(Integer, default=0)
publication_status: Mapped[str] = mapped_column(String(32), default="published", index=True)
is_archived: Mapped[bool] = mapped_column(Boolean, default=False, index=True)
editor_notes: Mapped[str] = mapped_column(Text, default="")
last_modified_by: Mapped[str] = mapped_column(String(255), default="system-import")
sections: Mapped[list["DocumentSection"]] = relationship(
back_populates="species",
cascade="all, delete-orphan",
order_by="DocumentSection.position",
)
diagnostics: Mapped[list["IngestDiagnosticRecord"]] = relationship(
back_populates="species",
cascade="all, delete-orphan",
order_by="IngestDiagnosticRecord.id",
)
audit_entries: Mapped[list["SpeciesAuditLog"]] = relationship(
back_populates="species",
cascade="all, delete-orphan",
order_by="SpeciesAuditLog.id.desc()",
)
class DocumentSection(Base):
__tablename__ = "document_section"
id: Mapped[int] = mapped_column(Integer, primary_key=True)
species_id: Mapped[int] = mapped_column(ForeignKey("species.id", ondelete="CASCADE"), index=True)
position: Mapped[int] = mapped_column(Integer)
heading: Mapped[str] = mapped_column(String(255))
content: Mapped[str] = mapped_column(Text)
species: Mapped[Species] = relationship(back_populates="sections")
class IngestDiagnosticRecord(Base):
__tablename__ = "ingest_diagnostic"
id: Mapped[int] = mapped_column(Integer, primary_key=True)
species_id: Mapped[int] = mapped_column(ForeignKey("species.id", ondelete="CASCADE"), index=True)
level: Mapped[str] = mapped_column(String(32))
code: Mapped[str] = mapped_column(String(128), index=True)
message: Mapped[str] = mapped_column(Text)
species: Mapped[Species] = relationship(back_populates="diagnostics")
class SpeciesAuditLog(Base):
__tablename__ = "species_audit_log"
id: Mapped[int] = mapped_column(Integer, primary_key=True)
species_id: Mapped[int] = mapped_column(ForeignKey("species.id", ondelete="CASCADE"), index=True)
changed_by: Mapped[str] = mapped_column(String(255))
changed_at: Mapped[str] = mapped_column(String(64), index=True)
action: Mapped[str] = mapped_column(String(64))
details_json: Mapped[str] = mapped_column(Text)
species: Mapped[Species] = relationship(back_populates="audit_entries")

apps/api/src/ecospecies_api/parser.py Normal file

@@ -0,0 +1,224 @@
from __future__ import annotations
import os
import re
from dataclasses import dataclass
from pathlib import Path
SECTION_PATTERN = re.compile(r"^[A-Z][A-Z\s/&()-]{2,}$")
FIELD_PATTERN = re.compile(r"^(?P<key>[A-Za-z/ _-]+):\s*(?P<value>.*)$")
SUMMARY_MARKER_PATTERN = re.compile(r"^(summary(?:/abstract)?|abstract|executive summary):?\s*$", re.IGNORECASE)
@dataclass
class Section:
heading: str
content: str
@dataclass
class IngestDiagnostic:
level: str
code: str
message: str
@dataclass
class SpeciesRecord:
slug: str
source_file: str
title: str
common_name: str
scientific_name: str
flelmr_code: str
summary: str
section_count: int
sections: list[Section]
diagnostics: list[IngestDiagnostic]
def slugify(value: str) -> str:
cleaned = re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-")
return cleaned or "unknown-species"
def normalize_text(raw_text: str) -> str:
text = raw_text.replace("\ufeff", "")
text = text.replace("\r\n", "\n").replace("\r", "\n")
return text
def normalize_whitespace(value: str) -> str:
return re.sub(r"\s+", " ", value).strip()
def split_sections(lines: list[str]) -> list[Section]:
sections: list[Section] = []
current_heading = "HEADER"
current_lines: list[str] = []
for raw_line in lines:
line = raw_line.rstrip()
stripped = line.strip()
if SECTION_PATTERN.fullmatch(stripped):
if current_lines:
sections.append(
Section(
heading=current_heading,
content="\n".join(current_lines).strip(),
)
)
current_heading = stripped
current_lines = []
continue
current_lines.append(line)
if current_lines:
sections.append(
Section(
heading=current_heading,
content="\n".join(current_lines).strip(),
)
)
return [section for section in sections if section.content]
def extract_metadata(lines: list[str]) -> dict[str, str]:
metadata: dict[str, str] = {}
for line in lines[:80]:
match = FIELD_PATTERN.match(line.strip())
if not match:
continue
key = match.group("key").strip().lower()
value = match.group("value").strip()
metadata[key] = value
# Legacy files vary between "FLELMR", "FLELMR Code", and similar labels.
if key.startswith("flelmr"):
metadata["flelmr"] = value
return metadata
def extract_summary(lines: list[str], sections: list[Section]) -> str:
if not sections:
return ""
header_content = ""
for section in sections:
if section.heading == "HEADER":
header_content = section.content
break
if not header_content:
return ""
header_lines = [line.rstrip() for line in header_content.splitlines()]
for index, raw_line in enumerate(header_lines):
if not SUMMARY_MARKER_PATTERN.fullmatch(raw_line.strip()):
continue
summary_lines: list[str] = []
for candidate in header_lines[index + 1 :]:
stripped = candidate.strip()
if not stripped:
if summary_lines:
summary_lines.append("")
continue
if SECTION_PATTERN.fullmatch(stripped):
break
if stripped.startswith("[") and not summary_lines:
break
summary_lines.append(stripped)
compact_lines = [line for line in summary_lines if line]
if compact_lines:
return normalize_whitespace(" ".join(compact_lines))
return ""
return ""
def parse_species_file(path: Path) -> SpeciesRecord:
lines = normalize_text(path.read_text(encoding="utf-8", errors="replace")).split("\n")
metadata = extract_metadata(lines)
sections = split_sections(lines)
diagnostics: list[IngestDiagnostic] = []
title = metadata.get("title", path.stem)
common_name = metadata.get("common name", path.stem.replace(" SLH", ""))
scientific_name = metadata.get("scientific name", "")
flelmr_code = metadata.get("flelmr", "")
summary = extract_summary(lines, sections)
if not scientific_name:
diagnostics.append(
IngestDiagnostic(
level="warning",
code="missing_scientific_name",
message="Scientific name could not be extracted from the source file.",
)
)
if not flelmr_code:
diagnostics.append(
IngestDiagnostic(
level="warning",
code="missing_flelmr_code",
message="FLELMR code could not be extracted from the source file.",
)
)
if len(sections) < 3:
diagnostics.append(
IngestDiagnostic(
level="warning",
code="low_section_count",
message="Very few top-level sections were detected; headings may need parser refinement.",
)
)
if not summary:
diagnostics.append(
IngestDiagnostic(
level="warning",
code="missing_summary",
message="No summary text was extracted from the source file.",
)
)
slug_base = common_name or title or path.stem
return SpeciesRecord(
slug=slugify(slug_base),
source_file=path.name,
title=title,
common_name=common_name,
scientific_name=scientific_name,
flelmr_code=flelmr_code,
summary=summary,
section_count=len(sections),
sections=sections,
diagnostics=diagnostics,
)
def load_species_records(data_dir: str) -> list[SpeciesRecord]:
base = Path(data_dir)
if not base.exists():
return []
records: list[SpeciesRecord] = []
for path in sorted(base.glob("*.txt")):
records.append(parse_species_file(path))
return records
def get_default_data_dir() -> str:
return os.environ.get(
"ECOSPECIES_DATA_DIR",
str(
Path(__file__).resolve().parents[4].parent
/ "01-legacy-code-and-data"
/ "InputFiles - TXT"
),
)

apps/api/src/ecospecies_api/repository.py Normal file

@@ -0,0 +1,508 @@
from __future__ import annotations
from collections import Counter
from datetime import datetime, timezone
import json
from sqlalchemy import inspect, select, text
from sqlalchemy.exc import SQLAlchemyError
from ecospecies_api.db import SessionLocal, create_db_engine
from ecospecies_api.models import Base, DocumentSection, IngestDiagnosticRecord, Species, SpeciesAuditLog
WORKFLOW_STATUSES = {"draft", "review", "published"}
SYSTEM_IMPORT_USER = "system-import"
def ensure_schema() -> None:
engine = create_db_engine()
Base.metadata.create_all(engine)
inspector = inspect(engine)
species_columns = {column["name"] for column in inspector.get_columns("species")}
statements: list[str] = []
if "publication_status" not in species_columns:
statements.append("ALTER TABLE species ADD COLUMN publication_status VARCHAR(32) DEFAULT 'published'")
if "is_archived" not in species_columns:
statements.append("ALTER TABLE species ADD COLUMN is_archived BOOLEAN DEFAULT FALSE")
if "editor_notes" not in species_columns:
statements.append("ALTER TABLE species ADD COLUMN editor_notes TEXT DEFAULT ''")
if "last_modified_by" not in species_columns:
statements.append("ALTER TABLE species ADD COLUMN last_modified_by VARCHAR(255) DEFAULT 'system-import'")
if statements:
with engine.begin() as connection:
for statement in statements:
connection.execute(text(statement))
connection.execute(
text(
"UPDATE species SET publication_status = COALESCE(publication_status, 'published'), "
"is_archived = COALESCE(is_archived, FALSE), "
"editor_notes = COALESCE(editor_notes, ''), "
"last_modified_by = COALESCE(last_modified_by, 'system-import')"
)
)
def import_species_payload(payload: list[dict[str, object]]) -> None:
ensure_schema()
with SessionLocal() as session:
existing_species = {
item.slug: item for item in session.scalars(select(Species)).all()
}
incoming_slugs: set[str] = set()
for item in payload:
slug = item["slug"]
incoming_slugs.add(slug)
species = existing_species.get(slug)
if species is None:
species = Species(
slug=slug,
source_file=item["source_file"],
title=item["title"],
common_name=item["common_name"],
scientific_name=item["scientific_name"],
flelmr_code=item["flelmr_code"],
summary=item["summary"],
section_count=item["section_count"],
publication_status="published",
is_archived=False,
editor_notes="",
last_modified_by=SYSTEM_IMPORT_USER,
)
session.add(species)
session.flush()
_ = species.sections
_ = species.diagnostics
_ = species.audit_entries
editorial_fields, section_positions = _get_editor_preservation_state(species)
species.source_file = item["source_file"]
species.title = item["title"]
species.common_name = item["common_name"]
species.scientific_name = item["scientific_name"]
species.flelmr_code = item["flelmr_code"]
species.section_count = item["section_count"]
if species.is_archived:
species.is_archived = False
session.add(
SpeciesAuditLog(
species_id=species.id,
changed_by=SYSTEM_IMPORT_USER,
changed_at=datetime.now(timezone.utc).isoformat(),
action="import_restore",
details_json=json.dumps(
{"is_archived": {"from": True, "to": False}},
ensure_ascii=True,
),
)
)
if "summary" not in editorial_fields:
species.summary = item["summary"]
if species.last_modified_by == "":
species.last_modified_by = SYSTEM_IMPORT_USER
existing_sections = {section.position: section for section in species.sections}
incoming_positions: set[int] = set()
for position, section_payload in enumerate(item["sections"], start=1):
incoming_positions.add(position)
section = existing_sections.get(position)
if section is None:
section = DocumentSection(
species_id=species.id,
position=position,
heading=section_payload["heading"],
content=section_payload["content"],
)
session.add(section)
continue
section.heading = section_payload["heading"]
if position not in section_positions:
section.content = section_payload["content"]
session.add(section)
for position, section in existing_sections.items():
if position not in incoming_positions:
session.delete(section)
for diagnostic in list(species.diagnostics):
session.delete(diagnostic)
for diagnostic in item["diagnostics"]:
if diagnostic["code"] == "missing_summary":
continue
session.add(
IngestDiagnosticRecord(
species_id=species.id,
level=diagnostic["level"],
code=diagnostic["code"],
message=diagnostic["message"],
)
)
session.add(species)
for slug, species in existing_species.items():
if slug in incoming_slugs:
continue
if not species.is_archived:
species.is_archived = True
session.add(
SpeciesAuditLog(
species_id=species.id,
changed_by=SYSTEM_IMPORT_USER,
changed_at=datetime.now(timezone.utc).isoformat(),
action="import_archive",
details_json=json.dumps(
{"is_archived": {"from": False, "to": True}},
ensure_ascii=True,
),
)
)
session.add(species)
session.commit()
def _get_editor_preservation_state(species: Species) -> tuple[set[str], set[int]]:
editorial_fields: set[str] = set()
section_positions: set[int] = set()
for entry in species.audit_entries:
try:
details = json.loads(entry.details_json)
except json.JSONDecodeError:
continue
if entry.action == "editorial_update":
editorial_fields.update(details.keys())
elif entry.action == "section_update":
section_position = details.get("section_position")
if isinstance(section_position, int):
section_positions.add(section_position)
return editorial_fields, section_positions
def has_species_data() -> bool:
ensure_schema()
with SessionLocal() as session:
species = session.scalar(select(Species.id).limit(1))
return species is not None
def get_readiness_status() -> dict[str, object]:
try:
ensure_schema()
with SessionLocal() as session:
species_count = session.query(Species).count()
return {
"ready": True,
"database": "ok",
"species_count": species_count,
"data_state": "loaded" if species_count > 0 else "empty",
}
except SQLAlchemyError as exc:
return {
"ready": False,
"database": "error",
"species_count": None,
"data_state": "unavailable",
"error": str(exc),
}
def _species_to_payload(species: Species, include_sections: bool = True) -> dict[str, object]:
return {
"slug": species.slug,
"source_file": species.source_file,
"title": species.title,
"common_name": species.common_name,
"scientific_name": species.scientific_name,
"flelmr_code": species.flelmr_code,
"summary": species.summary,
"section_count": species.section_count,
"publication_status": species.publication_status,
"is_archived": species.is_archived,
"editor_notes": species.editor_notes,
"last_modified_by": species.last_modified_by,
"diagnostics": [
{"level": diagnostic.level, "code": diagnostic.code, "message": diagnostic.message}
for diagnostic in species.diagnostics
],
"sections": (
[
{
"id": section.id,
"position": section.position,
"heading": section.heading,
"content": section.content,
}
for section in species.sections
]
if include_sections
else []
),
}
def list_species(
search: str = "",
include_unpublished: bool = False,
include_archived: bool = False,
) -> list[dict[str, object]]:
ensure_schema()
with SessionLocal() as session:
query = select(Species).order_by(Species.common_name, Species.title)
species = list(session.scalars(query))
payload = [_species_to_payload(item, include_sections=False) for item in species]
if not include_archived:
payload = [item for item in payload if not item["is_archived"]]
if not include_unpublished:
payload = [item for item in payload if item["publication_status"] == "published"]
if search:
needle = search.lower()
payload = [
item
for item in payload
if needle in item["common_name"].lower()
or needle in item["scientific_name"].lower()
or needle in item["title"].lower()
]
return payload
def get_species_by_slug(
slug: str,
include_unpublished: bool = False,
include_archived: bool = False,
) -> dict[str, object] | None:
ensure_schema()
with SessionLocal() as session:
species = session.scalar(select(Species).where(Species.slug == slug))
if species is None:
return None
if not include_archived and species.is_archived:
return None
if not include_unpublished and species.publication_status != "published":
return None
_ = species.sections
_ = species.diagnostics
return _species_to_payload(species, include_sections=True)
def get_summary_metrics() -> dict[str, object]:
species = list_species()
section_total = sum(item["section_count"] for item in species)
counter = Counter()
for item in species:
for diagnostic in item["diagnostics"]:
counter[diagnostic["code"]] += 1
return {
"species_count": len(species),
"section_count": section_total,
"diagnostic_counts": dict(counter),
}
def list_diagnostics() -> list[dict[str, object]]:
species = list_species()
return [
{
"slug": item["slug"],
"common_name": item["common_name"],
"source_file": item["source_file"],
"diagnostics": item["diagnostics"],
}
for item in species
if item["diagnostics"]
]
def get_editor_species_list(search: str = "") -> list[dict[str, object]]:
return list_species(search=search, include_unpublished=True, include_archived=True)
def get_editor_species_workflow(slug: str) -> dict[str, object] | None:
item = get_species_by_slug(slug, include_unpublished=True, include_archived=True)
if item is None:
return None
return {
"slug": item["slug"],
"title": item["title"],
"common_name": item["common_name"],
"publication_status": item["publication_status"],
"is_archived": item["is_archived"],
"editor_notes": item["editor_notes"],
"last_modified_by": item["last_modified_by"],
"diagnostic_count": len(item["diagnostics"]),
}
def get_editor_species_detail(slug: str) -> dict[str, object] | None:
return get_species_by_slug(slug, include_unpublished=True, include_archived=True)
def list_species_audit(slug: str) -> list[dict[str, object]] | None:
ensure_schema()
with SessionLocal() as session:
species = session.scalar(select(Species).where(Species.slug == slug))
if species is None:
return None
_ = species.audit_entries
return [
{
"id": entry.id,
"changed_by": entry.changed_by,
"changed_at": entry.changed_at,
"action": entry.action,
"details": json.loads(entry.details_json),
}
for entry in species.audit_entries
]
def update_species_editorial(
slug: str,
publication_status: str | None,
summary: str | None,
editor_notes: str | None,
is_archived: bool | None,
username: str,
) -> dict[str, object] | None:
ensure_schema()
with SessionLocal() as session:
species = session.scalar(select(Species).where(Species.slug == slug))
if species is None:
return None
before = {
"publication_status": species.publication_status,
"summary": species.summary,
"editor_notes": species.editor_notes,
"is_archived": species.is_archived,
}
if publication_status is not None:
normalized = publication_status.strip().lower()
if normalized not in WORKFLOW_STATUSES:
raise ValueError(
f"Unsupported publication_status: {publication_status}. "
f"Expected one of {sorted(WORKFLOW_STATUSES)}"
)
species.publication_status = normalized
if summary is not None:
species.summary = summary.strip()
if editor_notes is not None:
species.editor_notes = editor_notes.strip()
if is_archived is not None:
species.is_archived = is_archived
after = {
"publication_status": species.publication_status,
"summary": species.summary,
"editor_notes": species.editor_notes,
"is_archived": species.is_archived,
}
changed_fields = {
key: {"from": before[key], "to": after[key]}
for key in before
if before[key] != after[key]
}
if changed_fields:
species.last_modified_by = username
session.add(
SpeciesAuditLog(
species_id=species.id,
changed_by=username,
changed_at=datetime.now(timezone.utc).isoformat(),
action="editorial_update",
details_json=json.dumps(changed_fields, ensure_ascii=True),
)
)
session.add(species)
session.commit()
session.refresh(species)
return {
"slug": species.slug,
"summary": species.summary,
"publication_status": species.publication_status,
"editor_notes": species.editor_notes,
"is_archived": species.is_archived,
"last_modified_by": species.last_modified_by,
"changed_fields": changed_fields,
}
def update_species_section(
slug: str,
section_position: int,
content: str,
username: str,
) -> dict[str, object] | None:
ensure_schema()
with SessionLocal() as session:
species = session.scalar(select(Species).where(Species.slug == slug))
if species is None:
return None
section = session.scalar(
select(DocumentSection).where(
DocumentSection.species_id == species.id,
DocumentSection.position == section_position,
)
)
if section is None:
return None
new_content = content.strip()
changed_fields = {}
if section.content != new_content:
changed_fields["section_content"] = {
"from": section.content,
"to": new_content,
}
if changed_fields:
section.content = new_content
species.last_modified_by = username
session.add(
SpeciesAuditLog(
species_id=species.id,
changed_by=username,
changed_at=datetime.now(timezone.utc).isoformat(),
action="section_update",
details_json=json.dumps(
{
"section_position": section.position,
"section_heading": section.heading,
**changed_fields,
},
ensure_ascii=True,
),
)
)
session.add(section)
session.add(species)
session.commit()
session.refresh(section)
return {
"slug": species.slug,
"section": {
"id": section.id,
"position": section.position,
"heading": section.heading,
"content": section.content,
},
"last_modified_by": species.last_modified_by,
"changed_fields": changed_fields,
}

@@ -0,0 +1,21 @@
from __future__ import annotations
import importlib.util
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parent
SRC = ROOT / "src"
if str(SRC) not in sys.path:
sys.path.insert(0, str(SRC))
TEST_PATH = ROOT / "tests" / "test_repository.py"
SPEC = importlib.util.spec_from_file_location("ecospecies_api_test_repository", TEST_PATH)
assert SPEC is not None and SPEC.loader is not None
MODULE = importlib.util.module_from_spec(SPEC)
SPEC.loader.exec_module(MODULE)
for name in dir(MODULE):
if name.startswith("Test") or name.endswith("Tests"):
globals()[name] = getattr(MODULE, name)

apps/api/tests/test_repository.py Normal file

@@ -0,0 +1,307 @@
from __future__ import annotations
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from ecospecies_api import repository
SAMPLE_PAYLOAD = [
{
"slug": "test-shad",
"source_file": "Test Shad.txt",
"title": "Test Shad (Alosa testus)",
"common_name": "Test Shad",
"scientific_name": "Alosa testus",
"flelmr_code": "9999",
"summary": "",
"section_count": 2,
"diagnostics": [
{
"level": "warning",
"code": "missing_summary",
"message": "Summary/Abstract section is missing.",
},
{
"level": "warning",
"code": "missing_citations",
"message": "References section not found.",
},
],
"sections": [
{"heading": "HEADER", "content": "Header content"},
{"heading": "HABITAT", "content": "Habitat content"},
],
}
]
UPDATED_PAYLOAD = [
{
"slug": "test-shad",
"source_file": "Test Shad v2.txt",
"title": "Test Shad Revised (Alosa testus)",
"common_name": "Test Shad",
"scientific_name": "Alosa testus revised",
"flelmr_code": "1000",
"summary": "Imported replacement summary.",
"section_count": 2,
"diagnostics": [
{
"level": "warning",
"code": "missing_flelmr_code",
"message": "Replacement diagnostic.",
}
],
"sections": [
{"heading": "HEADER", "content": "Replacement header content"},
{"heading": "HABITAT", "content": "Replacement habitat content"},
],
}
]
DIFFERENT_PAYLOAD = [
{
"slug": "other-fish",
"source_file": "Other Fish.txt",
"title": "Other Fish (Pisces otherus)",
"common_name": "Other Fish",
"scientific_name": "Pisces otherus",
"flelmr_code": "2000",
"summary": "Other fish summary.",
"section_count": 1,
"diagnostics": [],
"sections": [
{"heading": "HEADER", "content": "Other fish header"},
],
}
]
class RepositoryWorkflowTests(unittest.TestCase):
def setUp(self) -> None:
self.tempdir = tempfile.TemporaryDirectory()
db_path = Path(self.tempdir.name) / "test.db"
self.engine = create_engine(f"sqlite:///{db_path}", future=True)
self.session_local = sessionmaker(
bind=self.engine,
autoflush=False,
autocommit=False,
future=True,
)
self.engine_patch = patch.object(repository, "create_db_engine", return_value=self.engine)
self.session_patch = patch.object(repository, "SessionLocal", self.session_local)
self.engine_patch.start()
self.session_patch.start()
repository.import_species_payload(SAMPLE_PAYLOAD)
def tearDown(self) -> None:
self.session_patch.stop()
self.engine_patch.stop()
self.engine.dispose()
self.tempdir.cleanup()
def test_import_filters_missing_summary_diagnostic_from_accepted_dataset(self) -> None:
detail = repository.get_species_by_slug("test-shad")
self.assertIsNotNone(detail)
self.assertEqual(detail["section_count"], 2)
self.assertEqual([section["position"] for section in detail["sections"]], [1, 2])
self.assertEqual([item["code"] for item in detail["diagnostics"]], ["missing_citations"])
def test_editorial_update_changes_publication_visibility_and_creates_audit(self) -> None:
result = repository.update_species_editorial(
slug="test-shad",
publication_status="draft",
summary="Editor-authored summary.",
editor_notes="Needs another review pass.",
is_archived=None,
username="bob",
)
self.assertIsNotNone(result)
self.assertEqual(result["publication_status"], "draft")
self.assertEqual(result["summary"], "Editor-authored summary.")
self.assertEqual(result["last_modified_by"], "bob")
self.assertEqual(repository.get_species_by_slug("test-shad"), None)
editor_detail = repository.get_editor_species_detail("test-shad")
audit = repository.list_species_audit("test-shad")
self.assertIsNotNone(editor_detail)
self.assertEqual(editor_detail["publication_status"], "draft")
self.assertEqual(editor_detail["summary"], "Editor-authored summary.")
self.assertEqual(editor_detail["editor_notes"], "Needs another review pass.")
self.assertIsNotNone(audit)
self.assertEqual(audit[0]["action"], "editorial_update")
self.assertEqual(audit[0]["changed_by"], "bob")
self.assertIn("summary", audit[0]["details"])
self.assertIn("publication_status", audit[0]["details"])
def test_section_update_records_section_audit_metadata(self) -> None:
result = repository.update_species_section(
slug="test-shad",
section_position=2,
content="Updated habitat content.",
username="carol",
)
self.assertIsNotNone(result)
self.assertEqual(result["section"]["position"], 2)
self.assertEqual(result["section"]["content"], "Updated habitat content.")
self.assertEqual(result["last_modified_by"], "carol")
self.assertEqual(sorted(result["changed_fields"].keys()), ["section_content"])
editor_detail = repository.get_editor_species_detail("test-shad")
audit = repository.list_species_audit("test-shad")
self.assertIsNotNone(editor_detail)
self.assertEqual(editor_detail["sections"][1]["content"], "Updated habitat content.")
self.assertIsNotNone(audit)
self.assertEqual(audit[0]["action"], "section_update")
self.assertEqual(audit[0]["changed_by"], "carol")
self.assertEqual(audit[0]["details"]["section_position"], 2)
self.assertEqual(audit[0]["details"]["section_heading"], "HABITAT")
self.assertEqual(
audit[0]["details"]["section_content"],
{"from": "Habitat content", "to": "Updated habitat content."},
)
def test_reimport_preserves_editorial_state_and_audit_history(self) -> None:
repository.update_species_editorial(
slug="test-shad",
publication_status="draft",
summary="Editor-authored summary.",
editor_notes="Needs another review pass.",
is_archived=None,
username="bob",
)
repository.update_species_section(
slug="test-shad",
section_position=2,
content="Updated habitat content.",
username="carol",
)
repository.import_species_payload(UPDATED_PAYLOAD)
editor_detail = repository.get_editor_species_detail("test-shad")
audit = repository.list_species_audit("test-shad")
self.assertIsNotNone(editor_detail)
self.assertEqual(editor_detail["source_file"], "Test Shad v2.txt")
self.assertEqual(editor_detail["title"], "Test Shad Revised (Alosa testus)")
self.assertEqual(editor_detail["scientific_name"], "Alosa testus revised")
self.assertEqual(editor_detail["flelmr_code"], "1000")
self.assertEqual(editor_detail["publication_status"], "draft")
self.assertEqual(editor_detail["summary"], "Editor-authored summary.")
self.assertEqual(editor_detail["editor_notes"], "Needs another review pass.")
self.assertEqual(editor_detail["sections"][0]["content"], "Replacement header content")
self.assertEqual(editor_detail["sections"][1]["content"], "Updated habitat content.")
self.assertEqual([item["code"] for item in editor_detail["diagnostics"]], ["missing_flelmr_code"])
self.assertIsNotNone(audit)
self.assertEqual(len(audit), 2)
self.assertEqual([entry["action"] for entry in audit], ["section_update", "editorial_update"])
def test_reimport_updates_summary_when_no_editorial_override_exists(self) -> None:
repository.import_species_payload(UPDATED_PAYLOAD)
detail = repository.get_species_by_slug("test-shad")
self.assertIsNotNone(detail)
self.assertEqual(detail["summary"], "Imported replacement summary.")
self.assertEqual(detail["sections"][0]["content"], "Replacement header content")
def test_editor_can_archive_species_explicitly(self) -> None:
result = repository.update_species_editorial(
slug="test-shad",
publication_status=None,
summary=None,
editor_notes=None,
is_archived=True,
username="dana",
)
public_detail = repository.get_species_by_slug("test-shad")
editor_detail = repository.get_editor_species_detail("test-shad")
audit = repository.list_species_audit("test-shad")
self.assertIsNotNone(result)
self.assertTrue(result["is_archived"])
self.assertEqual(result["last_modified_by"], "dana")
self.assertIsNone(public_detail)
self.assertIsNotNone(editor_detail)
self.assertTrue(editor_detail["is_archived"])
self.assertIsNotNone(audit)
self.assertEqual(audit[0]["action"], "editorial_update")
self.assertEqual(audit[0]["details"]["is_archived"], {"from": False, "to": True})
def test_editor_can_unarchive_species_explicitly(self) -> None:
repository.update_species_editorial(
slug="test-shad",
publication_status=None,
summary=None,
editor_notes=None,
is_archived=True,
username="dana",
)
result = repository.update_species_editorial(
slug="test-shad",
publication_status=None,
summary=None,
editor_notes=None,
is_archived=False,
username="erin",
)
public_detail = repository.get_species_by_slug("test-shad")
audit = repository.list_species_audit("test-shad")
self.assertIsNotNone(result)
self.assertFalse(result["is_archived"])
self.assertEqual(result["last_modified_by"], "erin")
self.assertIsNotNone(public_detail)
self.assertIsNotNone(audit)
self.assertEqual(audit[0]["details"]["is_archived"], {"from": True, "to": False})
def test_missing_species_is_archived_instead_of_deleted(self) -> None:
repository.import_species_payload(DIFFERENT_PAYLOAD)
public_detail = repository.get_species_by_slug("test-shad")
editor_detail = repository.get_editor_species_detail("test-shad")
editor_items = repository.get_editor_species_list()
audit = repository.list_species_audit("test-shad")
self.assertIsNone(public_detail)
self.assertIsNotNone(editor_detail)
self.assertTrue(editor_detail["is_archived"])
self.assertEqual([item["slug"] for item in repository.list_species()], ["other-fish"])
self.assertEqual([item["slug"] for item in editor_items], ["other-fish", "test-shad"])
self.assertIsNotNone(audit)
self.assertEqual(audit[0]["action"], "import_archive")
self.assertEqual(audit[0]["details"]["is_archived"], {"from": False, "to": True})
def test_archived_species_is_restored_when_it_reappears(self) -> None:
repository.import_species_payload(DIFFERENT_PAYLOAD)
repository.import_species_payload(UPDATED_PAYLOAD)
public_detail = repository.get_species_by_slug("test-shad")
editor_detail = repository.get_editor_species_detail("test-shad")
audit = repository.list_species_audit("test-shad")
self.assertIsNotNone(public_detail)
self.assertIsNotNone(editor_detail)
self.assertFalse(editor_detail["is_archived"])
self.assertEqual(public_detail["summary"], "Imported replacement summary.")
self.assertIsNotNone(audit)
self.assertEqual(audit[0]["action"], "import_restore")
self.assertEqual(audit[0]["details"]["is_archived"], {"from": True, "to": False})
if __name__ == "__main__":
unittest.main()

349
apps/web/app.js Normal file
View File

@ -0,0 +1,349 @@
const apiBase = "";
const speciesList = document.querySelector("#species-list");
const searchInput = document.querySelector("#search");
const archiveFilterGroup = document.querySelector("#archive-filter-group");
const detailEmpty = document.querySelector("#detail-empty");
const detail = document.querySelector("#detail");
const detailCode = document.querySelector("#detail-code");
const detailCommonName = document.querySelector("#detail-common-name");
const detailArchiveBadge = document.querySelector("#detail-archive-badge");
const detailArchiveNote = document.querySelector("#detail-archive-note");
const detailScientificName = document.querySelector("#detail-scientific-name");
const detailSummary = document.querySelector("#detail-summary");
const detailSections = document.querySelector("#detail-sections");
const speciesCount = document.querySelector("#species-count");
const sectionCount = document.querySelector("#section-count");
const authTokenInput = document.querySelector("#auth-token");
const authSaveButton = document.querySelector("#auth-save");
const authClearButton = document.querySelector("#auth-clear");
const authStatus = document.querySelector("#auth-status");
const editorPanel = document.querySelector("#editor-panel");
const editorPublicationStatus = document.querySelector("#editor-publication-status");
const editorSummary = document.querySelector("#editor-summary");
const editorNotes = document.querySelector("#editor-notes");
const editorIsArchived = document.querySelector("#editor-is-archived");
const editorSaveButton = document.querySelector("#editor-save");
const editorStatus = document.querySelector("#editor-status");
const auditPanel = document.querySelector("#audit-panel");
const auditList = document.querySelector("#audit-list");
let currentItems = [];
let currentSlug = null;
let currentSession = null;
let currentArchiveFilter = "active";
function getAuthToken() {
return window.localStorage.getItem("ecospecies_auth_token") || "";
}
function getAuthHeaders() {
const token = getAuthToken();
return token ? { Authorization: `Bearer ${token}` } : {};
}
// Escapes text-node content only; values placed in attributes would also need quote escaping.
function escapeHtml(value) {
return value
.replaceAll("&", "&amp;")
.replaceAll("<", "&lt;")
.replaceAll(">", "&gt;");
}
async function requestJson(path, options = {}) {
const headers = new Headers(options.headers || {});
const authHeaders = getAuthHeaders();
for (const [key, value] of Object.entries(authHeaders)) {
headers.set(key, value);
}
if (options.body && !headers.has("Content-Type")) {
headers.set("Content-Type", "application/json");
}
const response = await fetch(`${apiBase}${path}`, { ...options, headers });
const data = await response.json();
return { response, data };
}
function isEditorSession() {
return Boolean(currentSession && currentSession.user && ["editor", "admin"].includes(currentSession.user.role));
}
function getVisibleItems(items) {
if (!isEditorSession()) {
return items;
}
if (currentArchiveFilter === "archived") {
return items.filter((item) => item.is_archived);
}
if (currentArchiveFilter === "all") {
return items;
}
return items.filter((item) => !item.is_archived);
}
function syncArchiveFilterUi() {
archiveFilterGroup.classList.toggle("hidden", !isEditorSession());
for (const button of archiveFilterGroup.querySelectorAll("[data-archive-filter]")) {
button.classList.toggle("is-active", button.dataset.archiveFilter === currentArchiveFilter);
}
}
async function loadSession() {
const { data } = await requestJson("/api/auth/session");
currentSession = data;
if (!isEditorSession()) {
currentArchiveFilter = "active";
}
authTokenInput.value = getAuthToken();
if (data.authenticated) {
authStatus.textContent = `${data.user.username} (${data.user.role})`;
} else if (data.auth_configured) {
authStatus.textContent = "Auth configured, public session";
} else {
authStatus.textContent = "Public access";
}
syncArchiveFilterUi();
}
async function loadSummary() {
const { data } = await requestJson("/api/insights/summary");
speciesCount.textContent = data.species_count;
sectionCount.textContent = data.section_count;
}
function renderSpecies(items) {
speciesList.innerHTML = "";
const visibleItems = getVisibleItems(items);
if (!visibleItems.length) {
speciesList.innerHTML = `<p class="editor-status">${isEditorSession() ? "No species match the current archive filter." : "No species match the current search."}</p>`;
return;
}
for (const item of visibleItems) {
const button = document.createElement("button");
button.className = item.is_archived ? "species-card species-card-archived" : "species-card";
button.type = "button";
const archivedMeta = item.is_archived ? `<span class="species-state-badge">Archived</span>` : "";
button.innerHTML = `
<span class="species-name">${escapeHtml(item.common_name || item.title)}</span>
<span class="species-meta">${escapeHtml(item.scientific_name || "Scientific name missing")}</span>
<span class="species-meta">${escapeHtml(item.publication_status || "published")}${archivedMeta}</span>
<span class="species-meta">${item.diagnostic_count ? `${item.diagnostic_count} ingest flags` : "No ingest flags"}</span>
<span class="species-snippet">${escapeHtml((item.summary || "No summary extracted yet.").slice(0, 180))}</span>
`;
button.addEventListener("click", () => loadSpecies(item.slug));
speciesList.appendChild(button);
}
}
async function loadSpeciesList(search = "") {
const query = search ? `?search=${encodeURIComponent(search)}` : "";
const path = isEditorSession() ? `/api/editor/species${query}` : `/api/species${query}`;
const { data } = await requestJson(path);
currentItems = data.items;
syncArchiveFilterUi();
renderSpecies(currentItems);
}
async function loadSpecies(slug) {
currentSlug = slug;
const path = isEditorSession() ? `/api/editor/species/${slug}` : `/api/species/${slug}`;
const { response, data } = await requestJson(path);
if (!response.ok) {
detailEmpty.classList.remove("hidden");
detail.classList.add("hidden");
speciesList.innerHTML = `<p class="error">${escapeHtml(data.error || "Unable to load species.")}</p>`;
return;
}
detailEmpty.classList.add("hidden");
detail.classList.remove("hidden");
detailCode.textContent = data.flelmr_code ? `FLELMR ${data.flelmr_code}` : "Legacy source file";
detailCommonName.textContent = data.common_name || data.title;
detailArchiveBadge.classList.toggle("hidden", !data.is_archived);
detailArchiveNote.classList.toggle("hidden", !data.is_archived);
detailScientificName.textContent = data.scientific_name || "Scientific name missing in source";
detailSummary.textContent = data.summary || "No summary extracted from the current source file.";
editorPanel.classList.toggle("hidden", !isEditorSession());
auditPanel.classList.toggle("hidden", !isEditorSession());
if (isEditorSession()) {
editorPublicationStatus.value = data.publication_status || "published";
editorSummary.value = data.summary || "";
editorNotes.value = data.editor_notes || "";
editorIsArchived.checked = Boolean(data.is_archived);
editorStatus.textContent = data.last_modified_by
? `Last modified by ${data.last_modified_by}`
: "Editor session active";
await loadAudit(slug);
}
detailSections.innerHTML = "";
if (data.diagnostics.length) {
const diagnosticsEl = document.createElement("section");
diagnosticsEl.className = "detail-section detail-diagnostics";
diagnosticsEl.innerHTML = `
<h3>Ingest Diagnostics</h3>
<ul class="diagnostic-list">
${data.diagnostics
.map(
(diagnostic) =>
`<li><strong>${escapeHtml(diagnostic.code)}</strong>: ${escapeHtml(diagnostic.message)}</li>`,
)
.join("")}
</ul>
`;
detailSections.appendChild(diagnosticsEl);
}
for (const section of data.sections) {
const sectionEl = document.createElement("section");
sectionEl.className = "detail-section";
if (isEditorSession()) {
sectionEl.innerHTML = `
<h3>${escapeHtml(section.heading)}</h3>
<textarea class="section-editor" data-section-position="${section.position}" rows="10">${escapeHtml(section.content)}</textarea>
<div class="editor-actions">
<button type="button" class="section-save" data-section-position="${section.position}">Save Section</button>
</div>
`;
} else {
sectionEl.innerHTML = `
<h3>${escapeHtml(section.heading)}</h3>
<pre>${escapeHtml(section.content)}</pre>
`;
}
detailSections.appendChild(sectionEl);
}
if (isEditorSession()) {
for (const button of detailSections.querySelectorAll(".section-save")) {
button.addEventListener("click", async (event) => {
const position = event.currentTarget.dataset.sectionPosition;
const textarea = detailSections.querySelector(`textarea[data-section-position="${position}"]`);
await saveSectionContent(Number(position), textarea.value);
});
}
}
}
function renderAudit(items) {
auditList.innerHTML = "";
if (!items.length) {
auditList.innerHTML = `<p class="editor-status">No audit entries yet.</p>`;
return;
}
for (const item of items) {
const entry = document.createElement("article");
entry.className = "audit-entry";
const detailRows = Object.entries(item.details)
.map(([field, values]) => {
if (values && typeof values === "object" && "from" in values && "to" in values) {
return `<li><strong>${escapeHtml(field)}</strong>: ${escapeHtml(String(values.from ?? ""))} -> ${escapeHtml(String(values.to ?? ""))}</li>`;
}
return `<li><strong>${escapeHtml(field)}</strong>: ${escapeHtml(String(values ?? ""))}</li>`;
})
.join("");
entry.innerHTML = `
<p class="audit-meta">${escapeHtml(item.changed_by)} ${escapeHtml(item.changed_at)} ${escapeHtml(item.action)}</p>
<ul class="diagnostic-list">${detailRows}</ul>
`;
auditList.appendChild(entry);
}
}
async function loadAudit(slug) {
if (!isEditorSession()) {
return;
}
const { response, data } = await requestJson(`/api/editor/species/${slug}/audit`);
if (!response.ok) {
auditList.innerHTML = `<p class="error">${escapeHtml(data.error || "Unable to load audit history.")}</p>`;
return;
}
renderAudit(data.items);
}
async function saveEditorialChanges() {
if (!currentSlug || !isEditorSession()) {
return;
}
editorStatus.textContent = "Saving...";
const { response, data } = await requestJson(`/api/editor/species/${currentSlug}/editorial`, {
method: "POST",
body: JSON.stringify({
publication_status: editorPublicationStatus.value,
summary: editorSummary.value,
editor_notes: editorNotes.value,
is_archived: editorIsArchived.checked,
}),
});
if (!response.ok) {
editorStatus.textContent = data.error || "Save failed";
return;
}
editorStatus.textContent = `Saved by ${data.last_modified_by}`;
await Promise.all([loadSummary(), loadSpeciesList(searchInput.value), loadSpecies(currentSlug)]);
}
async function saveSectionContent(sectionPosition, content) {
if (!currentSlug || !isEditorSession()) {
return;
}
editorStatus.textContent = `Saving section ${sectionPosition}...`;
const { response, data } = await requestJson(`/api/editor/species/${currentSlug}/sections/${sectionPosition}`, {
method: "POST",
body: JSON.stringify({ content }),
});
if (!response.ok) {
editorStatus.textContent = data.error || "Section save failed";
return;
}
editorStatus.textContent = `Section ${sectionPosition} saved by ${data.last_modified_by}`;
await loadSpecies(currentSlug);
}
searchInput.addEventListener("input", async (event) => {
await loadSpeciesList(event.target.value);
});
for (const button of archiveFilterGroup.querySelectorAll("[data-archive-filter]")) {
button.addEventListener("click", () => {
currentArchiveFilter = button.dataset.archiveFilter || "active";
syncArchiveFilterUi();
renderSpecies(currentItems);
});
}
authSaveButton.addEventListener("click", async () => {
const token = authTokenInput.value.trim();
if (token) {
window.localStorage.setItem("ecospecies_auth_token", token);
}
await loadSession();
await loadSpeciesList(searchInput.value);
if (currentSlug) {
await loadSpecies(currentSlug);
}
});
authClearButton.addEventListener("click", async () => {
window.localStorage.removeItem("ecospecies_auth_token");
authTokenInput.value = "";
await loadSession();
await loadSpeciesList(searchInput.value);
if (currentSlug) {
await loadSpecies(currentSlug);
}
});
editorSaveButton.addEventListener("click", saveEditorialChanges);
async function bootstrap() {
await loadSession();
await Promise.all([loadSummary(), loadSpeciesList()]);
}
bootstrap().catch((error) => {
speciesList.innerHTML = `<p class="error">Failed to load data: ${escapeHtml(String(error))}</p>`;
});

109
apps/web/index.html Normal file
View File

@ -0,0 +1,109 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>EcoSpecies</title>
<link rel="stylesheet" href="./styles.css">
</head>
<body>
<main class="page">
<section class="hero">
<p class="eyebrow">Marine Species Knowledge System</p>
<h1>EcoSpecies</h1>
<p class="lede">
A modern follow-on for the legacy EcoSpecies archive, starting with direct ingestion
of historical Species Life History text files.
</p>
<div class="auth-bar">
<input id="auth-token" type="password" placeholder="Bearer token for editor access">
<button id="auth-save" type="button">Use Token</button>
<button id="auth-clear" type="button" class="secondary-button">Clear</button>
<p id="auth-status" class="auth-status">Public access</p>
</div>
<div class="hero-stats">
<div class="stat">
<span id="species-count">0</span>
<label>Species loaded</label>
</div>
<div class="stat">
<span id="section-count">0</span>
<label>Parsed sections</label>
</div>
</div>
</section>
<section class="workspace">
<aside class="panel panel-list">
<div class="panel-header">
<h2>Species</h2>
<input id="search" type="search" placeholder="Search common or scientific name">
<div id="archive-filter-group" class="archive-filter-group hidden">
<button type="button" class="archive-filter-button is-active" data-archive-filter="active">Active</button>
<button type="button" class="archive-filter-button" data-archive-filter="all">All</button>
<button type="button" class="archive-filter-button" data-archive-filter="archived">Archived</button>
</div>
</div>
<div id="species-list" class="species-list"></div>
</aside>
<section class="panel panel-detail">
<div id="detail-empty" class="empty-state">
<h2>Select a species</h2>
<p>Browse the migrated legacy corpus and inspect parsed sections from the original SLH files.</p>
</div>
<article id="detail" class="detail hidden">
<header class="detail-header">
<p id="detail-code" class="detail-code"></p>
<div class="detail-title-row">
<h2 id="detail-common-name"></h2>
<span id="detail-archive-badge" class="detail-badge hidden">Archived</span>
</div>
<p id="detail-scientific-name" class="detail-scientific-name"></p>
<p id="detail-summary" class="detail-summary"></p>
<p id="detail-archive-note" class="detail-archive-note hidden">
This record is archived. It is hidden from public endpoints but remains available to editors for audit and recovery.
</p>
</header>
<section id="editor-panel" class="detail-section editor-panel hidden">
<h3>Editor Controls</h3>
<label class="editor-label" for="editor-publication-status">Publication Status</label>
<select id="editor-publication-status">
<option value="draft">Draft</option>
<option value="review">Review</option>
<option value="published">Published</option>
</select>
<label class="editor-label" for="editor-summary">Summary</label>
<textarea id="editor-summary" rows="5" placeholder="Write a concise executive summary."></textarea>
<label class="editor-label" for="editor-notes">Editor Notes</label>
<textarea id="editor-notes" rows="4" placeholder="Internal editorial notes"></textarea>
<label class="archive-toggle">
<input id="editor-is-archived" type="checkbox">
<span>Archive this species</span>
</label>
<div class="editor-actions">
<button id="editor-save" type="button">Save Editorial Changes</button>
<p id="editor-status" class="editor-status"></p>
</div>
</section>
<section id="audit-panel" class="detail-section hidden">
<h3>Audit History</h3>
<div id="audit-list" class="audit-list"></div>
</section>
<div id="detail-sections" class="detail-sections"></div>
</article>
</section>
</section>
<footer class="footer">
<p>
This migration path preserves attribution for Dr. Peter Rubec, Dr. Diane Blackwood,
Dr. Welsbery R. Elsberry, and the Florida Fish and Wildlife Research Institute context
documented in the legacy project materials.
</p>
</footer>
</main>
<script src="./app.js" defer></script>
</body>
</html>

32
apps/web/nginx.conf Normal file
View File

@ -0,0 +1,32 @@
server {
listen 80;
server_name _;
root /usr/share/nginx/html;
index index.html;
location /api/ {
proxy_pass http://api:8000/api/;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
location /healthz {
proxy_pass http://api:8000/healthz;
proxy_http_version 1.1;
proxy_set_header Host $host;
}
location /readyz {
proxy_pass http://api:8000/readyz;
proxy_http_version 1.1;
proxy_set_header Host $host;
}
location / {
try_files $uri $uri/ /index.html;
}
}

427
apps/web/styles.css Normal file
View File

@ -0,0 +1,427 @@
:root {
--bg: #f4efe6;
--paper: rgba(255, 252, 247, 0.78);
--ink: #16251f;
--muted: #58655f;
--accent: #0f766e;
--accent-2: #bc6c25;
--line: rgba(22, 37, 31, 0.12);
--shadow: 0 24px 70px rgba(24, 35, 30, 0.15);
}
* {
box-sizing: border-box;
}
body {
margin: 0;
font-family: Georgia, "Times New Roman", serif;
color: var(--ink);
background:
radial-gradient(circle at top left, rgba(15, 118, 110, 0.14), transparent 28%),
radial-gradient(circle at top right, rgba(188, 108, 37, 0.16), transparent 24%),
linear-gradient(180deg, #f8f4ec, #efe6d7 70%, #e7dcc9);
}
.page {
width: min(1320px, calc(100vw - 32px));
margin: 0 auto;
padding: 24px 0 40px;
}
.hero,
.panel,
.footer {
backdrop-filter: blur(10px);
background: var(--paper);
border: 1px solid var(--line);
border-radius: 24px;
box-shadow: var(--shadow);
}
.hero {
padding: 28px;
margin-bottom: 20px;
}
.eyebrow {
margin: 0 0 10px;
text-transform: uppercase;
letter-spacing: 0.18em;
font-size: 0.78rem;
color: var(--accent);
}
h1 {
margin: 0;
font-size: clamp(2.8rem, 7vw, 5.6rem);
line-height: 0.92;
}
.lede {
max-width: 64ch;
color: var(--muted);
font-size: 1.08rem;
}
.hero-stats {
display: flex;
gap: 16px;
flex-wrap: wrap;
margin-top: 24px;
}
.auth-bar {
display: flex;
gap: 10px;
flex-wrap: wrap;
align-items: center;
margin-top: 18px;
}
.auth-bar input {
min-width: min(360px, 100%);
flex: 1;
}
.auth-status {
margin: 0;
color: var(--muted);
}
.stat {
min-width: 180px;
padding: 14px 16px;
border-radius: 18px;
background: rgba(255, 255, 255, 0.6);
border: 1px solid var(--line);
}
.stat span {
display: block;
font-size: 2rem;
font-weight: 700;
}
.stat label {
color: var(--muted);
}
.workspace {
display: grid;
grid-template-columns: 360px minmax(0, 1fr);
gap: 20px;
}
.panel {
padding: 18px;
}
.panel-header {
display: flex;
flex-direction: column;
gap: 12px;
margin-bottom: 14px;
}
.panel-header h2,
.empty-state h2,
.detail-header h2 {
margin: 0;
}
.archive-filter-group {
display: flex;
flex-wrap: wrap;
gap: 8px;
}
.archive-filter-button {
padding: 8px 12px;
background: rgba(255, 255, 255, 0.72);
color: var(--muted);
border-color: var(--line);
}
.archive-filter-button.is-active {
background: var(--accent);
color: white;
border-color: transparent;
}
input[type="search"] {
width: 100%;
border: 1px solid var(--line);
border-radius: 999px;
padding: 12px 16px;
font: inherit;
background: rgba(255, 255, 255, 0.9);
}
select,
textarea,
button {
font: inherit;
}
textarea,
select {
width: 100%;
border: 1px solid var(--line);
border-radius: 18px;
padding: 12px 14px;
background: rgba(255, 255, 255, 0.92);
}
button {
border: 1px solid transparent;
border-radius: 999px;
padding: 11px 16px;
background: var(--accent);
color: white;
cursor: pointer;
}
.secondary-button {
background: rgba(255, 255, 255, 0.7);
color: var(--ink);
border-color: var(--line);
}
.species-list {
display: flex;
flex-direction: column;
gap: 12px;
max-height: 70vh;
overflow: auto;
}
.species-card {
text-align: left;
padding: 14px;
border-radius: 18px;
border: 1px solid var(--line);
background: linear-gradient(180deg, rgba(255, 255, 255, 0.95), rgba(241, 237, 230, 0.95));
cursor: pointer;
transition: transform 160ms ease, border-color 160ms ease;
}
.species-card:hover {
transform: translateY(-1px);
border-color: rgba(15, 118, 110, 0.45);
}
.species-card-archived {
border-style: dashed;
background: linear-gradient(180deg, rgba(247, 241, 231, 0.98), rgba(233, 226, 214, 0.98));
}
.species-name,
.species-meta,
.species-snippet {
display: block;
}
.species-name {
font-size: 1.05rem;
font-weight: 700;
}
.species-meta,
.species-snippet,
.detail-scientific-name,
.detail-summary,
.footer p,
.error {
color: var(--muted);
}
.species-snippet {
margin-top: 6px;
font-size: 0.92rem;
}
.species-state-badge,
.detail-badge {
display: inline-flex;
align-items: center;
gap: 6px;
padding: 3px 10px;
border-radius: 999px;
border: 1px solid rgba(188, 108, 37, 0.32);
background: rgba(188, 108, 37, 0.12);
color: var(--accent-2);
font-size: 0.78rem;
font-weight: 700;
letter-spacing: 0.04em;
text-transform: uppercase;
}
.species-state-badge {
margin-left: 8px;
}
.empty-state {
min-height: 320px;
display: grid;
place-items: center;
text-align: center;
}
.detail.hidden,
.hidden {
display: none;
}
.detail-header {
padding-bottom: 16px;
border-bottom: 1px solid var(--line);
}
.detail-title-row {
display: flex;
align-items: center;
gap: 12px;
flex-wrap: wrap;
}
.detail-code {
margin: 0 0 8px;
text-transform: uppercase;
letter-spacing: 0.14em;
color: var(--accent-2);
font-size: 0.8rem;
}
.detail-scientific-name {
margin-top: 6px;
font-style: italic;
}
.detail-archive-note {
margin: 12px 0 0;
padding: 12px 14px;
border-radius: 14px;
background: rgba(188, 108, 37, 0.1);
border: 1px solid rgba(188, 108, 37, 0.2);
color: var(--accent-2);
}
.detail-sections {
display: grid;
gap: 16px;
margin-top: 18px;
}
.detail-section {
padding: 16px;
border-radius: 18px;
border: 1px solid var(--line);
background: rgba(255, 255, 255, 0.66);
}
.detail-section h3 {
margin-top: 0;
color: var(--accent);
}
.editor-panel {
margin-top: 18px;
}
.editor-label {
display: block;
margin: 0 0 8px;
font-weight: 700;
}
.editor-label + select,
.editor-label + textarea {
margin-bottom: 14px;
}
.archive-toggle {
display: flex;
align-items: center;
gap: 10px;
margin: 0 0 14px;
color: var(--ink);
font-weight: 700;
}
.archive-toggle input {
width: 18px;
height: 18px;
accent-color: var(--accent-2);
}
.editor-actions {
display: flex;
gap: 12px;
align-items: center;
flex-wrap: wrap;
}
.editor-status {
margin: 0;
color: var(--muted);
}
.audit-list {
display: grid;
gap: 12px;
}
.audit-entry {
padding: 14px;
border-radius: 16px;
border: 1px solid var(--line);
background: rgba(255, 255, 255, 0.72);
}
.audit-meta {
margin: 0 0 10px;
color: var(--muted);
font-size: 0.92rem;
}
.section-editor {
min-height: 220px;
margin-bottom: 12px;
white-space: pre-wrap;
font-family: "Courier New", monospace;
font-size: 0.92rem;
line-height: 1.45;
}
.diagnostic-list {
margin: 0;
padding-left: 18px;
}
.diagnostic-list li + li {
margin-top: 8px;
}
pre {
margin: 0;
white-space: pre-wrap;
font-family: "Courier New", monospace;
font-size: 0.92rem;
line-height: 1.45;
}
.footer {
margin-top: 20px;
padding: 18px 22px;
}
@media (max-width: 960px) {
.workspace {
grid-template-columns: 1fr;
}
.species-list {
max-height: 40vh;
}
}

78
docker-compose.yml Normal file
View File

@ -0,0 +1,78 @@
services:
db:
image: postgres:16-alpine
environment:
POSTGRES_DB: ecospecies
POSTGRES_USER: ecospecies
POSTGRES_PASSWORD: ecospecies
PGDATA: /var/lib/postgresql/data/pgdata
healthcheck:
test: ["CMD-SHELL", "pg_isready -U ecospecies -d ecospecies"]
interval: 5s
timeout: 5s
retries: 10
ports:
- "${ECOSPECIES_DB_PORT:-5432}:5432"
volumes:
- postgres_data:/var/lib/postgresql/data
importer:
image: python:3.12-slim
depends_on:
db:
condition: service_healthy
working_dir: /workspace
environment:
ECOSPECIES_DATA_DIR: /legacy-data/InputFiles - TXT
ECOSPECIES_DATABASE_URL: postgresql+psycopg://ecospecies:ecospecies@db:5432/ecospecies
ECOSPECIES_VENV_DIR: /workspace/.docker/venv
PYTHONPATH: /workspace/apps/api/src
command: ["/bin/sh", "-lc", "./scripts/bootstrap-python-env.sh && ./scripts/run-import.sh"]
volumes:
- .:/workspace
- ../01-legacy-code-and-data:/legacy-data:ro
- python_venv:/workspace/.docker/venv
- pip_cache:/root/.cache/pip
api:
image: python:3.12-slim
restart: unless-stopped
depends_on:
db:
condition: service_healthy
importer:
condition: service_completed_successfully
working_dir: /workspace
environment:
ECOSPECIES_DATA_DIR: /legacy-data/InputFiles - TXT
ECOSPECIES_DATABASE_URL: postgresql+psycopg://ecospecies:ecospecies@db:5432/ecospecies
ECOSPECIES_HOST: 0.0.0.0
ECOSPECIES_PORT: "8000"
ECOSPECIES_AUTH_TOKENS: ${ECOSPECIES_AUTH_TOKENS:-}
ECOSPECIES_VENV_DIR: /workspace/.docker/venv
PYTHONPATH: /workspace/apps/api/src
command: ["/bin/sh", "-lc", "./scripts/bootstrap-python-env.sh && ./scripts/run-api.sh"]
ports:
- "${ECOSPECIES_API_PORT:-8000}:8000"
volumes:
- .:/workspace
- ../01-legacy-code-and-data:/legacy-data:ro
- python_venv:/workspace/.docker/venv
- pip_cache:/root/.cache/pip
web:
image: nginx:1.27-alpine
restart: unless-stopped
depends_on:
api:
condition: service_started
ports:
- "${ECOSPECIES_WEB_PORT:-8080}:80"
volumes:
- ./apps/web:/usr/share/nginx/html:ro
- ./apps/web/nginx.conf:/etc/nginx/conf.d/default.conf:ro
volumes:
postgres_data:
python_venv:
pip_cache:

44
docs/flelmr-authoring.md Normal file
View File

@ -0,0 +1,44 @@
# FLELMR Authoring Notes
## Summary Field
The legacy outline reserves header slot `1.4` for `Summary/Abstract` or `Executive Summary`, but the historical corpus in this repository does not contain populated examples. Future FLELMR-compatible records should treat this as a short abstract for the full species life history.
## Recommended Format
Place the summary in the `HEADER` section immediately after the FLELMR code block:
```text
Summary/Abstract:
Short paragraph here.
```
Accepted marker variants:
- `Summary/Abstract`
- `Summary/Abstract:`
- `Summary`
- `Summary:`
- `Abstract`
- `Abstract:`
- `Executive Summary`
- `Executive Summary:`
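As an illustration, a minimal Python sketch of marker recognition, assuming whole-line markers as listed above (the function and module layout are hypothetical, not the repository's actual ingest API):
```python
import re

# Hypothetical sketch: matches any accepted marker variant on a line of its own,
# with or without the trailing colon.
SUMMARY_MARKER = re.compile(r"^\s*(Summary/Abstract|Executive Summary|Summary|Abstract)\s*:?\s*$")

def find_summary(header_lines):
    """Return the paragraph following a summary marker, or None if absent."""
    for index, line in enumerate(header_lines):
        if SUMMARY_MARKER.match(line):
            body = []
            for text in header_lines[index + 1:]:
                if not text.strip():
                    break  # stop at the first blank line after the marker
                body.append(text.strip())
            return " ".join(body) or None
    return None
```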
## Intended Meaning
The field should be a concise executive overview of the record, not metadata and not a section outline. It should help a reader understand the species before they read the detailed habitat, trophic, reproductive, and citation sections.
## Suggested Content
Write 3 to 6 sentences that cover:
- species identity and ecological or management importance
- general geographic range
- major habitat use or life-stage pattern
- notable trophic or reproductive traits
- major conservation, fishery, or data-status note if relevant
## Publishing Rule
- During ingestion or editorial review, a missing summary should be treated as a warning.
- In accepted datasets, the summary may remain empty if no editor-authored abstract is available.
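A minimal sketch of how that rule could surface at ingest time; the `missing_summary` code and the dict shape are assumptions for illustration only:
```python
def summary_diagnostics(summary):
    """Missing summaries warn but never fail the record (illustrative only)."""
    if not (summary and summary.strip()):
        return [{
            "code": "missing_summary",  # assumed diagnostic code
            "severity": "warning",
            "message": "No Summary/Abstract found in the HEADER section.",
        }]
    return []
```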

View File

@ -0,0 +1,38 @@
# Forgejo CI Activation Checklist
Use this checklist before turning on a Forgejo-native pipeline for `EcoSpecies-Atlas`.
## 1. Confirm Workflow Source
- Decide whether the repository will continue using the fallback `.github/workflows/ci.yml` definition or activate a dedicated `.forgejo/workflows/ci.yml`.
- If a Forgejo-native workflow is desired, start from `.forgejo/workflows/ci.yml.template`.
## 2. Confirm Runner Labels
- Identify the runner label accepted by the target Forgejo instance.
- Replace the placeholder `runs-on: docker` values in the template with that label before activation.
- Verify that the selected runner can execute both Python and Node-based jobs.
## 3. Confirm Action Source Policy
- Check whether the Forgejo instance allows GitHub-hosted actions such as `actions/checkout`, `actions/setup-python`, and `actions/setup-node`.
- If remote GitHub-hosted actions are not allowed, replace those `uses:` entries with approved internal or mirrored actions.
## 4. Activate The Workflow
1. Copy `.forgejo/workflows/ci.yml.template` to `.forgejo/workflows/ci.yml`.
2. Update runner labels.
3. Update action references if required by the instance policy.
4. Commit the activated workflow.
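Steps 1-2 can be scripted. A minimal Python sketch, assuming the placeholder label is exactly `runs-on: docker` and using `linux-amd64` purely as an example replacement:
```python
from pathlib import Path

template = Path(".forgejo/workflows/ci.yml.template")
workflow = Path(".forgejo/workflows/ci.yml")

# Copy the template and swap the placeholder runner label for the instance's label.
text = template.read_text()
workflow.write_text(text.replace("runs-on: docker", "runs-on: linux-amd64"))  # example label
```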
## 5. Validate CI Behavior
- Open a test change request and confirm the repository-layer API tests run.
- Confirm the stubbed browser smoke test runs and passes.
- Verify that failures block merges according to the repository-host policy.
## 6. Optional Hardening
- Require the `CI` workflow before merge.
- Mirror or pin all action sources used by the Forgejo workflow.
- Document any instance-specific runner labels or action mirrors in the repository host settings or administrator notes.

101
docs/legacy-survey.md Normal file
View File

@ -0,0 +1,101 @@
# Legacy EcoSpecies Survey
## Scope
This repository is primarily an archive of the legacy EcoSpecies system and its source materials. The current contents are sufficient to begin a structured migration plan and an initial replacement implementation.
## Repository Inventory
- `01-legacy-code-and-data/EcoSpecies_2012_0807_onCurly`: legacy ASP.NET MVC application source.
- `01-legacy-code-and-data/EcoSpeciesSql_new`: SQL Server database creation and lookup-population scripts.
- `01-legacy-code-and-data/InputFiles - TXT`: 92 source species life history text files.
- `01-legacy-code-and-data/OutputFiles - RTF`: 95 generated report outputs.
- `01-legacy-code-and-data/ecospecies-2/species-life-histories`: 95 paired `.txt` and `.sql` files representing a later export snapshot.
- `01-legacy-code-and-data/slh-mod-txt2sql`: Python parsing scripts and hand-edited SQL/text files used to ingest SLH content.
- `01-legacy-code-and-data/TextFilesAboutFLELMR_EcoSpecies`: manuals, contract/report artifacts, information architecture, and historical background.
- `02-docs`: project notes, species outlines, import notes, and spreadsheets.
## Legacy Capabilities Confirmed
The legacy ASP.NET MVC application exposes these core workflows:
- Public species list with taxonomic sorting and fielded search.
- Per-species detail pages.
- Heading and subheading navigation for life-history content.
- Report generation to `rtf`, `txt`, and "web only" output modes.
- Public/private visibility controls via `tbl_Slh.PublicView`.
- XML-template-driven report assembly.
Evidence:
- `HomeController.cs` provides `Home`, `About`, `Glossary`, and `Manual`.
- `OrganismController.cs` implements listing, filtering, details, node/subnode views, and CRUD.
- `ReportController.cs` implements report generation and batch export.
## Legacy Architecture Summary
### Application layer
- ASP.NET MVC 3-era application.
- Entity Framework database-first model (`EcoSpecies.edmx` and generated context/models).
- Razor views with jQuery/jQuery UI assets.
### Data layer
- SQL Server database `Eco_Species`.
- 31 SQL scripts in `EcoSpeciesSql_new` for database creation, schema population, lookup tables, admin user creation, and XML template support.
### Content pipeline
- Species Life History text files are semi-structured and heading-driven.
- Legacy Python parsing scripts (`slhparse.py`, `slhparse_2012_0801.py`) contain domain-specific cleanup and tag recognition logic.
- Generated outputs include SQL inserts and RTF/text reports.
## Important Migration Observations
### What is reusable
- Raw SLH text corpus.
- SQL schema as a source of domain concepts and relationship mapping.
- Parsing logic and tag dictionaries as institutional knowledge.
- Glossary/manual/about content for continuity.
- Existing report outputs for regression comparison.
### What should not be copied forward directly
- SQL Server-specific operational assumptions.
- Legacy publish/deploy practices.
- MVC 3 / EF database-first scaffolding.
- Generated binaries and `obj` artifacts.
### Data-model implications
The archive suggests the modern system needs first-class support for:
- species and taxonomic metadata
- one or more source documents per species
- hierarchical sections/headings/subheadings
- citations/references and authoring metadata
- visibility/publication state
- report/export templates
- ecological linkages suitable for graph-style visualization
## Risks and Gaps
- The source text format is inconsistent and sometimes noisy; ingest must tolerate malformed headings and spacing.
- The legacy materials note that some outlines used 4-5 heading levels while the implemented site handled only 3.
- The current repository does not include a clean, already-normalized database dump for direct import.
- Image/assets provenance and usage permissions need review during migration.
## Acknowledgements To Preserve
The replacement app should preserve credit to:
- Dr. Peter Rubec for FLELMR-derived source material and species life history content.
- Dr. Diane Blackwood for the original EcoSpecies web application and architecture work.
- Dr. Welsbery R. Elsberry for consultation and Python programming support.
- Florida Fish and Wildlife Research Institute and related public-agency context described in the project materials.
## Immediate Migration Recommendation
Use the SLH text corpus as the initial authoritative ingest source, not the legacy MVC app. Treat the SQL schema and parser scripts as reference material for a modern normalized model and for ingest validation.

126
docs/roadmap.md Normal file
View File

@ -0,0 +1,126 @@
# EcoSpecies Modernization Roadmap
## Target Product
Create a Docker Compose-based, open-source EcoSpecies successor that:
- ingests legacy SLH text files and future species submissions
- exposes a stable API for species, sections, citations, and ecological linkages
- provides a responsive public web app
- supports researcher/editor workflows for curation and publishing
- generates exports aligned with legacy reporting needs and future FLELMR-style outputs
## Recommended Stack
### Core platform
- Backend: Python API service
- Primary datastore: PostgreSQL
- Search/indexing: PostgreSQL full-text initially, optional Meilisearch/OpenSearch later
- Frontend: static SPA or React-based client once requirements stabilize
- Deployment/runtime: Docker Compose for development and small-scale deployment
### Why this stack
- permissive licenses
- strong support for text ingestion, APIs, and data processing
- easy local development
- clear path from prototype to production
## Product Capabilities By Phase
### Phase 0: Discovery and migration planning
- Inventory legacy assets and user-facing capabilities.
- Capture the replacement architecture and ingestion strategy.
- Define acknowledgements, provenance, and licensing boundaries.
### Phase 1: Ingestion foundation
- Parse legacy `.txt` SLH inputs into structured JSON records.
- Normalize common metadata: title, scientific name, common name, FLELMR code, headings, references.
- Create ingest diagnostics to flag malformed files and missing metadata.
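A minimal sketch of the heading-driven split this phase implies, assuming all-caps section headings such as `HEADER` and `HABITAT` (the regex and function are illustrative, not the shipped parser):
```python
import re

HEADING = re.compile(r"^[A-Z][A-Z /&-]+$")  # assumed convention: HEADER, HABITAT, ...

def parse_slh(text):
    """Split one legacy SLH text file into ordered heading/content sections."""
    sections, heading, buffer = [], None, []
    for line in text.splitlines():
        if HEADING.match(line.strip()):
            if heading is not None:
                # Preamble before the first heading is dropped in this sketch.
                sections.append({"heading": heading, "content": "\n".join(buffer).strip()})
            heading, buffer = line.strip(), []
        else:
            buffer.append(line)
    if heading is not None:
        sections.append({"heading": heading, "content": "\n".join(buffer).strip()})
    return sections
```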
### Phase 2: Public read experience
- Species listing and search.
- Species detail view with section navigation.
- Provenance and acknowledgement display.
- Summary metrics on corpus coverage.
### Phase 3: Structured persistence
- Move parsed content into PostgreSQL.
- Add editor-safe import jobs and audit metadata.
- Preserve raw source alongside normalized records.
- Establish authentication and role-based access for editor and admin workflows.
- Add persisted editorial workflow state for draft, review, and published records.
- Make document sections individually addressable for editor review and revision, with audit history for section-level changes.
### Phase 4: Linkages and visualization
- Model predator/prey, habitat, and ecological association edges.
- Add graph endpoints and species-relationship views.
- Support public-friendly visual explanations and expert filters.
### Phase 5: Reports and export
- Recreate legacy-like text/RTF export.
- Add machine-readable export formats such as JSON and Markdown.
- Support FLELMR-oriented authoring/export profiles.
### Phase 6: Assisted research workflows
- Add local-LLM-assisted extraction and drafting in a human-review loop.
- Integrate bibliography tooling for citation consolidation.
- Support candidate-species intake for records not yet in the historical corpus.
- Restrict assisted drafting and publication actions to authenticated editorial roles.
## Data Model Direction
Initial core entities:
- `species`
- `source_document`
- `document_section`
- `citation`
- `taxon`
- `linkage`
- `media_asset`
- `ingest_run`
Key design rules:
- preserve raw source text
- retain provenance and import timestamps
- separate public published records from draft/editor states
- make sections addressable for citation and graph linking
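As a sketch only, with field names that are assumptions rather than the final schema, the first few entities and the design rules above might map to:
```python
from dataclasses import dataclass, field
from datetime import datetime

@dataclass
class SourceDocument:
    source_file: str
    raw_text: str            # rule: preserve raw source text verbatim
    imported_at: datetime    # rule: retain provenance and import timestamps

@dataclass
class DocumentSection:
    position: int            # rule: sections stay addressable for citation and linking
    heading: str
    content: str

@dataclass
class Species:
    slug: str
    scientific_name: str
    publication_status: str = "draft"   # rule: draft/editor state kept apart from published
    sections: list = field(default_factory=list)
```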
## LLM Extension Strategy
Use local models for assistive tasks only, never for silent publication:
- extracting candidate structured fields from new SLH text
- suggesting missing headings or linkage labels
- clustering similar citations
- drafting summaries for editor review
Guardrails:
- raw text remains authoritative
- all generated content is marked as draft
- every automated extraction stores source spans where possible
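One way to make those guardrails concrete, sketched with assumed names:
```python
from dataclasses import dataclass

@dataclass
class ExtractedField:
    """A model-suggested value an editor must review before it can publish."""
    field_name: str
    value: str
    source_start: int       # character offsets into the authoritative raw text
    source_end: int
    status: str = "draft"   # generated content is always marked draft
```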
## Development Roadmap
1. Implement a thin ingestion API over the legacy text corpus.
2. Build a responsive browser UI for listing and viewing species.
3. Add a persistent PostgreSQL-backed ingest store.
4. Introduce export and visualization services.
5. Add editorial workflows and local-LLM assistance.
## Definition Of Done For The Initial Milestone
- `docker compose up` starts a working API and frontend.
- The system can enumerate the legacy corpus and show parsed species detail for at least one real SLH file.
- Project docs describe the migration approach, target architecture, and next phases.

76
package-lock.json generated Normal file
View File

@ -0,0 +1,76 @@
{
"name": "ecospecies-atlas",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "ecospecies-atlas",
"devDependencies": {
"@playwright/test": "^1.58.2"
}
},
"node_modules/@playwright/test": {
"version": "1.58.2",
"resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.58.2.tgz",
"integrity": "sha512-akea+6bHYBBfA9uQqSYmlJXn61cTa+jbO87xVLCWbTqbWadRVmhxlXATaOjOgcBaWU4ePo0wB41KMFv3o35IXA==",
"dev": true,
"license": "Apache-2.0",
"dependencies": {
"playwright": "1.58.2"
},
"bin": {
"playwright": "cli.js"
},
"engines": {
"node": ">=18"
}
},
"node_modules/fsevents": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
"integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
"dev": true,
"hasInstallScript": true,
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
}
},
"node_modules/playwright": {
"version": "1.58.2",
"resolved": "https://registry.npmjs.org/playwright/-/playwright-1.58.2.tgz",
"integrity": "sha512-vA30H8Nvkq/cPBnNw4Q8TWz1EJyqgpuinBcHET0YVJVFldr8JDNiU9LaWAE1KqSkRYazuaBhTpB5ZzShOezQ6A==",
"dev": true,
"license": "Apache-2.0",
"dependencies": {
"playwright-core": "1.58.2"
},
"bin": {
"playwright": "cli.js"
},
"engines": {
"node": ">=18"
},
"optionalDependencies": {
"fsevents": "2.3.2"
}
},
"node_modules/playwright-core": {
"version": "1.58.2",
"resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.58.2.tgz",
"integrity": "sha512-yZkEtftgwS8CsfYo7nm0KE8jsvm6i/PTgVtB8DL726wNf6H2IMsDuxCpJj59KDaxCtSnrWan2AeDqM7JBaultg==",
"dev": true,
"license": "Apache-2.0",
"bin": {
"playwright-core": "cli.js"
},
"engines": {
"node": ">=18"
}
}
}
}

11
package.json Normal file
View File

@ -0,0 +1,11 @@
{
"name": "ecospecies-atlas",
"private": true,
"scripts": {
"test:ui": "node ./node_modules/@playwright/test/cli.js test",
"test:ui:stack": "PLAYWRIGHT_BASE_URL=http://127.0.0.1:18080 PLAYWRIGHT_LIVE_STACK=1 node ./node_modules/@playwright/test/cli.js test"
},
"devDependencies": {
"@playwright/test": "^1.58.2"
}
}

22
playwright.config.js Normal file
View File

@ -0,0 +1,22 @@
const { defineConfig } = require("./node_modules/@playwright/test");
const isLiveStack = process.env.PLAYWRIGHT_LIVE_STACK === "1";
const baseURL = process.env.PLAYWRIGHT_BASE_URL || "http://127.0.0.1:4173";
module.exports = defineConfig({
testDir: "./tests/ui",
timeout: 30000,
retries: 0,
use: {
baseURL,
headless: true,
},
webServer: isLiveStack
? undefined
: {
command: "node tests/ui/server.js",
url: "http://127.0.0.1:4173",
reuseExistingServer: true,
timeout: 30000,
},
});

13
scripts/bootstrap-python-env.sh Normal file
View File

@ -0,0 +1,13 @@
#!/bin/sh
set -eu
ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)"
VENV_DIR="${ECOSPECIES_VENV_DIR:-$ROOT_DIR/.docker/venv}"
mkdir -p "$VENV_DIR"
if [ ! -x "$VENV_DIR/bin/python" ]; then
python3 -m venv --copies --clear "$VENV_DIR"
fi
"$VENV_DIR/bin/pip" install --disable-pip-version-check -r "$ROOT_DIR/apps/api/requirements.txt"

7
scripts/check-api-tests.sh Normal file
View File

@ -0,0 +1,7 @@
#!/bin/sh
set -eu
ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)"
cd "$ROOT_DIR/apps/api"
exec python -m unittest -v

10
scripts/check-ui-smoke.sh Normal file
View File

@ -0,0 +1,10 @@
#!/bin/sh
set -eu
ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)"
cd "$ROOT_DIR"
if [ ! -d node_modules ]; then
npm install --no-bin-links
fi
exec npm run test:ui

View File

@ -0,0 +1,10 @@
#!/bin/sh
set -eu
ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)"
cd "$ROOT_DIR"
if [ ! -d node_modules ]; then
npm install --no-bin-links
fi
exec npm run test:ui:stack

9
scripts/run-api.sh Normal file
View File

@ -0,0 +1,9 @@
#!/bin/sh
set -eu
ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)"
VENV_DIR="${ECOSPECIES_VENV_DIR:-$ROOT_DIR/.docker/venv}"
export PYTHONPATH="${PYTHONPATH:-$ROOT_DIR/apps/api/src}"
exec "$VENV_DIR/bin/python" -m ecospecies_api.app

9
scripts/run-import.sh Normal file
View File

@ -0,0 +1,9 @@
#!/bin/sh
set -eu
ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)"
VENV_DIR="${ECOSPECIES_VENV_DIR:-$ROOT_DIR/.docker/venv}"
export PYTHONPATH="${PYTHONPATH:-$ROOT_DIR/apps/api/src}"
exec "$VENV_DIR/bin/python" -m ecospecies_api.cli

View File

@ -0,0 +1,46 @@
const { test, expect } = require("../../node_modules/@playwright/test");
test.skip(process.env.PLAYWRIGHT_LIVE_STACK === "1", "Stub-only UI smoke test");
test.beforeEach(async ({ request }) => {
await request.post("/__reset");
});
test("editor can filter archived records and archive/unarchive from the UI", async ({ page }) => {
const archiveFilters = page.locator("#archive-filter-group");
const speciesList = page.locator("#species-list");
await page.goto("/");
await page.getByPlaceholder("Bearer token for editor access").fill("editor-token");
await page.getByRole("button", { name: "Use Token" }).click();
await expect(archiveFilters).toBeVisible();
await expect(archiveFilters.getByRole("button", { name: "Active", exact: true })).toHaveClass(/is-active/);
await expect(speciesList.getByRole("button", { name: /Active Shad/ })).toBeVisible();
await expect(speciesList.getByRole("button", { name: /Archived Shad/ })).toHaveCount(0);
await archiveFilters.getByRole("button", { name: "All", exact: true }).click();
await expect(speciesList.getByRole("button", { name: /Archived Shad/ })).toBeVisible();
await speciesList.getByRole("button", { name: /Active Shad/ }).click();
await expect(page.getByText("Archive this species")).toBeVisible();
await expect(page.locator("#editor-is-archived")).not.toBeChecked();
await page.locator("#editor-is-archived").check();
await page.getByRole("button", { name: "Save Editorial Changes" }).click();
await expect(page.locator("#detail-archive-badge")).toBeVisible();
await expect(page.locator("#detail-archive-note")).toBeVisible();
await archiveFilters.getByRole("button", { name: "Archived", exact: true }).click();
await expect(speciesList.getByRole("button", { name: /Active Shad/ })).toBeVisible();
await speciesList.getByRole("button", { name: /Active Shad/ }).click();
await expect(page.locator("#editor-is-archived")).toBeChecked();
await page.locator("#editor-is-archived").uncheck();
await page.getByRole("button", { name: "Save Editorial Changes" }).click();
await archiveFilters.getByRole("button", { name: "Active", exact: true }).click();
await expect(speciesList.getByRole("button", { name: /Active Shad/ })).toBeVisible();
await expect(page.locator("#detail-archive-badge")).toBeHidden();
});

View File

@ -0,0 +1,46 @@
const { test, expect } = require("../../node_modules/@playwright/test");
test.skip(process.env.PLAYWRIGHT_LIVE_STACK !== "1", "Live-stack smoke test only");
test("editor can archive and unarchive a real species in the running stack", async ({ page }) => {
const archiveFilters = page.locator("#archive-filter-group");
const speciesList = page.locator("#species-list");
const targetName = /Alabama Shad/;
await page.goto("/");
await page.getByPlaceholder("Bearer token for editor access").fill("editor-token");
await page.getByRole("button", { name: "Use Token" }).click();
await expect(archiveFilters).toBeVisible();
await archiveFilters.getByRole("button", { name: "All", exact: true }).click();
await page.locator("#search").fill("Alabama");
await expect(speciesList.getByRole("button", { name: targetName })).toBeVisible();
await speciesList.getByRole("button", { name: targetName }).click();
await expect(page.locator("#editor-is-archived")).toBeVisible();
if (await page.locator("#editor-is-archived").isChecked()) {
await page.locator("#editor-is-archived").uncheck();
await page.getByRole("button", { name: "Save Editorial Changes" }).click();
await expect(page.locator("#detail-archive-badge")).toBeHidden();
}
await page.locator("#editor-is-archived").check();
await page.getByRole("button", { name: "Save Editorial Changes" }).click();
await expect(page.locator("#detail-archive-badge")).toBeVisible();
await expect(page.locator("#detail-archive-note")).toBeVisible();
await archiveFilters.getByRole("button", { name: "Archived", exact: true }).click();
await expect(speciesList.getByRole("button", { name: targetName })).toBeVisible();
await speciesList.getByRole("button", { name: targetName }).click();
await expect(page.locator("#editor-is-archived")).toBeChecked();
await page.locator("#editor-is-archived").uncheck();
await page.getByRole("button", { name: "Save Editorial Changes" }).click();
await archiveFilters.getByRole("button", { name: "Active", exact: true }).click();
await page.locator("#search").fill("Alabama");
await expect(speciesList.getByRole("button", { name: targetName })).toBeVisible();
await expect(page.locator("#detail-archive-badge")).toBeHidden();
});

240
tests/ui/server.js Normal file
View File

@ -0,0 +1,240 @@
const http = require("http");
const fs = require("fs");
const path = require("path");
const PORT = Number(process.env.PORT || 4173);
const WEB_ROOT = path.resolve(__dirname, "../../apps/web");
function clone(value) {
return JSON.parse(JSON.stringify(value));
}
const baseSpecies = [
{
slug: "active-shad",
source_file: "Active Shad.txt",
title: "Active Shad (Alosa activa)",
common_name: "Active Shad",
scientific_name: "Alosa activa",
flelmr_code: "1001",
summary: "An active editor-visible species.",
section_count: 2,
publication_status: "published",
is_archived: false,
editor_notes: "",
last_modified_by: "system-import",
diagnostics: [],
sections: [
{ id: 1, position: 1, heading: "HEADER", content: "Header content" },
{ id: 2, position: 2, heading: "HABITAT", content: "Habitat content" },
],
audit: [],
},
{
slug: "archived-shad",
source_file: "Archived Shad.txt",
title: "Archived Shad (Alosa archiva)",
common_name: "Archived Shad",
scientific_name: "Alosa archiva",
flelmr_code: "1002",
summary: "An archived species.",
section_count: 1,
publication_status: "published",
is_archived: true,
editor_notes: "Archived from prior import.",
last_modified_by: "system-import",
diagnostics: [],
sections: [
{ id: 3, position: 1, heading: "HEADER", content: "Archived header" },
],
audit: [
{
id: 1,
changed_by: "system-import",
changed_at: "2026-03-26T00:00:00+00:00",
action: "import_archive",
details: { is_archived: { from: false, to: true } },
},
],
},
];
let speciesRecords = clone(baseSpecies);
function resetState() {
speciesRecords = clone(baseSpecies);
}
function getSession(req) {
const auth = req.headers.authorization || "";
if (auth === "Bearer editor-token") {
return {
authenticated: true,
auth_configured: true,
user: { username: "editor", role: "editor" },
};
}
return {
authenticated: false,
auth_configured: true,
user: null,
};
}
function sendJson(res, status, payload) {
const body = JSON.stringify(payload);
res.writeHead(status, {
"Content-Type": "application/json; charset=utf-8",
"Content-Length": Buffer.byteLength(body),
});
res.end(body);
}
function sendFile(res, filePath) {
fs.readFile(filePath, (error, content) => {
if (error) {
sendJson(res, 404, { error: "Not found" });
return;
}
const ext = path.extname(filePath);
const type =
ext === ".html"
? "text/html; charset=utf-8"
: ext === ".js"
? "application/javascript; charset=utf-8"
: "text/css; charset=utf-8";
res.writeHead(200, { "Content-Type": type });
res.end(content);
});
}
function parseBody(req) {
return new Promise((resolve, reject) => {
let raw = "";
req.on("data", (chunk) => {
raw += chunk;
});
req.on("end", () => {
try {
resolve(raw ? JSON.parse(raw) : {});
} catch (error) {
reject(error);
}
});
req.on("error", reject);
});
}
function getEditorList() {
return speciesRecords.map((item) => ({
slug: item.slug,
title: item.title,
common_name: item.common_name,
publication_status: item.publication_status,
is_archived: item.is_archived,
last_modified_by: item.last_modified_by,
diagnostics: item.diagnostics,
}));
}
const server = http.createServer(async (req, res) => {
const url = new URL(req.url, `http://${req.headers.host}`);
const pathname = url.pathname;
if (pathname === "/__reset" && req.method === "POST") {
resetState();
sendJson(res, 200, { status: "ok" });
return;
}
if (pathname === "/api/auth/session" && req.method === "GET") {
sendJson(res, 200, getSession(req));
return;
}
if (pathname === "/api/insights/summary" && req.method === "GET") {
sendJson(res, 200, { species_count: 1, section_count: 3, diagnostic_counts: {} });
return;
}
if (pathname === "/api/editor/species" && req.method === "GET") {
sendJson(res, 200, { items: getEditorList(), count: speciesRecords.length });
return;
}
if (pathname.startsWith("/api/editor/species/") && pathname.endsWith("/audit") && req.method === "GET") {
const slug = pathname.slice("/api/editor/species/".length, -"/audit".length).replace(/\/$/, "");
const item = speciesRecords.find((record) => record.slug === slug);
if (!item) {
sendJson(res, 404, { error: "Not found" });
return;
}
sendJson(res, 200, { items: item.audit, count: item.audit.length });
return;
}
if (pathname.startsWith("/api/editor/species/") && pathname.endsWith("/editorial") && req.method === "POST") {
const slug = pathname.slice("/api/editor/species/".length, -"/editorial".length).replace(/\/$/, "");
const item = speciesRecords.find((record) => record.slug === slug);
if (!item) {
sendJson(res, 404, { error: "Not found" });
return;
}
const payload = await parseBody(req);
const beforeArchived = item.is_archived;
item.publication_status = payload.publication_status || item.publication_status;
item.summary = payload.summary ?? item.summary;
item.editor_notes = payload.editor_notes ?? item.editor_notes;
item.is_archived = Boolean(payload.is_archived);
item.last_modified_by = "editor";
item.audit.unshift({
id: item.audit.length + 1,
changed_by: "editor",
changed_at: "2026-03-26T00:01:00+00:00",
action: "editorial_update",
details: {
is_archived: { from: beforeArchived, to: item.is_archived },
},
});
sendJson(res, 200, {
status: "ok",
slug: item.slug,
summary: item.summary,
publication_status: item.publication_status,
editor_notes: item.editor_notes,
is_archived: item.is_archived,
last_modified_by: item.last_modified_by,
changed_fields: {
is_archived: { from: beforeArchived, to: item.is_archived },
},
});
return;
}
if (pathname.startsWith("/api/editor/species/") && req.method === "GET") {
const slug = pathname.slice("/api/editor/species/".length).replace(/\/$/, "");
const item = speciesRecords.find((record) => record.slug === slug);
if (!item) {
sendJson(res, 404, { error: "Not found" });
return;
}
sendJson(res, 200, item);
return;
}
if (pathname === "/" || pathname === "/index.html") {
sendFile(res, path.join(WEB_ROOT, "index.html"));
return;
}
if (pathname === "/app.js" || pathname === "/styles.css") {
sendFile(res, path.join(WEB_ROOT, pathname.slice(1)));
return;
}
sendJson(res, 404, { error: "Not found" });
});
server.listen(PORT, "127.0.0.1", () => {
process.stdout.write(`UI test server listening on ${PORT}\n`);
});