# NOTE: stray extraction metadata ("1647 lines / 59 KiB / Python") removed.
from __future__ import annotations
|
|
|
|
from collections import Counter
|
|
from datetime import datetime, timezone
|
|
import json
|
|
import hashlib
|
|
import os
|
|
from pathlib import Path
|
|
import re
|
|
import secrets
|
|
|
|
from sqlalchemy import inspect, select, text
|
|
from sqlalchemy.exc import SQLAlchemyError
|
|
|
|
from ecospecies_api.citation_enrichment import (
|
|
apply_citation_candidate_selection,
|
|
discover_citation_candidates,
|
|
enrich_citation_payload,
|
|
)
|
|
from ecospecies_api.document_format import extract_species_projection, parse_markdown_document
|
|
from ecospecies_api.document_format import add_citation_to_document, export_markdown_document
|
|
from ecospecies_api.document_repository import (
|
|
get_species_document_payload,
|
|
save_species_document,
|
|
sync_species_document,
|
|
)
|
|
from ecospecies_api.db import SessionLocal, create_db_engine
|
|
from ecospecies_api.models import (
|
|
Base,
|
|
ContributorAccount,
|
|
DocumentSection,
|
|
IngestDiagnosticRecord,
|
|
Species,
|
|
SpeciesAuditLog,
|
|
SpeciesCitation,
|
|
SpeciesTaxonIdentifier,
|
|
)
|
|
from ecospecies_api.parser import get_default_data_dir, slugify
|
|
|
|
# Allowed values for Species.publication_status (see update_species_editorial).
WORKFLOW_STATUSES = {"draft", "review", "published"}
# Allowed states of the citation review workflow.
CITATION_REVIEW_STATUSES = {"draft", "reviewed", "accepted", "rejected"}
# Username recorded on rows created or modified by bulk imports.
SYSTEM_IMPORT_USER = "system-import"
# Slug/source-file prefix for species created via contributor submissions.
CONTRIBUTOR_SUBMISSION_PREFIX = "contributor-submission"
# Minimal shape check for contributor emails (intentionally not full RFC 5322).
EMAIL_PATTERN = re.compile(r"^[^@\s]+@[^@\s]+\.[^@\s]+$")
|
|
|
|
|
|
def ensure_schema() -> None:
    """Create missing tables and backfill columns added after initial release.

    Runs ``Base.metadata.create_all`` first, then applies idempotent
    ``ALTER TABLE`` statements for columns that older databases may lack,
    followed by an ``UPDATE`` normalizing pre-existing NULLs to the column
    defaults. Safe to call repeatedly.
    """
    engine = create_db_engine()
    Base.metadata.create_all(engine)
    inspector = inspect(engine)

    # Editorial-workflow columns added to ``species`` after the first release,
    # keyed by column name so we only emit DDL for columns that are missing.
    species_migrations = {
        "publication_status": "ALTER TABLE species ADD COLUMN publication_status VARCHAR(32) DEFAULT 'published'",
        "is_archived": "ALTER TABLE species ADD COLUMN is_archived BOOLEAN DEFAULT FALSE",
        "editor_notes": "ALTER TABLE species ADD COLUMN editor_notes TEXT DEFAULT ''",
        "created_by": "ALTER TABLE species ADD COLUMN created_by VARCHAR(255) DEFAULT 'system-import'",
        "owner_username": "ALTER TABLE species ADD COLUMN owner_username VARCHAR(255) DEFAULT ''",
        "owner_role": "ALTER TABLE species ADD COLUMN owner_role VARCHAR(32) DEFAULT ''",
        "last_modified_by": "ALTER TABLE species ADD COLUMN last_modified_by VARCHAR(255) DEFAULT 'system-import'",
    }
    species_columns = {column["name"] for column in inspector.get_columns("species")}
    statements: list[str] = [
        ddl for name, ddl in species_migrations.items() if name not in species_columns
    ]
    if statements:
        with engine.begin() as connection:
            for statement in statements:
                connection.execute(text(statement))
            # Normalize NULLs on rows that predate the new columns.
            connection.execute(
                text(
                    "UPDATE species SET publication_status = COALESCE(publication_status, 'published'), "
                    "is_archived = COALESCE(is_archived, FALSE), "
                    "editor_notes = COALESCE(editor_notes, ''), "
                    "created_by = COALESCE(created_by, 'system-import'), "
                    "owner_username = COALESCE(owner_username, ''), "
                    "owner_role = COALESCE(owner_role, ''), "
                    "last_modified_by = COALESCE(last_modified_by, 'system-import')"
                )
            )

    tables = set(inspector.get_table_names())
    if "species_citation" in tables:
        # Citation-enrichment columns added after the first release.
        citation_migrations = {
            "legacy_reference_number": "ALTER TABLE species_citation ADD COLUMN legacy_reference_number VARCHAR(64) DEFAULT ''",
            "citation_key": "ALTER TABLE species_citation ADD COLUMN citation_key VARCHAR(255) DEFAULT ''",
            "entry_type": "ALTER TABLE species_citation ADD COLUMN entry_type VARCHAR(64) DEFAULT 'misc'",
            "draft_bibtex": "ALTER TABLE species_citation ADD COLUMN draft_bibtex TEXT DEFAULT ''",
            "abstract_text": "ALTER TABLE species_citation ADD COLUMN abstract_text TEXT DEFAULT ''",
            "source_url": "ALTER TABLE species_citation ADD COLUMN source_url VARCHAR(500) DEFAULT ''",
            "openalex_id": "ALTER TABLE species_citation ADD COLUMN openalex_id VARCHAR(64) DEFAULT ''",
            "resolver_source_label": "ALTER TABLE species_citation ADD COLUMN resolver_source_label VARCHAR(255) DEFAULT ''",
            "enrichment_status": "ALTER TABLE species_citation ADD COLUMN enrichment_status VARCHAR(32) DEFAULT 'pending'",
            "enrichment_error": "ALTER TABLE species_citation ADD COLUMN enrichment_error TEXT DEFAULT ''",
        }
        citation_columns = {
            column["name"] for column in inspector.get_columns("species_citation")
        }
        citation_statements: list[str] = [
            ddl for name, ddl in citation_migrations.items() if name not in citation_columns
        ]
        if citation_statements:
            with engine.begin() as connection:
                for statement in citation_statements:
                    connection.execute(text(statement))
                # Normalize NULLs on rows that predate the new columns.
                connection.execute(
                    text(
                        "UPDATE species_citation SET "
                        "legacy_reference_number = COALESCE(legacy_reference_number, ''), "
                        "citation_key = COALESCE(citation_key, ''), "
                        "entry_type = COALESCE(entry_type, 'misc'), "
                        "abstract_text = COALESCE(abstract_text, ''), "
                        "draft_bibtex = COALESCE(draft_bibtex, ''), "
                        "source_url = COALESCE(source_url, ''), "
                        "openalex_id = COALESCE(openalex_id, ''), "
                        "resolver_source_label = COALESCE(resolver_source_label, ''), "
                        "enrichment_status = COALESCE(enrichment_status, 'pending'), "
                        "enrichment_error = COALESCE(enrichment_error, '')"
                    )
                )
|
|
|
|
|
|
def _citation_to_payload(citation: SpeciesCitation) -> dict[str, object]:
|
|
return {
|
|
"id": citation.id,
|
|
"position": citation.position,
|
|
"section_heading": citation.section_heading,
|
|
"legacy_reference_number": citation.legacy_reference_number,
|
|
"citation_key": citation.citation_key,
|
|
"entry_type": citation.entry_type,
|
|
"raw_text": citation.raw_text,
|
|
"normalized_text": citation.normalized_text,
|
|
"abstract_text": citation.abstract_text,
|
|
"draft_bibtex": citation.draft_bibtex,
|
|
"doi": citation.doi,
|
|
"source_url": citation.source_url,
|
|
"openalex_id": citation.openalex_id,
|
|
"resolver_source_label": citation.resolver_source_label,
|
|
"enrichment_status": citation.enrichment_status,
|
|
"enrichment_error": citation.enrichment_error,
|
|
"source_type": citation.source_type,
|
|
"review_status": citation.review_status,
|
|
}
|
|
|
|
|
|
def _structured_document_to_payload(species: Species) -> dict[str, object] | None:
|
|
if species.document is None:
|
|
return None
|
|
|
|
ast: dict[str, object] | None = None
|
|
raw_ast = str(species.document.ast_json or "").strip()
|
|
if raw_ast:
|
|
try:
|
|
parsed = json.loads(raw_ast)
|
|
if isinstance(parsed, dict):
|
|
ast = parsed
|
|
except json.JSONDecodeError:
|
|
ast = None
|
|
|
|
return {
|
|
"source_format": species.document.source_format,
|
|
"updated_by": species.document.updated_by,
|
|
"node_count": len(species.document.nodes),
|
|
"ast": ast,
|
|
}
|
|
|
|
|
|
def _legacy_source_to_payload(species: Species) -> dict[str, object] | None:
    """Load the raw legacy source file recorded for *species*, if readable.

    Returns ``None`` when no source file is recorded, when the resolved
    path is not a direct child of the data directory (path-traversal
    guard — ``resolve()`` collapses any ".." components first), or when
    the file cannot be read.
    """
    source_file = str(species.source_file or "").strip()
    if not source_file:
        return None

    try:
        data_dir = Path(get_default_data_dir()).resolve()
        candidate = (data_dir / source_file).resolve()
        if candidate.parent != data_dir or not candidate.is_file():
            return None
        # Renamed from ``text``: the original local shadowed the
        # ``sqlalchemy.text`` import used elsewhere in this module.
        file_text = candidate.read_text(encoding="utf-8", errors="replace")
    except (OSError, ValueError):
        return None

    return {
        "source_file": source_file,
        "text": file_text,
    }
|
|
|
|
|
|
def import_species_payload(payload: list[dict[str, object]]) -> None:
    """Synchronize the database with a full bulk-import payload.

    For each incoming item (keyed by ``slug``) this creates or updates the
    Species row and its sections/diagnostics, while preserving fields and
    section contents that editors have hand-modified (tracked via the audit
    log — see ``_get_editor_preservation_state``). Species present in the
    database but absent from the payload are archived rather than deleted.
    All changes are committed in a single transaction at the end.
    """
    ensure_schema()
    with SessionLocal() as session:
        existing_species = {
            item.slug: item for item in session.scalars(select(Species)).all()
        }
        incoming_slugs: set[str] = set()
        for item in payload:
            slug = item["slug"]
            incoming_slugs.add(slug)
            species = existing_species.get(slug)
            if species is None:
                # Brand-new species: created with import defaults and
                # flushed so species.id is available for child rows.
                species = Species(
                    slug=slug,
                    source_file=item["source_file"],
                    title=item["title"],
                    common_name=item["common_name"],
                    scientific_name=item["scientific_name"],
                    flelmr_code=item["flelmr_code"],
                    summary=item["summary"],
                    section_count=item["section_count"],
                    publication_status="published",
                    is_archived=False,
                    editor_notes="",
                    created_by=SYSTEM_IMPORT_USER,
                    owner_username="",
                    owner_role="",
                    last_modified_by=SYSTEM_IMPORT_USER,
                )
                session.add(species)
                session.flush()

            # Touch lazy relationships so they are loaded in this session.
            _ = species.sections
            _ = species.diagnostics
            _ = species.audit_entries

            # Fields/sections edited by humans must not be clobbered below.
            editorial_fields, section_positions = _get_editor_preservation_state(species)

            species.source_file = item["source_file"]
            species.title = item["title"]
            species.common_name = item["common_name"]
            species.scientific_name = item["scientific_name"]
            species.flelmr_code = item["flelmr_code"]
            species.section_count = item["section_count"]
            # Re-importing a previously archived species un-archives it,
            # with an audit entry recording the restore.
            if species.is_archived:
                species.is_archived = False
                session.add(
                    SpeciesAuditLog(
                        species_id=species.id,
                        changed_by=SYSTEM_IMPORT_USER,
                        changed_at=datetime.now(timezone.utc).isoformat(),
                        action="import_restore",
                        details_json=json.dumps(
                            {"is_archived": {"from": True, "to": False}},
                            ensure_ascii=True,
                        ),
                    )
                )

            # Only overwrite the summary if no editor has touched it.
            if "summary" not in editorial_fields:
                species.summary = item["summary"]
            if species.last_modified_by == "":
                species.last_modified_by = SYSTEM_IMPORT_USER

            # Sections are matched by 1-based position, not heading.
            existing_sections = {section.position: section for section in species.sections}
            incoming_positions: set[int] = set()
            for position, section_payload in enumerate(item["sections"], start=1):
                incoming_positions.add(position)
                section = existing_sections.get(position)
                if section is None:
                    section = DocumentSection(
                        species_id=species.id,
                        position=position,
                        heading=section_payload["heading"],
                        content=section_payload["content"],
                    )
                    session.add(section)
                    continue

                section.heading = section_payload["heading"]
                # Preserve content of sections that editors have rewritten.
                if position not in section_positions:
                    section.content = section_payload["content"]
                session.add(section)

            # Drop sections that no longer exist in the incoming document.
            for position, section in existing_sections.items():
                if position not in incoming_positions:
                    session.delete(section)

            # Diagnostics are fully replaced on every import.
            for diagnostic in list(species.diagnostics):
                session.delete(diagnostic)

            for diagnostic in item["diagnostics"]:
                # "missing_summary" is suppressed; summaries may be
                # editor-supplied and absent from the source file.
                if diagnostic["code"] == "missing_summary":
                    continue
                session.add(
                    IngestDiagnosticRecord(
                        species_id=species.id,
                        level=diagnostic["level"],
                        code=diagnostic["code"],
                        message=diagnostic["message"],
                    )
                )

            sync_species_document(session, species, item)

        # Species missing from this import are archived (never deleted),
        # each with an audit entry recording the transition.
        for slug, species in existing_species.items():
            if slug in incoming_slugs:
                continue
            if not species.is_archived:
                species.is_archived = True
                session.add(
                    SpeciesAuditLog(
                        species_id=species.id,
                        changed_by=SYSTEM_IMPORT_USER,
                        changed_at=datetime.now(timezone.utc).isoformat(),
                        action="import_archive",
                        details_json=json.dumps(
                            {"is_archived": {"from": False, "to": True}},
                            ensure_ascii=True,
                        ),
                    )
                )
        session.commit()
|
|
|
|
|
|
def get_species_document(slug: str) -> dict[str, object] | None:
    """Fetch the stored document payload for *slug*, or ``None`` if absent."""
    ensure_schema()
    with SessionLocal() as session:
        payload = get_species_document_payload(session, slug)
    return payload
|
|
|
|
|
|
def _get_editor_preservation_state(species: Species) -> tuple[set[str], set[int]]:
|
|
editorial_fields: set[str] = set()
|
|
section_positions: set[int] = set()
|
|
|
|
for entry in species.audit_entries:
|
|
try:
|
|
details = json.loads(entry.details_json)
|
|
except json.JSONDecodeError:
|
|
continue
|
|
|
|
if entry.action == "editorial_update":
|
|
editorial_fields.update(details.keys())
|
|
elif entry.action == "section_update":
|
|
section_position = details.get("section_position")
|
|
if isinstance(section_position, int):
|
|
section_positions.add(section_position)
|
|
|
|
return editorial_fields, section_positions
|
|
|
|
|
|
def has_species_data() -> bool:
    """Return ``True`` when at least one species row exists."""
    ensure_schema()
    with SessionLocal() as session:
        first_id = session.scalar(select(Species.id).limit(1))
    return first_id is not None
|
|
|
|
|
|
def get_readiness_status() -> dict[str, object]:
    """Health-check payload: database reachability plus data-load state."""
    try:
        ensure_schema()
        with SessionLocal() as session:
            species_count = session.query(Species).count()
    except SQLAlchemyError as exc:
        # Database unreachable or schema broken — report, do not raise.
        return {
            "ready": False,
            "database": "error",
            "species_count": None,
            "data_state": "unavailable",
            "error": str(exc),
        }
    return {
        "ready": True,
        "database": "ok",
        "species_count": species_count,
        "data_state": "loaded" if species_count else "empty",
    }
|
|
|
|
|
|
def _species_to_payload(species: Species, include_sections: bool = True) -> dict[str, object]:
    """Serialize a Species ORM row into the API response dict.

    With ``include_sections=False`` (list views) the heavier fields —
    structured document, legacy source text, and section bodies — are
    omitted (``None`` / ``[]``) to keep list responses small.
    """
    # The legacy FLELMR code is surfaced as a synthetic identifier entry.
    legacy_identifiers: list[dict[str, object]] = []
    if species.flelmr_code:
        legacy_identifiers.append(
            {
                "authority": "legacy-ecospecies",
                "identifier": species.flelmr_code,
                "label": "FLELMR",
            }
        )
    taxon_identifiers = [
        {
            "authority": item.authority,
            "identifier": item.identifier,
            "label": item.label,
            "primary": item.is_primary,
            "source_url": item.source_url,
        }
        for item in species.taxon_identifiers
    ]
    # First identifier flagged primary, or None if none is flagged.
    primary_taxon_identifier = next(
        (item for item in taxon_identifiers if bool(item.get("primary"))),
        None,
    )

    return {
        "slug": species.slug,
        "source_file": species.source_file,
        "title": species.title,
        "common_name": species.common_name,
        "scientific_name": species.scientific_name,
        "flelmr_code": species.flelmr_code,
        "legacy_identifiers": legacy_identifiers,
        "taxon_identifiers": taxon_identifiers,
        "primary_taxon_authority": (
            str(primary_taxon_identifier.get("authority", "")) if primary_taxon_identifier else ""
        ),
        "primary_taxon_identifier": primary_taxon_identifier,
        "summary": species.summary,
        "section_count": species.section_count,
        "citation_count": len(species.citations),
        "publication_status": species.publication_status,
        "is_archived": species.is_archived,
        "editor_notes": species.editor_notes,
        "last_modified_by": species.last_modified_by,
        "diagnostics": [
            {"level": diagnostic.level, "code": diagnostic.code, "message": diagnostic.message}
            for diagnostic in species.diagnostics
        ],
        "citations": [
            _citation_to_payload(citation) for citation in species.citations
        ],
        # Heavy fields gated behind include_sections for list endpoints.
        "structured_document": _structured_document_to_payload(species) if include_sections else None,
        "legacy_source": _legacy_source_to_payload(species) if include_sections else None,
        "sections": (
            [
                {
                    "id": section.id,
                    "position": section.position,
                    "heading": section.heading,
                    "content": section.content,
                }
                for section in species.sections
            ]
            if include_sections
            else []
        ),
    }
|
|
|
|
|
|
def list_species(
    search: str = "",
    include_unpublished: bool = False,
    include_archived: bool = False,
) -> list[dict[str, object]]:
    """Return summary payloads (no sections) for species, optionally filtered.

    Args:
        search: case-insensitive substring matched against common name,
            scientific name, and title.
        include_unpublished: when False, only "published" species appear.
        include_archived: when False, archived species are excluded.
    """
    ensure_schema()
    with SessionLocal() as session:
        query = select(Species).order_by(Species.common_name, Species.title)
        # Push the status filters into SQL instead of loading every row and
        # discarding in Python. Results are identical because ensure_schema
        # backfills NULLs on both columns to their defaults.
        if not include_archived:
            query = query.where(Species.is_archived.is_(False))
        if not include_unpublished:
            query = query.where(Species.publication_status == "published")
        species = list(session.scalars(query))
        payload = [_species_to_payload(item, include_sections=False) for item in species]
        if search:
            needle = search.lower()
            payload = [
                item
                for item in payload
                if needle in item["common_name"].lower()
                or needle in item["scientific_name"].lower()
                or needle in item["title"].lower()
            ]
        return payload
|
|
|
|
|
|
def get_species_by_slug(
    slug: str,
    include_unpublished: bool = False,
    include_archived: bool = False,
) -> dict[str, object] | None:
    """Return the full payload for one species, honoring visibility flags.

    Returns ``None`` for unknown slugs and for species hidden by the
    archive/publication gates.
    """
    ensure_schema()
    with SessionLocal() as session:
        record = session.scalar(select(Species).where(Species.slug == slug))
        if record is None:
            return None
        hidden_archived = record.is_archived and not include_archived
        hidden_unpublished = record.publication_status != "published" and not include_unpublished
        if hidden_archived or hidden_unpublished:
            return None
        # Touch lazy relationships while the session is still open.
        _ = record.sections
        _ = record.diagnostics
        _ = record.citations
        return _species_to_payload(record, include_sections=True)
|
|
|
|
|
|
def get_summary_metrics() -> dict[str, object]:
    """Aggregate counts across the publicly listed species."""
    payload = list_species()
    diagnostic_counter: Counter = Counter(
        diagnostic["code"]
        for item in payload
        for diagnostic in item["diagnostics"]
    )
    return {
        "species_count": len(payload),
        "section_count": sum(item["section_count"] for item in payload),
        "diagnostic_counts": dict(diagnostic_counter),
    }
|
|
|
|
|
|
def list_diagnostics() -> list[dict[str, object]]:
    """List publicly visible species that carry ingest diagnostics."""
    entries: list[dict[str, object]] = []
    for item in list_species():
        if not item["diagnostics"]:
            continue
        entries.append(
            {
                "slug": item["slug"],
                "common_name": item["common_name"],
                "source_file": item["source_file"],
                "diagnostics": item["diagnostics"],
            }
        )
    return entries
|
|
|
|
|
|
def list_public_bibliography(search: str = "") -> list[dict[str, object]]:
    """Build a deduplicated bibliography across all published species.

    Citations from every published, non-archived species are merged by the
    strongest available identity (DOI > OpenAlex ID > citation key >
    normalized text > raw text). Each merged entry lists the species that
    reference it, collects legacy reference numbers, and can be filtered by
    a case-insensitive *search* substring. Results are sorted by citation
    text, then citation key.
    """
    ensure_schema()
    with SessionLocal() as session:
        species_records = list(
            session.scalars(
                select(Species)
                .where(
                    Species.publication_status == "published",
                    Species.is_archived.is_(False),
                )
                .order_by(Species.common_name, Species.title)
            )
        )

        entries: dict[str, dict[str, object]] = {}
        for species in species_records:
            _ = species.citations  # force lazy load within the session
            for citation in species.citations:
                # Candidate dedupe keys, normalized for case/whitespace.
                doi_key = str(citation.doi).strip().lower()
                openalex_key = str(citation.openalex_id).strip().lower()
                citation_key = str(citation.citation_key).strip().lower()
                normalized_key = " ".join(str(citation.normalized_text).split()).strip().lower()
                raw_key = " ".join(str(citation.raw_text).split()).strip().lower()
                # First non-empty key wins, strongest identity first.
                dedupe_key = (
                    f"doi:{doi_key}" if doi_key else ""
                ) or (
                    f"openalex:{openalex_key}" if openalex_key else ""
                ) or (
                    f"key:{citation_key}" if citation_key else ""
                ) or (
                    f"normalized:{normalized_key}" if normalized_key else ""
                ) or (
                    f"raw:{raw_key}" if raw_key else ""
                )
                if not dedupe_key:
                    continue

                entry = entries.get(dedupe_key)
                if entry is None:
                    # Underscore-prefixed keys are working state, stripped
                    # before the entry is returned.
                    entry = {
                        **_citation_to_payload(citation),
                        "species_refs": [],
                        "_species_ref_keys": set(),
                        "_legacy_reference_numbers": set(),
                    }
                    entries[dedupe_key] = entry

                # Fill gaps: later duplicates may carry richer metadata.
                if not entry.get("normalized_text") and citation.normalized_text:
                    entry["normalized_text"] = citation.normalized_text
                if not entry.get("abstract_text") and citation.abstract_text:
                    entry["abstract_text"] = citation.abstract_text
                if not entry.get("draft_bibtex") and citation.draft_bibtex:
                    entry["draft_bibtex"] = citation.draft_bibtex
                if not entry.get("doi") and citation.doi:
                    entry["doi"] = citation.doi
                if not entry.get("source_url") and citation.source_url:
                    entry["source_url"] = citation.source_url
                if not entry.get("openalex_id") and citation.openalex_id:
                    entry["openalex_id"] = citation.openalex_id

                # Record each referencing species exactly once per entry.
                species_ref_key = species.slug
                if species_ref_key not in entry["_species_ref_keys"]:
                    entry["_species_ref_keys"].add(species_ref_key)
                    entry["species_refs"].append(
                        {
                            "slug": species.slug,
                            "common_name": species.common_name,
                            "scientific_name": species.scientific_name,
                        }
                    )
                if citation.legacy_reference_number:
                    entry["_legacy_reference_numbers"].add(citation.legacy_reference_number)

        items: list[dict[str, object]] = []
        needle = search.strip().lower()
        for entry in entries.values():
            # Convert working state to the public response shape.
            legacy_numbers = sorted(entry.pop("_legacy_reference_numbers"))
            entry.pop("_species_ref_keys", None)
            entry["legacy_reference_numbers"] = legacy_numbers
            entry["species_count"] = len(entry["species_refs"])

            if needle:
                # Search spans the main textual fields of the entry.
                haystack = " ".join(
                    [
                        str(entry.get("normalized_text", "")),
                        str(entry.get("raw_text", "")),
                        str(entry.get("citation_key", "")),
                        str(entry.get("doi", "")),
                        str(entry.get("abstract_text", "")),
                        str(entry.get("draft_bibtex", "")),
                    ]
                ).lower()
                if needle not in haystack:
                    continue
            items.append(entry)

        items.sort(key=lambda item: (str(item.get("normalized_text", "") or item.get("raw_text", "")).lower(), str(item.get("citation_key", "")).lower()))
        return items
|
|
|
|
|
|
def get_editor_species_list(search: str = "") -> list[dict[str, object]]:
    """Editor view: every species, including drafts and archived entries."""
    results = list_species(
        search=search,
        include_unpublished=True,
        include_archived=True,
    )
    return results
|
|
|
|
|
|
def get_contributor_species_list(username: str, search: str = "") -> list[dict[str, object]]:
    """List species owned by the given contributor, optionally filtered by *search*."""
    ensure_schema()
    with SessionLocal() as session:
        owned_query = (
            select(Species)
            .where(
                Species.owner_role == "contributor",
                Species.owner_username == username,
            )
            .order_by(Species.common_name, Species.title)
        )
        records = session.scalars(owned_query)
        results = [_species_to_payload(record, include_sections=False) for record in records]
        if search:
            needle = search.lower()
            results = [
                entry
                for entry in results
                if needle in entry["common_name"].lower()
                or needle in entry["scientific_name"].lower()
                or needle in entry["title"].lower()
            ]
        return results
|
|
|
|
|
|
def get_editor_species_workflow(slug: str) -> dict[str, object] | None:
    """Condensed workflow status for one species, for the editor dashboard."""
    detail = get_species_by_slug(slug, include_unpublished=True, include_archived=True)
    if detail is None:
        return None
    copied_keys = (
        "slug",
        "title",
        "common_name",
        "publication_status",
        "is_archived",
        "editor_notes",
        "last_modified_by",
    )
    workflow = {key: detail[key] for key in copied_keys}
    workflow["diagnostic_count"] = len(detail["diagnostics"])
    return workflow
|
|
|
|
|
|
def get_editor_species_detail(slug: str) -> dict[str, object] | None:
    """Full species payload for editors, bypassing publication/archive gates."""
    detail = get_species_by_slug(
        slug,
        include_unpublished=True,
        include_archived=True,
    )
    return detail
|
|
|
|
|
|
def get_contributor_species_detail(slug: str, username: str) -> dict[str, object] | None:
    """Full payload for a species owned by *username*, or ``None``."""
    ensure_schema()
    with SessionLocal() as session:
        owned = session.scalar(
            select(Species).where(
                Species.slug == slug,
                Species.owner_role == "contributor",
                Species.owner_username == username,
            )
        )
        if owned is None:
            return None
        # Force lazy relationships to load while the session is open.
        _ = owned.sections
        _ = owned.diagnostics
        _ = owned.citations
        return _species_to_payload(owned, include_sections=True)
|
|
|
|
|
|
def _citation_list_payload(species: Species) -> dict[str, object]:
    """Build the citation-list response payload for one species."""
    serialized = [_citation_to_payload(citation) for citation in species.citations]
    payload: dict[str, object] = {
        "slug": species.slug,
        "title": species.title,
        "common_name": species.common_name,
        "scientific_name": species.scientific_name,
        "citation_count": len(serialized),
        "citations": serialized,
    }
    return payload
|
|
|
|
|
|
def get_editor_species_citations(slug: str) -> dict[str, object] | None:
    """Citations for any species by slug (no visibility gating)."""
    ensure_schema()
    with SessionLocal() as session:
        record = session.scalar(select(Species).where(Species.slug == slug))
        if record is None:
            return None
        _ = record.citations  # load lazily-related rows before serializing
        return _citation_list_payload(record)
|
|
|
|
|
|
def get_contributor_species_citations(slug: str, username: str) -> dict[str, object] | None:
    """Citations for a species owned by *username*, or ``None``."""
    ensure_schema()
    with SessionLocal() as session:
        owned = session.scalar(
            select(Species).where(
                Species.slug == slug,
                Species.owner_role == "contributor",
                Species.owner_username == username,
            )
        )
        if owned is None:
            return None
        _ = owned.citations  # load lazily-related rows before serializing
        return _citation_list_payload(owned)
|
|
|
|
|
|
def list_species_audit(slug: str) -> list[dict[str, object]] | None:
    """Return the audit trail for a species, or ``None`` for unknown slugs."""
    ensure_schema()
    with SessionLocal() as session:
        record = session.scalar(select(Species).where(Species.slug == slug))
        if record is None:
            return None
        _ = record.audit_entries  # force lazy load inside the session
        trail: list[dict[str, object]] = []
        for entry in record.audit_entries:
            trail.append(
                {
                    "id": entry.id,
                    "changed_by": entry.changed_by,
                    "changed_at": entry.changed_at,
                    "action": entry.action,
                    "details": json.loads(entry.details_json),
                }
            )
        return trail
|
|
|
|
|
|
def update_species_editorial(
    slug: str,
    publication_status: str | None,
    summary: str | None,
    editor_notes: str | None,
    is_archived: bool | None,
    username: str,
) -> dict[str, object] | None:
    """Apply editorial changes to a species and audit-log the diff.

    ``None`` arguments leave the corresponding field untouched. Returns
    ``None`` for unknown slugs; otherwise a snapshot of the updated fields
    plus ``changed_fields`` (the before/after diff that was logged).

    Raises:
        ValueError: if ``publication_status`` is not one of
            ``WORKFLOW_STATUSES``.
    """
    ensure_schema()
    with SessionLocal() as session:
        species = session.scalar(select(Species).where(Species.slug == slug))
        if species is None:
            return None
        # Snapshot the editable fields so the audit diff can be computed.
        before = {
            "publication_status": species.publication_status,
            "summary": species.summary,
            "editor_notes": species.editor_notes,
            "is_archived": species.is_archived,
        }

        if publication_status is not None:
            # Status values are stored normalized (trimmed, lowercase).
            normalized = publication_status.strip().lower()
            if normalized not in WORKFLOW_STATUSES:
                raise ValueError(
                    f"Unsupported publication_status: {publication_status}. "
                    f"Expected one of {sorted(WORKFLOW_STATUSES)}"
                )
            species.publication_status = normalized

        if summary is not None:
            species.summary = summary.strip()

        if editor_notes is not None:
            species.editor_notes = editor_notes.strip()

        if is_archived is not None:
            species.is_archived = is_archived

        after = {
            "publication_status": species.publication_status,
            "summary": species.summary,
            "editor_notes": species.editor_notes,
            "is_archived": species.is_archived,
        }
        # Only fields whose value actually changed are recorded.
        changed_fields = {
            key: {"from": before[key], "to": after[key]}
            for key in before
            if before[key] != after[key]
        }

        if changed_fields:
            # An audit entry is written only for effective changes, so
            # no-op updates do not touch last_modified_by either.
            species.last_modified_by = username
            session.add(
                SpeciesAuditLog(
                    species_id=species.id,
                    changed_by=username,
                    changed_at=datetime.now(timezone.utc).isoformat(),
                    action="editorial_update",
                    details_json=json.dumps(changed_fields, ensure_ascii=True),
                )
            )

        session.add(species)
        session.commit()
        session.refresh(species)

        return {
            "slug": species.slug,
            "summary": species.summary,
            "publication_status": species.publication_status,
            "editor_notes": species.editor_notes,
            "is_archived": species.is_archived,
            "last_modified_by": species.last_modified_by,
            "changed_fields": changed_fields,
        }
|
|
|
|
|
|
def _normalize_email(email: str) -> str:
    """Trim and lowercase an email address, validating its basic shape.

    Raises:
        ValueError: if the normalized value does not look like an email.
    """
    candidate = email.strip().lower()
    if EMAIL_PATTERN.fullmatch(candidate) is None:
        raise ValueError("Contributor username must be a valid email address.")
    return candidate
|
|
|
|
|
|
def get_minimum_contributor_age() -> int:
    """Read the minimum contributor age from the environment (default 13).

    Raises:
        ValueError: if the configured value is not a positive integer.
    """
    raw_value = os.environ.get("ECOSPECIES_MINIMUM_CONTRIBUTOR_AGE", "13").strip()
    try:
        minimum_age = int(raw_value)
    except ValueError as exc:  # pragma: no cover - misconfiguration path
        raise ValueError("ECOSPECIES_MINIMUM_CONTRIBUTOR_AGE must be an integer.") from exc
    if minimum_age < 1:
        raise ValueError("ECOSPECIES_MINIMUM_CONTRIBUTOR_AGE must be positive.")
    return minimum_age
|
|
|
|
|
|
def register_contributor(email: str, age_gate_confirmed: bool) -> dict[str, object]:
    """Create a contributor account and return its one-time access token.

    The raw token is returned exactly once; only its SHA-256 hash is stored.

    Raises:
        ValueError: for an invalid email, missing age confirmation, or a
            duplicate account.
    """
    ensure_schema()
    normalized_email = _normalize_email(email)
    minimum_age = get_minimum_contributor_age()
    if not age_gate_confirmed:
        raise ValueError(
            f"Contributors must confirm they are at least {minimum_age} years old."
        )

    # Generate the credential before opening the session; only the hash
    # is persisted.
    token = secrets.token_urlsafe(24)
    token_hash = hashlib.sha256(token.encode("utf-8")).hexdigest()
    now = datetime.now(timezone.utc).isoformat()

    with SessionLocal() as session:
        duplicate = session.scalar(
            select(ContributorAccount).where(ContributorAccount.email == normalized_email)
        )
        if duplicate is not None:
            raise ValueError("A contributor account already exists for that email address.")

        account = ContributorAccount(
            email=normalized_email,
            token_hash=token_hash,
            age_gate_confirmed=True,
            created_at=now,
            is_active=True,
        )
        session.add(account)
        session.commit()

    return {
        "username": normalized_email,
        "role": "contributor",
        "token": token,
        "minimum_age": minimum_age,
        "warning": "Store this token now. You will not be able to access your contributed species later without it.",
    }
|
|
|
|
|
|
def _build_initial_contributor_markdown(email: str) -> str:
    """Return the markdown skeleton used to seed a new contributor draft.

    NOTE(review): *email* is currently unused; kept for interface stability
    with callers.
    """
    title = "New Species Draft"
    skeleton_lines = [
        "---",
        f"title: {title}",
        "common_name: ",
        "scientific_name: ",
        "species_code: ",
        f"source_file: {CONTRIBUTOR_SUBMISSION_PREFIX}.md",
        "publication_status: draft",
        "source_format: ecospecies-markdown-v1",
        "---",
        "",
        "## Summary",
        "Provide a concise summary.",
        "",
        "## Habitat",
        "Describe habitat.",
    ]
    return "\n".join(skeleton_lines) + "\n"
|
|
|
|
|
|
def _next_unique_slug(session, base_slug: str) -> str:
    """Return *base_slug*, or the first free ``base_slug-N`` variant (N >= 2)."""
    def _taken(slug_candidate: str) -> bool:
        # A slug is taken when any Species row already uses it.
        return session.scalar(select(Species.id).where(Species.slug == slug_candidate)) is not None

    slug_candidate = base_slug
    counter = 2
    while _taken(slug_candidate):
        slug_candidate = f"{base_slug}-{counter}"
        counter += 1
    return slug_candidate
|
|
|
|
|
|
def create_contributor_species(username: str, markdown: str | None = None) -> dict[str, object]:
    """Create a new draft species owned by the given contributor.

    Parses the supplied markdown (or a default skeleton) into a species
    projection, derives a unique slug, persists the Species row plus its
    structured document, and writes a ``contributor_create`` audit entry.
    Returns the new slug, status, and last modifier.

    Raises:
        ValueError: if *username* is not a valid email address.
    """
    ensure_schema()
    normalized_email = _normalize_email(username)
    source_markdown = (markdown or _build_initial_contributor_markdown(normalized_email)).strip()
    # strip() removed any trailing newline; re-add exactly one.
    if not source_markdown.endswith("\n"):
        source_markdown += "\n"

    with SessionLocal() as session:
        document_model = parse_markdown_document(source_markdown)
        projection = extract_species_projection(document_model)
        # Prefer common name, then title, then the generic prefix for the slug.
        slug_base = slugify(
            str(projection.get("common_name") or projection.get("title") or CONTRIBUTOR_SUBMISSION_PREFIX)
        )
        slug = _next_unique_slug(session, slug_base)
        species = Species(
            slug=slug,
            source_file=f"{CONTRIBUTOR_SUBMISSION_PREFIX}-{slug}.md",
            title=str(projection.get("title") or "New Species Draft"),
            common_name=str(projection.get("common_name") or ""),
            scientific_name=str(projection.get("scientific_name") or ""),
            flelmr_code=str(projection.get("flelmr_code") or ""),
            summary=str(projection.get("summary") or ""),
            section_count=len(projection["sections"]),
            publication_status="draft",
            is_archived=False,
            editor_notes="",
            created_by=normalized_email,
            owner_username=normalized_email,
            owner_role="contributor",
            last_modified_by=normalized_email,
        )
        session.add(species)
        # Flush so species.id exists for the document and audit rows below.
        session.flush()
        save_species_document(session, species, source_markdown, normalized_email)
        session.add(
            SpeciesAuditLog(
                species_id=species.id,
                changed_by=normalized_email,
                changed_at=datetime.now(timezone.utc).isoformat(),
                action="contributor_create",
                details_json=json.dumps({"publication_status": "draft"}, ensure_ascii=True),
            )
        )
        session.commit()
        return {
            "slug": species.slug,
            "publication_status": species.publication_status,
            "last_modified_by": species.last_modified_by,
        }
|
|
|
|
|
|
def get_contributor_species_document(slug: str, username: str) -> dict[str, object] | None:
    """Fetch the document payload for a contributor-owned species.

    Returns ``None`` unless a species with *slug* exists, has the
    ``contributor`` owner role, and is owned by *username*.
    """
    ensure_schema()
    with SessionLocal() as db:
        owned = db.scalar(
            select(Species).where(
                Species.slug == slug,
                Species.owner_role == "contributor",
                Species.owner_username == username,
            )
        )
        return None if owned is None else get_species_document_payload(db, slug)
|
|
|
|
|
|
def update_species_section(
    slug: str,
    section_position: int,
    content: str,
    username: str,
) -> dict[str, object] | None:
    """Replace the text of one document section, auditing any change.

    The incoming content is stripped before comparison; a ``section_update``
    audit entry is written only when the text actually changed. Returns
    ``None`` when the species or the addressed section is missing, otherwise
    a payload with the (possibly unchanged) section and the change diff.
    """
    ensure_schema()
    with SessionLocal() as db:
        record = db.scalar(select(Species).where(Species.slug == slug))
        if record is None:
            return None

        target = db.scalar(
            select(DocumentSection).where(
                DocumentSection.species_id == record.id,
                DocumentSection.position == section_position,
            )
        )
        if target is None:
            return None

        trimmed = content.strip()
        diff: dict[str, object] = {}
        if target.content != trimmed:
            diff["section_content"] = {"from": target.content, "to": trimmed}

        if diff:
            target.content = trimmed
            record.last_modified_by = username
            db.add(
                SpeciesAuditLog(
                    species_id=record.id,
                    changed_by=username,
                    changed_at=datetime.now(timezone.utc).isoformat(),
                    action="section_update",
                    details_json=json.dumps(
                        {
                            "section_position": target.position,
                            "section_heading": target.heading,
                            **diff,
                        },
                        ensure_ascii=True,
                    ),
                )
            )

        db.add(target)
        db.add(record)
        db.commit()
        db.refresh(target)

        return {
            "slug": record.slug,
            "section": {
                "id": target.id,
                "position": target.position,
                "heading": target.heading,
                "content": target.content,
            },
            "last_modified_by": record.last_modified_by,
            "changed_fields": diff,
        }
|
|
|
|
|
|
def update_species_document_markdown(
    slug: str,
    markdown: str,
    username: str,
) -> dict[str, object] | None:
    """Persist a full markdown document for a species and audit the update.

    Returns ``None`` when no species matches *slug*, otherwise the payload
    produced by ``save_species_document``.
    """
    ensure_schema()
    with SessionLocal() as db:
        record = db.scalar(select(Species).where(Species.slug == slug))
        if record is None:
            return None

        payload = save_species_document(db, record, markdown, username)
        db.add(
            SpeciesAuditLog(
                species_id=record.id,
                changed_by=username,
                changed_at=datetime.now(timezone.utc).isoformat(),
                action="document_update",
                details_json=json.dumps(
                    {"source_format": "ecospecies-markdown-v1"},
                    ensure_ascii=True,
                ),
            )
        )
        db.commit()
        return payload
|
|
|
|
|
|
def update_species_citation_review(
    slug: str,
    citation_id: int,
    review_status: str | None,
    normalized_text: str | None,
    doi: str | None,
    citation_key: str | None,
    entry_type: str | None,
    draft_bibtex: str | None,
    username: str,
    *,
    abstract_text: str | None = None,
) -> dict[str, object] | None:
    """Apply an editor's review edits to one citation of a species.

    Each optional argument that is not ``None`` overwrites the matching
    citation field (values are stripped; ``entry_type`` falls back to
    ``"misc"`` when blank). Passing through this function always stamps the
    citation's ``source_type`` as ``"editor_review"``.

    Returns ``None`` when the species or the citation cannot be found,
    otherwise a payload with the refreshed citation and exactly which fields
    changed. Raises ``ValueError`` for a ``review_status`` outside
    ``CITATION_REVIEW_STATUSES``.
    """
    ensure_schema()
    with SessionLocal() as session:
        species = session.scalar(select(Species).where(Species.slug == slug))
        if species is None:
            return None

        citation = session.scalar(
            select(SpeciesCitation).where(
                SpeciesCitation.species_id == species.id,
                SpeciesCitation.id == citation_id,
            )
        )
        if citation is None:
            return None

        # Snapshot before mutating so the audit log can record precise
        # from/to values per field.
        before = _citation_to_payload(citation)
        if review_status is not None:
            normalized_status = review_status.strip().lower()
            if normalized_status not in CITATION_REVIEW_STATUSES:
                raise ValueError(
                    f"Unsupported review_status: {review_status}. "
                    f"Expected one of {sorted(CITATION_REVIEW_STATUSES)}"
                )
            citation.review_status = normalized_status
        if normalized_text is not None:
            citation.normalized_text = normalized_text.strip()
        if abstract_text is not None:
            citation.abstract_text = abstract_text.strip()
        if doi is not None:
            citation.doi = doi.strip()
        if citation_key is not None:
            citation.citation_key = citation_key.strip()
        if entry_type is not None:
            citation.entry_type = entry_type.strip() or "misc"
        if draft_bibtex is not None:
            citation.draft_bibtex = draft_bibtex.strip()
        # Any pass through this endpoint marks the citation as editor-reviewed,
        # even when no field value differs.
        citation.source_type = "editor_review"

        after = _citation_to_payload(citation)
        changed_fields = {
            field: {"from": before[field], "to": after[field]}
            for field in (
                "review_status",
                "normalized_text",
                "abstract_text",
                "doi",
                "citation_key",
                "entry_type",
                "draft_bibtex",
                "source_type",
            )
            if before[field] != after[field]
        }

        # Only touch last_modified_by and write an audit row when something
        # actually changed.
        if changed_fields:
            species.last_modified_by = username
            session.add(
                SpeciesAuditLog(
                    species_id=species.id,
                    changed_by=username,
                    changed_at=datetime.now(timezone.utc).isoformat(),
                    action="citation_review_update",
                    details_json=json.dumps(
                        {"citation_id": citation.id, **changed_fields},
                        ensure_ascii=True,
                    ),
                )
            )

        session.add(citation)
        session.add(species)
        session.commit()
        session.refresh(citation)

        return {
            "slug": species.slug,
            "citation": _citation_to_payload(citation),
            "last_modified_by": species.last_modified_by,
            "changed_fields": changed_fields,
        }
|
|
|
|
|
|
def get_species_citation_candidates(slug: str, citation_id: int) -> dict[str, object] | None:
    """Look up resolver candidates for one citation of a species.

    Returns ``None`` when the species or citation is missing; otherwise the
    citation payload merged with the candidate-discovery result.
    """
    ensure_schema()
    with SessionLocal() as db:
        record = db.scalar(select(Species).where(Species.slug == slug))
        if record is None:
            return None

        target = db.scalar(
            select(SpeciesCitation).where(
                SpeciesCitation.species_id == record.id,
                SpeciesCitation.id == citation_id,
            )
        )
        if target is None:
            return None

        discovered = discover_citation_candidates(_citation_to_payload(target))
        return {
            "slug": record.slug,
            "citation": _citation_to_payload(target),
            **discovered,
        }
|
|
|
|
|
|
def _apply_citation_enrichment(
    session,
    species: Species,
    citation: SpeciesCitation,
    username: str,
) -> dict[str, object]:
    """Run the enrichment resolver over *citation* and stage the result.

    Mutates the citation in place with whichever fields the resolver
    returned (each stored as a stripped string), records a
    ``citation_enrichment`` audit entry when anything changed, and stages
    the updated rows on *session*. The caller is responsible for
    committing. Returns the refreshed citation payload plus the per-field
    change set.
    """
    # Snapshot before mutating so the audit log can record from/to values.
    before = _citation_to_payload(citation)
    enrichment = enrich_citation_payload(before)

    # Copy only the fields the resolver actually produced; absent keys leave
    # the existing citation values untouched.
    for field in (
        "citation_key",
        "entry_type",
        "normalized_text",
        "abstract_text",
        "draft_bibtex",
        "doi",
        "source_url",
        "openalex_id",
        "resolver_source_label",
        "enrichment_status",
        "enrichment_error",
    ):
        if field in enrichment:
            setattr(citation, field, str(enrichment.get(field, "")).strip())

    after = _citation_to_payload(citation)
    changed_fields = {
        field: {"from": before[field], "to": after[field]}
        for field in (
            "citation_key",
            "entry_type",
            "normalized_text",
            "abstract_text",
            "draft_bibtex",
            "doi",
            "source_url",
            "openalex_id",
            "resolver_source_label",
            "enrichment_status",
            "enrichment_error",
        )
        if before[field] != after[field]
    }
    # Resolver conflicts are surfaced in the change set even though they do
    # not map to a citation column.
    conflicts = enrichment.get("conflicts")
    if conflicts:
        changed_fields["resolver_conflicts"] = list(conflicts)

    if changed_fields:
        species.last_modified_by = username
        session.add(
            SpeciesAuditLog(
                species_id=species.id,
                changed_by=username,
                changed_at=datetime.now(timezone.utc).isoformat(),
                action="citation_enrichment",
                details_json=json.dumps(
                    {"citation_id": citation.id, **changed_fields},
                    ensure_ascii=True,
                ),
            )
        )

    # Stage but do not commit; batch callers commit once at the end.
    session.add(citation)
    session.add(species)
    return {
        "citation": _citation_to_payload(citation),
        "changed_fields": changed_fields,
    }
|
|
|
|
|
|
def _next_citation_position(species: Species) -> int:
    """Return the 1-based position to assign to a newly added citation."""
    occupied = [entry.position for entry in species.citations]
    return max(occupied, default=0) + 1
|
|
|
|
|
|
def update_species_citation_enrichment(
    slug: str,
    citation_id: int,
    username: str,
) -> dict[str, object] | None:
    """Run the enrichment pipeline against a single citation.

    Returns ``None`` when the species or citation is missing, otherwise the
    refreshed citation payload plus the fields the enrichment altered.
    """
    ensure_schema()
    with SessionLocal() as db:
        record = db.scalar(select(Species).where(Species.slug == slug))
        if record is None:
            return None

        target = db.scalar(
            select(SpeciesCitation).where(
                SpeciesCitation.species_id == record.id,
                SpeciesCitation.id == citation_id,
            )
        )
        if target is None:
            return None

        outcome = _apply_citation_enrichment(db, record, target, username)
        db.commit()
        db.refresh(target)

        return {
            "slug": record.slug,
            "citation": outcome["citation"],
            "last_modified_by": record.last_modified_by,
            "changed_fields": outcome["changed_fields"],
        }
|
|
|
|
|
|
def apply_species_citation_candidate_selection(
    slug: str,
    citation_id: int,
    candidate: dict[str, object],
    username: str,
) -> dict[str, object] | None:
    """Overwrite a citation with an editor-selected resolver candidate.

    The candidate's resolved metadata replaces the citation's enrichment
    fields in place; the citation is then marked as
    ``editor_selected_candidate`` with review status ``accepted``. Any
    resulting field changes are recorded in a
    ``citation_candidate_selection`` audit entry.

    Returns ``None`` when the species or citation cannot be found,
    otherwise a payload with the refreshed citation and the change diff.
    """
    ensure_schema()
    with SessionLocal() as session:
        species = session.scalar(select(Species).where(Species.slug == slug))
        if species is None:
            return None

        citation = session.scalar(
            select(SpeciesCitation).where(
                SpeciesCitation.species_id == species.id,
                SpeciesCitation.id == citation_id,
            )
        )
        if citation is None:
            return None

        # Snapshot before mutating so the audit log can record from/to values.
        before = _citation_to_payload(citation)
        enrichment = apply_citation_candidate_selection(before, candidate)
        # Copy only the fields the selection actually produced; everything is
        # stored as a stripped string.
        for field in (
            "citation_key",
            "entry_type",
            "normalized_text",
            "abstract_text",
            "draft_bibtex",
            "doi",
            "source_url",
            "openalex_id",
            "resolver_source_label",
            "enrichment_status",
            "enrichment_error",
        ):
            if field in enrichment:
                setattr(citation, field, str(enrichment.get(field, "")).strip())
        # Selecting a candidate always re-stamps provenance and acceptance,
        # even when no metadata field differs.
        citation.source_type = "editor_selected_candidate"
        citation.review_status = "accepted"

        after = _citation_to_payload(citation)
        changed_fields = {
            field: {"from": before[field], "to": after[field]}
            for field in (
                "citation_key",
                "entry_type",
                "normalized_text",
                "abstract_text",
                "draft_bibtex",
                "doi",
                "source_url",
                "openalex_id",
                "resolver_source_label",
                "enrichment_status",
                "enrichment_error",
                "source_type",
                "review_status",
            )
            if before[field] != after[field]
        }

        # Only touch last_modified_by and write an audit row when something
        # actually changed.
        if changed_fields:
            species.last_modified_by = username
            session.add(
                SpeciesAuditLog(
                    species_id=species.id,
                    changed_by=username,
                    changed_at=datetime.now(timezone.utc).isoformat(),
                    action="citation_candidate_selection",
                    details_json=json.dumps(
                        {
                            "citation_id": citation.id,
                            "selected_source_label": str(candidate.get("source_label", "")).strip(),
                            **changed_fields,
                        },
                        ensure_ascii=True,
                    ),
                )
            )

        session.add(citation)
        session.add(species)
        session.commit()
        session.refresh(citation)
        return {
            "slug": species.slug,
            "citation": _citation_to_payload(citation),
            "last_modified_by": species.last_modified_by,
            "changed_fields": changed_fields,
        }
|
|
|
|
|
|
def add_species_citation_from_candidate(
    slug: str,
    citation_id: int,
    candidate: dict[str, object],
    username: str,
) -> dict[str, object] | None:
    """Add a resolver candidate as a brand-new citation on a species.

    The candidate is enriched relative to the existing citation
    *citation_id*, its text is appended to the species document under the
    "Related References" heading, and the citation row produced by the
    document re-sync is annotated with the enrichment metadata, marked
    ``editor_added_candidate`` and ``accepted``. A
    ``citation_candidate_addition`` audit entry is recorded.

    Returns ``None`` when the species, the source citation, or the newly
    synced citation row cannot be found; otherwise a payload with the new
    citation.
    """
    ensure_schema()
    with SessionLocal() as session:
        species = session.scalar(select(Species).where(Species.slug == slug))
        if species is None:
            return None

        citation = session.scalar(
            select(SpeciesCitation).where(
                SpeciesCitation.species_id == species.id,
                SpeciesCitation.id == citation_id,
            )
        )
        if citation is None:
            return None

        enrichment = apply_citation_candidate_selection(_citation_to_payload(citation), candidate)
        # Text written into the document: prefer the enriched normalized
        # text, then the candidate's title, then the source citation's raw text.
        raw_text = (
            str(enrichment.get("normalized_text", "")).strip()
            or str(candidate.get("fields", {}).get("title", "")).strip()
            or str(citation.raw_text).strip()
        )

        # Append the reference to the markdown document and re-sync so a
        # SpeciesCitation row exists for it. add_citation_to_document returns
        # False when the text was already present; in that case the existing
        # row is simply re-annotated below, so the flag needs no handling.
        # (The original code had a dead `if not added:` branch here that
        # re-assigned the same source_type/review_status values.)
        document_markdown = species.document.markdown_content if species.document is not None else ""
        document_model = parse_markdown_document(document_markdown)
        add_citation_to_document(document_model, raw_text, heading_title="Related References")
        updated_markdown = export_markdown_document(document_model)
        save_species_document(session, species, updated_markdown, username)

        new_citation = session.scalar(
            select(SpeciesCitation).where(
                SpeciesCitation.species_id == species.id,
                SpeciesCitation.raw_text == raw_text,
            )
        )
        if new_citation is None:
            return None

        # Carry the enrichment metadata onto the synced row and mark it as
        # editor-curated and accepted.
        new_citation.source_type = "editor_added_candidate"
        new_citation.review_status = "accepted"
        new_citation.citation_key = str(enrichment.get("citation_key", "")).strip()
        new_citation.entry_type = str(enrichment.get("entry_type", "misc")).strip() or "misc"
        new_citation.normalized_text = str(enrichment.get("normalized_text", "")).strip()
        new_citation.abstract_text = str(enrichment.get("abstract_text", "")).strip()
        new_citation.draft_bibtex = str(enrichment.get("draft_bibtex", "")).strip()
        new_citation.doi = str(enrichment.get("doi", "")).strip()
        new_citation.source_url = str(enrichment.get("source_url", "")).strip()
        new_citation.openalex_id = str(enrichment.get("openalex_id", "")).strip()
        new_citation.resolver_source_label = str(enrichment.get("resolver_source_label", "")).strip()
        new_citation.enrichment_status = str(enrichment.get("enrichment_status", "resolved")).strip()
        new_citation.enrichment_error = str(enrichment.get("enrichment_error", "")).strip()

        session.add(new_citation)
        species.last_modified_by = username
        session.add(
            SpeciesAuditLog(
                species_id=species.id,
                changed_by=username,
                changed_at=datetime.now(timezone.utc).isoformat(),
                action="citation_candidate_addition",
                details_json=json.dumps(
                    {
                        "source_citation_id": citation.id,
                        "selected_source_label": str(candidate.get("source_label", "")).strip(),
                        "new_citation_key": new_citation.citation_key,
                    },
                    ensure_ascii=True,
                ),
            )
        )
        session.commit()
        session.refresh(new_citation)
        return {
            "slug": species.slug,
            "citation": _citation_to_payload(new_citation),
            "last_modified_by": species.last_modified_by,
        }
|
|
|
|
|
|
def update_species_citations_enrichment_batch(
    slug: str,
    username: str,
) -> dict[str, object] | None:
    """Run citation enrichment across every citation of a species.

    Returns ``None`` when no species matches *slug*; otherwise a summary
    payload with per-status tallies and the refreshed citation payloads.
    A single commit covers the whole batch.
    """
    ensure_schema()
    with SessionLocal() as db:
        record = db.scalar(select(Species).where(Species.slug == slug))
        if record is None:
            return None

        _ = record.citations  # force the relationship to load up front
        payloads: list[dict[str, object]] = []
        tallies: Counter[str] = Counter()
        changed = 0

        for entry in record.citations:
            outcome = _apply_citation_enrichment(db, record, entry, username)
            snapshot = outcome["citation"]
            payloads.append(snapshot)
            if outcome["changed_fields"]:
                changed += 1
            tallies[str(snapshot.get("enrichment_status", "")).strip()] += 1

        db.commit()

        return {
            "slug": record.slug,
            "citation_count": len(payloads),
            "changed_count": changed,
            "resolved_count": tallies["resolved"],
            "unresolved_count": tallies["unresolved"],
            "error_count": tallies["error"],
            "citations": payloads,
            "last_modified_by": record.last_modified_by,
        }
|
|
|
|
|
|
def _should_backfill_citation(citation: SpeciesCitation, include_accepted: bool = False) -> bool:
    """Decide whether *citation* still needs an enrichment backfill.

    Accepted and editor-curated citations are skipped unless
    ``include_accepted`` is set. Otherwise a citation qualifies when its
    provenance or enrichment status looks unfinished, or when either of its
    enriched text fields is still empty.
    """
    def _norm(value: object) -> str:
        return str(value or "").strip().lower()

    if not include_accepted:
        if _norm(citation.review_status) == "accepted":
            return False
        if _norm(citation.source_type) in {"editor_selected_candidate", "editor_added_candidate"}:
            return False

    if _norm(citation.source_type) in {"document_extract", "editor_review", ""}:
        return True
    if _norm(citation.enrichment_status) in {"pending", "unresolved", "error", ""}:
        return True
    if not str(citation.normalized_text or "").strip():
        return True
    return not str(citation.abstract_text or "").strip()
|
|
|
|
|
|
def backfill_species_citations(
    slug: str,
    username: str,
    include_accepted: bool = False,
) -> dict[str, object] | None:
    """Enrich only those citations of a species that still need backfill.

    Citations failing ``_should_backfill_citation`` keep their current
    payload untouched. Returns ``None`` when no species matches *slug*;
    otherwise a summary with backfill/change counts and per-status tallies.
    A single commit covers the whole pass.
    """
    ensure_schema()
    with SessionLocal() as db:
        record = db.scalar(select(Species).where(Species.slug == slug))
        if record is None:
            return None

        _ = record.citations  # force the relationship to load up front
        payloads: list[dict[str, object]] = []
        tallies: Counter[str] = Counter()
        changed = 0
        backfilled = 0

        for entry in record.citations:
            if _should_backfill_citation(entry, include_accepted=include_accepted):
                backfilled += 1
                outcome = _apply_citation_enrichment(db, record, entry, username)
                snapshot = outcome["citation"]
                if outcome["changed_fields"]:
                    changed += 1
            else:
                snapshot = _citation_to_payload(entry)
            payloads.append(snapshot)
            tallies[str(snapshot.get("enrichment_status", "")).strip()] += 1

        db.commit()

        return {
            "slug": record.slug,
            "citation_count": len(payloads),
            "backfilled_count": backfilled,
            "changed_count": changed,
            "resolved_count": tallies["resolved"],
            "unresolved_count": tallies["unresolved"],
            "error_count": tallies["error"],
            "citations": payloads,
            "last_modified_by": record.last_modified_by,
        }
|
|
|
|
|
|
def update_contributor_species_document_markdown(
    slug: str,
    markdown: str,
    username: str,
) -> dict[str, object] | None:
    """Let a contributor update the document of a species they own.

    A previously published record is demoted to ``review`` so the edit goes
    back through moderation. Returns ``None`` when the contributor does not
    own a species with *slug*; otherwise the save payload plus the current
    publication status.
    """
    ensure_schema()
    contributor = _normalize_email(username)
    with SessionLocal() as db:
        record = db.scalar(
            select(Species).where(
                Species.slug == slug,
                Species.owner_role == "contributor",
                Species.owner_username == contributor,
            )
        )
        if record is None:
            return None

        payload = save_species_document(db, record, markdown, contributor)
        if record.publication_status == "published":
            record.publication_status = "review"
        db.add(
            SpeciesAuditLog(
                species_id=record.id,
                changed_by=contributor,
                changed_at=datetime.now(timezone.utc).isoformat(),
                action="contributor_document_update",
                details_json=json.dumps(
                    {"source_format": "ecospecies-markdown-v1"},
                    ensure_ascii=True,
                ),
            )
        )
        db.commit()
        return {
            **payload,
            "publication_status": record.publication_status,
        }