196 lines
6.0 KiB
Python
196 lines
6.0 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
import unittest
|
|
|
|
from ecospecies_api.document_format import (
|
|
DocumentNode,
|
|
StructuredDocument,
|
|
build_document_from_species_payload,
|
|
extract_citation_entries,
|
|
extract_species_projection,
|
|
export_markdown_document,
|
|
parse_markdown_document,
|
|
validate_markdown_document,
|
|
)
|
|
|
|
|
|
class StructuredMarkdownTests(unittest.TestCase):
    """Tests for the structured-Markdown helpers from ``ecospecies_api.document_format``.

    The cases cover parsing/exporting Markdown with front matter, building a
    document from a species payload, projecting a document back to a flat
    species shape, validation error messages, and citation extraction.
    """

    # NOTE(review): nested indentation inside the YAML front-matter literals
    # below is written as two-space block sequences — confirm it matches the
    # canonical fixtures.
    #
    # Test methods carry ``#`` comments rather than docstrings so that
    # unittest's verbose output keeps showing the method names.

    def test_round_trip_markdown_preserves_metadata_and_hierarchy(self) -> None:
        # Parse a document with nested front matter and a two-level heading
        # tree, then check metadata values, node hierarchy, and the export.
        parsed = parse_markdown_document(
            """---
title: American Oyster
common_name: American Oyster
scientific_name: Crassostrea virginica
legacy_identifiers:
  - authority: legacy-ecospecies
    identifier: 5192
    label: FLELMR
taxon_identifiers:
  - authority: worms
    identifier: 159059
    label: AphiaID
    primary: true
primary_taxon_authority: worms
---

## Summary
Short abstract.

## Habitat

### Type
Estuarine.
"""
        )

        front_matter = parsed.metadata
        self.assertEqual(front_matter["title"], "American Oyster")
        self.assertEqual(front_matter["primary_taxon_authority"], "worms")
        # The identifier is written bare in the front matter; the test expects
        # it back as the string "5192".
        self.assertEqual(front_matter["legacy_identifiers"][0]["identifier"], "5192")
        self.assertEqual(front_matter["taxon_identifiers"][0]["authority"], "worms")

        top_level = parsed.nodes
        self.assertEqual(top_level[0].title, "Summary")
        self.assertEqual(top_level[1].children[0].title, "Type")

        exported = export_markdown_document(parsed)
        self.assertIn("## Habitat", exported)

    def test_build_document_from_species_payload_creates_markdown_sections(self) -> None:
        # Build from a flat species payload; the expected node titles below
        # contain "Summary" and omit the "HEADER" section.
        payload = {
            "title": "American Oyster",
            "common_name": "American Oyster",
            "scientific_name": "Crassostrea virginica",
            "flelmr_code": "5192",
            "source_file": "American Oyster.txt",
            "summary": "Short abstract.",
            "sections": [
                {"heading": "HEADER", "content": "Ignored header"},
                {"heading": "Habitat", "content": "Estuarine."},
                {"heading": "Reproduction", "content": "Broadcast spawner."},
            ],
        }
        built = build_document_from_species_payload(payload)

        legacy_entry = built.metadata["legacy_identifiers"][0]
        self.assertEqual(legacy_entry["identifier"], "5192")
        self.assertEqual(legacy_entry["authority"], "legacy-ecospecies")

        titles = [item.title for item in built.nodes]
        self.assertEqual(titles, ["Summary", "Habitat", "Reproduction"])
        self.assertEqual(built.nodes[1].body, "Estuarine.")

    def test_extract_species_projection_flattens_nested_headings(self) -> None:
        # A nested "Type" heading under "Habitat" is expected to surface in the
        # projection as "Habitat / Type".
        markdown_text = """---
title: American Oyster
common_name: American Oyster
scientific_name: Crassostrea virginica
legacy_identifiers:
  - authority: legacy-ecospecies
    identifier: 5192
    label: FLELMR
---

## Summary
Short abstract.

## Habitat
General habitat.

### Type
Estuarine.
"""
        parsed = parse_markdown_document(markdown_text)

        projected = extract_species_projection(parsed)

        self.assertEqual(projected["summary"], "Short abstract.")
        self.assertEqual(projected["flelmr_code"], "5192")
        headings = [entry["heading"] for entry in projected["sections"]]
        self.assertEqual(headings, ["Habitat", "Habitat / Type"])

    def test_extract_species_projection_accepts_legacy_species_code_front_matter(self) -> None:
        # Front matter that only carries ``species_code`` should still yield a
        # ``flelmr_code`` in the projection.
        markdown_text = """---
title: Legacy Fish
common_name: Legacy Fish
scientific_name: Pisces historicus
species_code: 4242
---

## Habitat
Estuarine.
"""
        parsed = parse_markdown_document(markdown_text)

        projected = extract_species_projection(parsed)

        self.assertEqual(projected["flelmr_code"], "4242")

    def test_validate_markdown_document_rejects_missing_front_matter_and_depth_jump(self) -> None:
        # The input has no front matter and jumps from "##" straight to "####";
        # both problems must be mentioned in the returned errors.
        markdown_text = """## Habitat
Text

#### Type
Nested too deeply.
"""
        problems = validate_markdown_document(markdown_text)

        self.assertTrue(any("front matter" in message for message in problems))
        self.assertTrue(any("Heading depth jumps" in message for message in problems))

    def test_extract_citation_entries_strips_legacy_comma_number_prefix(self) -> None:
        # A "160, " prefix should become the legacy reference number and be
        # removed from the citation's raw text.
        markdown_text = """---
title: Alabama Shad
common_name: Alabama Shad
scientific_name: Alosa alabamae
---

## References
160, Daniell, W.C. 1872. Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River. Comm. Rept. U.S. Comm. Fish & Fish. 2: 387-390.
"""
        parsed = parse_markdown_document(markdown_text)

        entries = extract_citation_entries(parsed)

        self.assertEqual(len(entries), 1)
        first = entries[0]
        self.assertEqual(first["legacy_reference_number"], "160")
        self.assertTrue(first["raw_text"].startswith("Daniell, W.C. 1872."))
        self.assertFalse(first["raw_text"].startswith("160,"))

    def test_extract_citation_entries_accepts_colon_terminated_citation_heading(self) -> None:
        # The section title here is "Citations:" (trailing colon) and the
        # document is assembled directly instead of being parsed from Markdown.
        citation_section = DocumentNode(
            node_type="section",
            title="Citations:",
            body="7, Ahmed, M. 1975. Speciation in living oysters.",
            depth=2,
        )
        hand_built = StructuredDocument(metadata={}, nodes=[citation_section])

        entries = extract_citation_entries(hand_built)

        self.assertEqual(len(entries), 1)
        self.assertEqual(entries[0]["legacy_reference_number"], "7")

    def test_extract_citation_entries_accepts_bare_number_prefix(self) -> None:
        # The reference number is followed by a space rather than a comma.
        markdown_text = """---
title: Eastern Mosquitofish
common_name: Eastern Mosquitofish
scientific_name: Gambusia holbrooki
---

## Citations
848 Gilmore, R.G. 1977. Fishes of the Indian River Lagoon and adjacent waters, Florida.
"""
        parsed = parse_markdown_document(markdown_text)

        entries = extract_citation_entries(parsed)

        self.assertEqual(len(entries), 1)
        first = entries[0]
        self.assertEqual(first["legacy_reference_number"], "848")
        self.assertTrue(first["raw_text"].startswith("Gilmore, R.G. 1977."))