EcoSpecies-Atlas/apps/api/tests/test_parser.py

110 lines
4.0 KiB
Python

from __future__ import annotations
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
from ecospecies_api import parser
class ParserPathResolutionTests(unittest.TestCase):
    """Checks for ``ecospecies_api.parser``.

    Covers metadata extraction, section splitting, data-directory
    validation, and slug assignment for loaded species records.
    """

    # Repo-relative directory holding the species input files.
    _INPUT_FILES = "input-data/InputFiles"
    # Fragments expected in the ValueError message for rejected directories.
    _OUTSIDE_REPO = "within the codebase directory"
    _UNSAFE_NAME = "unsafe directory name"

    def test_ecospecies_code_is_treated_as_flelmr_code(self) -> None:
        """An ``EcoSpecies Code`` value is readable under both metadata keys."""
        lines = [
            "Title: Test Fish",
            "EcoSpecies Code: 4242",
        ]

        metadata = parser.extract_metadata(lines)

        for key in ("ecospecies code", "flelmr"):
            self.assertEqual(metadata[key], "4242")

    def test_title_case_headings_are_split_into_sections(self) -> None:
        """Bare title-case lines become section headings after ``HEADER``."""
        profile = [
            "Species profile: American oyster (Crassostrea virginica)",
            "",
            "Classification",
            " Phylum: Mollusca",
            "Value",
            "Commercial: Important fishery.",
            "Habitat",
            "Type: Estuarine.",
        ]

        headings = [section.heading for section in parser.split_sections(profile)]

        self.assertEqual(
            headings,
            ["HEADER", "Classification", "Value", "Habitat"],
        )

    def test_colon_terminated_title_case_headings_are_split_into_sections(self) -> None:
        """``Citations:`` yields a ``Citations`` section (colon dropped).

        For this input the only headings expected are ``HEADER`` and
        ``Citations``.
        """
        profile = [
            "Ecological Interactions and Notes",
            "Predator text.",
            "",
            "Reference Numbers:",
            "",
            "Citations:",
            "7, Ahmed, M. 1975. Speciation in living oysters.",
        ]

        headings = [section.heading for section in parser.split_sections(profile)]

        self.assertEqual(
            headings,
            ["HEADER", "Citations"],
        )

    def test_default_data_dir_uses_in_repo_path_without_spaces(self) -> None:
        """With an emptied environment the default is the in-repo input dir."""
        # Clear os.environ only while the default is being computed.
        with patch.dict("os.environ", {}, clear=True):
            resolved = Path(parser.get_default_data_dir())

        expected = parser.get_repo_root() / "input-data" / "InputFiles"
        self.assertEqual(resolved, expected)

    def test_relative_override_must_stay_within_repo(self) -> None:
        """A relative path starting with ``..`` raises ``ValueError``."""
        with self.assertRaisesRegex(ValueError, self._OUTSIDE_REPO):
            parser.resolve_data_dir("../input-data/InputFiles")

    def test_absolute_override_outside_repo_is_rejected(self) -> None:
        """A freshly created temporary directory path raises ``ValueError``."""
        rejects = self.assertRaisesRegex(ValueError, self._OUTSIDE_REPO)
        with tempfile.TemporaryDirectory() as outside_dir, rejects:
            parser.resolve_data_dir(outside_dir)

    def test_directory_names_with_spaces_are_rejected(self) -> None:
        """A path component containing a space raises ``ValueError``."""
        with self.assertRaisesRegex(ValueError, self._UNSAFE_NAME):
            parser.resolve_data_dir("input-data/Bad Name")

    def test_directory_names_with_special_characters_are_rejected(self) -> None:
        """A path component containing ``@`` raises ``ValueError``."""
        with self.assertRaisesRegex(ValueError, self._UNSAFE_NAME):
            parser.resolve_data_dir("input-data/bad@name")

    def test_load_species_records_resolves_repo_relative_paths(self) -> None:
        """Loading from the repo-relative input directory yields records."""
        loaded = parser.load_species_records(self._INPUT_FILES)

        self.assertGreater(len(loaded), 0)

    def test_duplicate_source_records_receive_unique_stable_slugs(self) -> None:
        """Slugs are unique per record and match the pinned values below."""
        records = parser.load_species_records(self._INPUT_FILES)
        slug_by_source = {record.source_file: record.slug for record in records}

        # No two records may share a slug.
        distinct_slugs = {record.slug for record in records}
        self.assertEqual(len(records), len(distinct_slugs))

        # Exact slugs expected for specific source files, checked in order.
        pinned_slugs = (
            (
                "Red Snapper_SLH_Outline2012_0722.txt",
                "red-snapper-red-snapper-slh-outline2012-0722",
            ),
            (
                "RedSnapper_SLH_2012_0830_combined.txt",
                "red-snapper-redsnapper-slh-2012-0830-combined",
            ),
            (
                "Sailfin Molly SLH RGG.txt",
                "sailfin-molly-sailfin-molly-slh-rgg",
            ),
        )
        for source_file, expected_slug in pinned_slugs:
            self.assertEqual(slug_by_source[source_file], expected_slug)

        # The underscore variant is only pinned by prefix; whatever follows
        # the trailing "-" is not asserted here.
        underscore_variant_slug = slug_by_source["Sailfin_Molly SLH RGG.txt"]
        self.assertTrue(
            underscore_variant_slug.startswith("sailfin-molly-sailfin-molly-slh-rgg-")
        )