110 lines
4.0 KiB
Python
110 lines
4.0 KiB
Python
from __future__ import annotations
|
|
|
|
import tempfile
|
|
import unittest
|
|
from pathlib import Path
|
|
from unittest.mock import patch
|
|
|
|
from ecospecies_api import parser
|
|
|
|
|
|
class ParserPathResolutionTests(unittest.TestCase):
    """Tests for the ``parser`` module's public helpers.

    Covers metadata extraction, section splitting, data-directory
    resolution/validation, and species-record loading with slug assignment.
    """

    def test_ecospecies_code_is_treated_as_flelmr_code(self) -> None:
        # An "EcoSpecies Code" line must be readable under both metadata keys.
        header_lines = [
            "Title: Test Fish",
            "EcoSpecies Code: 4242",
        ]

        parsed = parser.extract_metadata(header_lines)

        self.assertEqual(parsed["ecospecies code"], "4242")
        self.assertEqual(parsed["flelmr"], "4242")

    def test_title_case_headings_are_split_into_sections(self) -> None:
        # Bare title-case lines should each open a section; everything before
        # the first one is expected under the "HEADER" heading.
        document = [
            "Species profile: American oyster (Crassostrea virginica)",
            "",
            "Classification",
            " Phylum: Mollusca",
            "Value",
            "Commercial: Important fishery.",
            "Habitat",
            "Type: Estuarine.",
        ]

        found_headings = [part.heading for part in parser.split_sections(document)]

        self.assertEqual(
            found_headings,
            ["HEADER", "Classification", "Value", "Habitat"],
        )

    def test_colon_terminated_title_case_headings_are_split_into_sections(self) -> None:
        # For this input only "HEADER" and "Citations" are expected back;
        # "Reference Numbers:" is not in the expected heading list.
        document = [
            "Ecological Interactions and Notes",
            "Predator text.",
            "",
            "Reference Numbers:",
            "",
            "Citations:",
            "7, Ahmed, M. 1975. Speciation in living oysters.",
        ]

        found_headings = [part.heading for part in parser.split_sections(document)]

        self.assertEqual(
            found_headings,
            ["HEADER", "Citations"],
        )

    def test_default_data_dir_uses_in_repo_path_without_spaces(self) -> None:
        # Resolve the default with an emptied environment so no variable
        # can influence the result.
        with patch.dict("os.environ", {}, clear=True):
            default_dir = Path(parser.get_default_data_dir())

        # The expected path is computed after the environment is restored.
        expected_dir = parser.get_repo_root() / "input-data" / "InputFiles"
        self.assertEqual(default_dir, expected_dir)

    def test_relative_override_must_stay_within_repo(self) -> None:
        # A relative path that climbs out of the repo must be refused.
        rejection = self.assertRaisesRegex(ValueError, "within the codebase directory")
        with rejection:
            parser.resolve_data_dir("../input-data/InputFiles")

    def test_absolute_override_outside_repo_is_rejected(self) -> None:
        # A fresh temporary directory serves as an absolute path that is
        # expected to be rejected as outside the codebase directory.
        with tempfile.TemporaryDirectory() as outside_dir, self.assertRaisesRegex(
            ValueError, "within the codebase directory"
        ):
            parser.resolve_data_dir(outside_dir)

    def test_directory_names_with_spaces_are_rejected(self) -> None:
        # A path component containing a space must be refused.
        rejection = self.assertRaisesRegex(ValueError, "unsafe directory name")
        with rejection:
            parser.resolve_data_dir("input-data/Bad Name")

    def test_directory_names_with_special_characters_are_rejected(self) -> None:
        # A path component containing "@" must be refused.
        rejection = self.assertRaisesRegex(ValueError, "unsafe directory name")
        with rejection:
            parser.resolve_data_dir("input-data/bad@name")

    def test_load_species_records_resolves_repo_relative_paths(self) -> None:
        # Loading via a repo-relative path must yield at least one record.
        loaded = parser.load_species_records("input-data/InputFiles")

        self.assertGreater(len(loaded), 0)

    def test_duplicate_source_records_receive_unique_stable_slugs(self) -> None:
        # Every record needs a distinct slug, and the slugs for these known
        # source files must match the pinned values.
        loaded = parser.load_species_records("input-data/InputFiles")
        slug_for = {entry.source_file: entry.slug for entry in loaded}

        distinct_slugs = {entry.slug for entry in loaded}
        self.assertEqual(len(loaded), len(distinct_slugs))

        # Checked in order; the first mismatch (or missing file) stops the test.
        pinned_slugs = (
            (
                "Red Snapper_SLH_Outline2012_0722.txt",
                "red-snapper-red-snapper-slh-outline2012-0722",
            ),
            (
                "RedSnapper_SLH_2012_0830_combined.txt",
                "red-snapper-redsnapper-slh-2012-0830-combined",
            ),
            (
                "Sailfin Molly SLH RGG.txt",
                "sailfin-molly-sailfin-molly-slh-rgg",
            ),
        )
        for source_name, wanted_slug in pinned_slugs:
            self.assertEqual(slug_for[source_name], wanted_slug)

        # This file's slug is only pinned by prefix; whatever follows the
        # trailing hyphen is not checked here.
        colliding_slug = slug_for["Sailfin_Molly SLH RGG.txt"]
        self.assertTrue(
            colliding_slug.startswith("sailfin-molly-sailfin-molly-slh-rgg-")
        )
|