438 lines
14 KiB
Python
438 lines
14 KiB
Python
from __future__ import annotations
|
||
|
||
from pathlib import Path
|
||
import re
|
||
|
||
|
||
# Repository root: this script is assumed to live one directory below it.
ROOT = Path(__file__).resolve().parents[1]
# Input: scanned thesis text containing a BIBLIOGRAPHY section.
THES = ROOT / "THES" / "INT_ANN.TXT"
# Output: generated BibTeX database.
OUT = ROOT / "latex" / "integration_and_hybridization_in_neural_network_modelling.bib"

# Hand-curated corrections, keyed by the BibTeX key produced by bib_key().
# When a key is present here, apply_override() rebuilds the entry's field
# list from this table (author/year/title fall back to the parsed values);
# any extra fields parsed heuristically from the raw text are discarded.
MANUAL_OVERRIDES = {
    "Farhat1986": {
        "kind": "incollection",
        "title": "Neural net models and optical computing: an overview",
        "booktitle": "Hybrid and Optical Computing",
        "editor": "Harold Szu",
        "publisher": "SPIE",
        "address": "Bellingham, Washington",
        "volume": "634",
        "pages": "277-306",
    },
    "Harmon1970": {
        "kind": "incollection",
        "title": "Neural subsystems: an interpretive summary",
        "booktitle": "The Neurosciences Second Study Program",
        "editor": "F. O. Schmitt",
        "publisher": "Rockefeller University Press",
        "address": "New York",
        "pages": "486-494",
    },
    "HechtNielsen1986": {
        "kind": "incollection",
        "title": "Performance limits of optical, electro-optical, and electronic neurocomputers",
        "booktitle": "Hybrid and Optical Computing",
        "editor": "H. Szu",
        "publisher": "SPIE",
        "address": "Bellingham, Washington",
        "volume": "634",
        "pages": "277-306",
    },
    "Hopfield1982": {
        "kind": "article",
        "journal": "Proceedings of the National Academy of Sciences",
        "volume": "79",
        "pages": "2554-2558",
    },
    "Leven1987a": {
        "kind": "phdthesis",
        "title": "Choice and Neural process: A dissertation",
        "school": "University of Texas at Arlington",
        "note": "Chapter 5: Neural process and form -- mathematics and meaning.",
    },
    "Leven1987b": {
        "kind": "inproceedings",
        "title": "S.A.M.: a triune extension to the ART model",
        "note": "Poster presentation at the North Texas State University Symposium on Neural Networks.",
    },
    "Levine1990": {
        "kind": "unpublished",
        "note": "To appear in Motivation, Emotion, and Goal Direction in Neural Networks, D. Levine and S. Leven, eds., Erlbaum, Hillsdale, New Jersey.",
    },
    "Lippmann1987": {
        "kind": "article",
        "journal": "IEEE ASSP Magazine",
        "month": "apr",
        "pages": "4-22",
    },
    "MacLean1970": {
        "kind": "incollection",
        "title": "The triune brain, emotion, and scientific bias",
        "booktitle": "The Neurosciences Second Study Program",
        "editor": "F. O. Schmitt",
        "publisher": "Rockefeller University Press",
        "address": "New York",
        "pages": "486-494",
    },
    # Author-only override: the raw entry's author field is malformed.
    "Matsuoka1989": {
        "author": "Matsuoka, T. and Hamada, H. and Nakatsu, R.",
    },
    "Neuroscience1988": {
        "kind": "techreport",
        # Braces keep BibTeX from treating the group name as "Last, First".
        "author": "{Metroplex Study Group on Computational Neuroscience}",
        "institution": "North Texas Commission Regional Technology Program",
        "note": "Report to the North Texas Commission Regional Technology Program.",
    },
    "Newell1976": {
        "kind": "article",
        "journal": "Communications of the ACM",
        "volume": "19",
        "number": "3",
        "pages": "113-126",
    },
    "Nottebohm1989": {
        "kind": "article",
        "journal": "Scientific American",
        "month": "feb",
        "pages": "74-79",
    },
    "Pao1989": {
        "kind": "book",
        "publisher": "Addison-Wesley",
        "address": "Reading, Massachusetts",
    },
    "Parker1985": {
        "kind": "techreport",
        "institution": "Massachusetts Institute of Technology, Center for Computational Research in Economics and Management Science",
        "address": "Cambridge, Massachusetts",
        "number": "TR-47",
        "title": "Learning-logic",
    },
    "Rumelhart1986": {
        "kind": "incollection",
        "title": "Learning internal representations by back propagation",
        "booktitle": "Parallel Distributed Processing",
        "editor": "D. Rumelhart and J. McClelland and the PDP Research Group",
        "publisher": "MIT Press",
        "address": "Cambridge, Massachusetts",
        "volume": "1",
        "pages": "365-422",
    },
    "Simpson1988": {
        "kind": "unpublished",
        "note": "Submitted to CRC Critical Reviews in Artificial Intelligence.",
    },
    "Sontag1989": {
        "kind": "inproceedings",
        "title": "Back-propagation separates when perceptrons do",
        "booktitle": "Proceedings of the IEEE/INNS International Joint Conference on Neural Networks (IJCNN-89) Vol. I",
        "pages": "639-642",
    },
    "Tsutsumi1989": {
        "kind": "inproceedings",
        "title": "A multi-layered neural network composed of backprop. and Hopfield nets and internal space representation",
        "booktitle": "Proceedings of the IEEE/INNS International Joint Conference on Neural Networks (IJCNN-89) Vol. I",
        "pages": "507-512",
    },
    "Hewitt1985": {
        "kind": "article",
        "journal": "Byte",
        "volume": "10",
        "number": "4",
        "pages": "223-242",
    },
    "Widrow1988": {
        "kind": "article",
        "journal": "IEEE Computer",
        "volume": "21",
        "number": "3",
        "pages": "25-39",
    },
    "Charniak1985": {
        "kind": "book",
        "publisher": "Addison-Wesley",
        "address": "Reading, Massachusetts",
        "note": "701 pp.",
    },
    "Hebb1949": {
        "kind": "book",
        "publisher": "Wiley",
        "address": "New York",
    },
}
||
|
||
|
||
def clean_line(line: str) -> str:
    """Scrub one raw text line: drop control characters, map known
    mis-encoded glyphs to ASCII punctuation, and collapse whitespace."""
    line = re.sub(r"[\x00-\x1f]", "", line)
    # Substitutions for artifacts observed in the scanned source text.
    for bad, good in (
        ("<EFBFBD>", "'"),
        ("®", "'"),
        ("÷", "-"),
        ("`", "'"),
    ):
        line = line.replace(bad, good)
    return re.sub(r"\s+", " ", line).strip()
|
||
|
||
|
||
def load_entries() -> list[str]:
    """Read the thesis text and return one joined string per bibliography
    entry.

    Everything before the line reading exactly "BIBLIOGRAPHY" is skipped;
    entries are separated by blank lines or bare page numbers.
    """
    lines = THES.read_text(encoding="latin-1").splitlines()
    start = next(i for i, line in enumerate(lines) if line.strip() == "BIBLIOGRAPHY")
    chunks: list[list[str]] = []
    current: list[str] = []
    for raw in lines[start + 1:]:
        cleaned = clean_line(raw)
        # A blank line or a bare page number (arabic or roman numerals)
        # terminates the entry being accumulated.
        if not cleaned or re.fullmatch(r"[ivxlcdmIVXLCDM]+|\d+", cleaned):
            if current:
                chunks.append(current)
                current = []
        else:
            current.append(cleaned)
    if current:
        chunks.append(current)
    return [" ".join(chunk) for chunk in chunks]
|
||
|
||
|
||
def bib_key(entry: str, index: int) -> str:
    """Build a citation key like "Hopfield1982" from an entry's
    author/year prefix.

    Falls back to a numbered placeholder key when the prefix is absent.
    """
    head = re.match(r"([A-Za-z][A-Za-z\-\.\s,&']+?)\s+(\d{4}[a-z]?)\.", entry)
    if head is None:
        return f"elsberryRef{index:03d}"
    # The last word before the first comma is taken to be the surname.
    surname_raw = head.group(1).split(",")[0].split()[-1]
    surname = re.sub(r"[^A-Za-z]", "", surname_raw)
    return f"{surname}{head.group(2)}"
|
||
|
||
|
||
def entry_type(entry: str) -> str:
    """Guess a BibTeX entry type from textual cues in the raw entry."""
    lowered = entry.lower()
    if "dissertation" in lowered:
        return "phdthesis"
    if "personal communication" in lowered:
        return "misc"
    if any(cue in lowered for cue in ("proceedings", "conference", "poster presentation")):
        return "inproceedings"
    # "volume, first-last." at the very end marks a journal citation.
    if re.search(r"\b\d+\s*,\s*\d+\s*-\s*\d+\.?$", entry):
        return "article"
    if any(cue in lowered for cue in ("press", "books", "wiley", "addison-wesley")):
        return "book"
    if any(cue in lowered for cue in ("journal", "magazine", "cybernetics", "biosciences")):
        return "article"
    return "misc"
|
||
|
||
|
||
def split_author_year(entry: str) -> tuple[str, str, str]:
    """Split "Authors YEAR. Rest" into (authors, year, rest).

    Returns ("Unknown", "0000", entry) when no year prefix is found.
    """
    parsed = re.match(r"(.+?)\s+(\d{4}[a-z]?)\.\s+(.*)$", entry)
    if parsed is None:
        return "Unknown", "0000", entry
    author, year, rest = parsed.groups()
    return author.strip(), year, rest.strip()
|
||
|
||
|
||
def split_title_note(rest: str) -> tuple[str, str]:
    """Split the post-year text into (title, trailing note).

    The title ends at the first period that is followed by one of the
    known publication cues; failing that, the first sentence is taken as
    the title and the remainder becomes the note.
    """
    # Order matters: cues are tried first-to-last, mirroring how specific
    # venue names are preferred over generic prefixes.
    cues = (
        "Proceedings", "Proc.", "In ", "American ", "Computer ",
        "Applied ", "Mathematical ", "Studies ", "International ",
        "Rockefeller ", "Bantam ", "Wiley", "SPIE", "Biological ",
        "Byte ", "Communications ", "Scientific ", "Bell ", "IEEE ",
        "IRE ", "Neural Networks ", "Bull.", "Addison-Wesley",
        "Massachusetts Institute", "Report ", "Submitted ",
        "To appear ", "Unpublished ", "University ", "Poster ",
        "'North",
    )
    for cue in cues:
        hit = re.match(rf"^(?P<title>.+?)\.\s+(?P<note>{re.escape(cue)}.*)$", rest)
        if hit is not None:
            return hit.group("title").strip(), hit.group("note").strip()
    # Fallback: first sentence is the title.
    title = rest.split(".")[0].strip()
    note = rest[len(title):].strip().lstrip(".").strip()
    return title, note
|
||
|
||
|
||
def normalize_author(author: str) -> str:
    """Collapse whitespace, drop a trailing period, and patch known typos
    found in the scanned source text."""
    author = re.sub(r"\s+", " ", author.strip().rstrip("."))
    fixes = (
        ("Foo,Y.", "Foo, Y."),
        ("Pao,Y.-H.", "Pao, Y.-H."),
        ("F. O. Scmitt", "F. O. Schmitt"),
    )
    for bad, good in fixes:
        author = author.replace(bad, good)
    return author
|
||
|
||
|
||
def _field(name: str, value: str) -> str:
|
||
return f" {name} = {{{value}}}"
|
||
|
||
|
||
def _extract_inproceedings_fields(note: str) -> list[str]:
    """Pull booktitle and pages out of a conference-paper note.

    Returns [] when neither can be found, letting the caller fall back
    to a plain note field.
    """
    out: list[str] = []
    booktitle_hit = re.search(
        r"(Proceedings of .*?(?:\(.*?\))?(?:\s+Vol\.\s*[IVX0-9]+)?)"
        r"(?:\.\s*|,\s*(?:pp\.|\d)|$)",
        note,
        flags=re.IGNORECASE,
    )
    if booktitle_hit:
        out.append(_field("booktitle", booktitle_hit.group(1).rstrip(" .,;")))
    page_hit = re.search(r"(\d+\s*-\s*\d+)", note)
    if page_hit:
        out.append(_field("pages", page_hit.group(1).replace(" ", "")))
    return out
|
||
|
||
|
||
def _extract_article_fields(note: str) -> list[str]:
    """Pull journal, volume, and pages from a journal-article note.

    Expects the note to end with "Journal Name VOL, FIRST-LAST."; returns
    [] when it does not, so the caller can fall back to a note field.
    """
    # One pattern with an optional space before the comma subsumes the two
    # near-identical regexes previously tried in sequence (the first was
    # the strict special case of the second and matched the same groups).
    journal_match = re.match(r"(.+?)\s+(\d+)\s*,\s*(\d+\s*-\s*\d+)\.?\s*$", note)
    if journal_match is None:
        return []
    journal, volume, pages = journal_match.groups()
    return [
        _field("journal", journal.rstrip(" .,;")),
        _field("volume", volume),
        _field("pages", pages.replace(" ", "")),
    ]
|
||
|
||
|
||
def _extract_book_fields(note: str) -> list[str]:
    """Pull publisher and address from a book note.

    When no publisher cue is recognised, the whole note (if any) is kept
    as a note field.
    """
    pub_hit = re.match(
        r"([^,.]+(?:Press|Books|Wiley|Addison-Wesley|SPIE|University Press))[,\.]\s*(.*)$",
        note,
    )
    if pub_hit is not None:
        out = [_field("publisher", pub_hit.group(1).strip())]
        if pub_hit.group(2).strip():
            out.append(_field("address", pub_hit.group(2).strip(" .")))
        return out
    return [_field("note", note)] if note else []
|
||
|
||
|
||
def _extract_phdthesis_fields(note: str) -> list[str]:
    """Pull the granting school out of a dissertation note.

    Any text left after removing the school sentence is kept as a note;
    with no school found, the whole note (if any) becomes a note field.
    """
    school_hit = re.search(r"(The University of .*?|.*?University.*?)\.", note)
    if school_hit is None:
        return [_field("note", note)] if note else []
    out = [_field("school", school_hit.group(1).strip())]
    leftover = note.replace(school_hit.group(0), "", 1).strip(" .")
    if leftover:
        out.append(_field("note", leftover))
    return out
|
||
|
||
|
||
def _extra_fields(kind: str, note: str) -> list[str]:
    """Parse kind-specific fields from the note text.

    Falls back to a single note field when the kind-specific parser finds
    nothing, and returns [] for an empty note.
    """
    # Normalise quote artifacts around "In '...'" phrases before parsing.
    cleaned = note.replace("In '", "In ").replace(",'", ",")
    if not cleaned:
        return []
    if kind == "inproceedings":
        parsed = _extract_inproceedings_fields(cleaned)
        if parsed:
            return parsed
    elif kind == "article":
        parsed = _extract_article_fields(cleaned)
        if parsed:
            return parsed
    elif kind == "book":
        return _extract_book_fields(cleaned)
    elif kind == "phdthesis":
        return _extract_phdthesis_fields(cleaned)
    return [_field("note", cleaned)]
|
||
|
||
|
||
def apply_override(
    key: str,
    kind: str,
    author: str,
    year: str,
    title: str,
    note: str,
    fields: list[str],
) -> tuple[str, str, str, str, list[str]]:
    """Replace heuristically-parsed entry data with MANUAL_OVERRIDES[key].

    Without an override the inputs pass through untouched.  With one, the
    field list is rebuilt from scratch: author/year/title (possibly
    replaced by the override) come first, then any override-supplied
    fields in canonical BibTeX order — the heuristically parsed extras
    are discarded.  ``note`` is accepted for signature compatibility but
    not consulted; overrides carry their own note text.
    """
    override = MANUAL_OVERRIDES.get(key)
    if override is None:
        return kind, author, year, title, fields

    kind = override.get("kind", kind)
    author = override.get("author", author)
    title = override.get("title", title)

    rebuilt = [
        _field("author", author),
        _field("year", year),
        _field("title", title),
    ]
    optional_names = (
        "journal", "booktitle", "editor", "publisher", "institution",
        "school", "address", "volume", "number", "pages", "month", "note",
    )
    for name in optional_names:
        value = override.get(name)
        if value:
            rebuilt.append(_field(name, value))
    return kind, author, year, title, rebuilt
|
||
|
||
|
||
def to_bibtex(entry: str, index: int) -> str:
    """Convert one raw bibliography entry into a BibTeX record string."""
    key = bib_key(entry, index)
    kind = entry_type(entry)
    author, year, rest = split_author_year(entry)
    author = normalize_author(author)
    title, note = split_title_note(rest)
    fields = [
        _field("author", author),
        _field("year", year),
        _field("title", title),
    ]
    fields += _extra_fields(kind, note)
    # Manual overrides win over everything parsed heuristically above.
    kind, author, year, title, fields = apply_override(
        key, kind, author, year, title, note, fields
    )
    body = ",\n".join(fields)
    return f"@{kind}{{{key},\n{body}\n}}"
|
||
|
||
|
||
def main() -> int:
    """Parse the thesis bibliography and write the BibTeX file.

    Returns 0 so the value can be used directly as an exit status.
    """
    entries = load_entries()
    OUT.parent.mkdir(parents=True, exist_ok=True)
    records = (to_bibtex(entry, idx) for idx, entry in enumerate(entries, start=1))
    OUT.write_text("\n\n".join(records) + "\n", encoding="utf-8")
    return 0
|
||
|
||
|
||
# Script entry point: process exit status comes from main().
if __name__ == "__main__":
    raise SystemExit(main())
|