diff --git a/src/doclift/convert.py b/src/doclift/convert.py
index b2489c6..fe43260 100755
--- a/src/doclift/convert.py
+++ b/src/doclift/convert.py
@@ -4,6 +4,7 @@ from pathlib import Path
 
 from .legacy_doc import (
     build_layout_manifest,
+    classify_document,
     clean_text,
     collect_figure_assets,
     extract_references,
@@ -27,6 +28,7 @@ def convert_doc(source_path: Path, out_root: Path, figure_assets: list | None =
     raw = run_catdoc(source_path)
     cleaned = clean_text(raw)
     title = extract_title(cleaned, source_path.stem)
+    document_kind = classify_document(cleaned, source_path)
     body = strip_title(cleaned, title)
     layout_body = normalize_text_preserve_layout(strip_title(raw, title))
     tables = extract_tables(layout_body)
@@ -64,6 +66,7 @@ def convert_doc(source_path: Path, out_root: Path, figure_assets: list | None =
     return DocumentBundle(
         document_id=slugify(title),
         title=title,
+        document_kind=document_kind,
         source_path=str(source_path),
         output_dir=str(doc_out),
         markdown_path=str(markdown_path),
@@ -98,6 +101,7 @@ def convert_directory(source_root: Path, out_root: Path, asset_root: Path | None
                     {
                         "document_id": bundle.document_id,
                         "title": bundle.title,
+                        "document_kind": bundle.document_kind,
                         "table_count": bundle.table_count,
                         "figure_reference_count": bundle.figure_reference_count,
                     }
diff --git a/src/doclift/legacy_doc.py b/src/doclift/legacy_doc.py
index 98e7ee9..2391390 100755
--- a/src/doclift/legacy_doc.py
+++ b/src/doclift/legacy_doc.py
@@ -55,6 +55,42 @@ def normalize_text_preserve_layout(text: str) -> str:
 
 def extract_title(text: str, fallback: str) -> str:
     lines = text.splitlines()
+    nonempty = [line.strip() for line in lines if line.strip()]
+    if not nonempty:
+        return fallback
+
+    joined = " ".join(nonempty[:8])
+    upper_joined = joined.upper()
+    first = nonempty[0]
+
+    if first.upper().startswith("MAKE-UP EXAM"):
+        return first
+    if first.upper() in {"EXAM I", "EXAM II"}:
+        return first
+    if "FINAL EXAM" in upper_joined:
+        for line in nonempty[:8]:
+            if "FINAL EXAM" in line.upper():
+                return line
+    if "CLASS NOTES" in upper_joined:
+        title_parts: list[str] = []
+        started = False
+        for line in nonempty[:6]:
+            upper = line.upper()
+            if upper.startswith("MARB "):
+                continue
+            if upper == "CLASS NOTES":
+                break
+            if upper in {"SPRING 2000", "SPRING 1999", "SPRING 2001"}:
+                continue
+            started = True
+            title_parts.append(line)
+        if started:
+            return " ".join(title_parts)
+    if first.upper().startswith("MARB ") and len(nonempty) > 1:
+        second = nonempty[1]
+        if re.match(r"^\d+\s+Credit\b", second, re.IGNORECASE):
+            return first
+
     for index, line in enumerate(lines):
         stripped = line.strip()
         if not stripped:
@@ -71,12 +107,15 @@ def extract_title(text: str, fallback: str) -> str:
             return stripped
         if stripped.upper() in {
             "SPRING 2000",
+            "SPRING 1999",
             "MARB 401",
             "MARB 482 SEMINAR IN MARINE BIOLOGY",
             "COURSE SYLLABUS",
             "EXAM I",
             "EXAM II",
             "FINAL EXAM SPRING 1999",
+            "CLASS NOTES",
+            "OF",
         }:
             continue
         if stripped.startswith(("February ", "April ")):
@@ -85,6 +124,42 @@ def extract_title(text: str, fallback: str) -> str:
     return fallback
 
 
+def classify_document(text: str, source_path: Path) -> str:
+    """Return a coarse kind label for a document.
+
+    The first ten non-empty lines of ``text`` are stripped, joined with single
+    spaces and upper-cased; that header and the upper-cased file name of
+    ``source_path`` are matched against the rules below. Rules are tried in
+    order and the first match wins.
+
+    Args:
+        text: Document text to inspect.
+        source_path: Path of the source file; only its ``name`` is used.
+
+    Returns:
+        One of ``"syllabus"``, ``"final_exam"``, ``"exam"``, ``"cover_notes"``,
+        ``"lecture"`` or, when no rule matches, ``"document"``.
+    """
+    nonempty = [line.strip() for line in text.splitlines() if line.strip()]
+    joined = " ".join(nonempty[:10]).upper()
+    name = source_path.name.upper()
+
+    if name.startswith("SYLLABUS") or "COURSE SYLLABUS" in joined:
+        return "syllabus"
+    # Runs before the generic exam rule, so a document matching both is "final_exam".
+    if "FINAL EXAM" in joined:
+        return "final_exam"
+    # A "MAKE-UP EXAM" header counts as an exam even when the file name gives no
+    # hint, matching the header that extract_title accepts as a title.
+    if name.startswith(("EXAM", "MAKE-UP")) or re.match(r"^(?:MAKE-UP\s+)?EXAM\b", joined):
+        return "exam"
+    if "CLASS NOTES" in joined or name == "COVER.DOC":
+        return "cover_notes"
+    if re.match(r"^LECTURE\s+\d+\.", joined):
+        return "lecture"
+    return "document"
+
+
 def strip_title(text: str, title: str) -> str:
     lines = text.splitlines()
     normalized_title = " ".join(title.split())
diff --git a/src/doclift/schemas.py b/src/doclift/schemas.py
index 286f220..a69ff09 100755
--- a/src/doclift/schemas.py
+++ b/src/doclift/schemas.py
@@ -34,6 +34,7 @@ class FigureAsset(BaseModel):
 class DocumentBundle(BaseModel):
     document_id: str
     title: str
+    document_kind: str = "document"
     source_path: str
     output_dir: str
     markdown_path: str
diff --git a/tests/test_legacy_doc.py b/tests/test_legacy_doc.py
index 468c7be..345a6d4 100755
--- a/tests/test_legacy_doc.py
+++ b/tests/test_legacy_doc.py
@@ -1,4 +1,13 @@
-from doclift.legacy_doc import FigureAsset, extract_references, extract_tables, link_related_assets
+from pathlib import Path
+
+from doclift.legacy_doc import (
+    FigureAsset,
+    classify_document,
+    extract_references,
+    extract_tables,
+    extract_title,
+    link_related_assets,
+)
 
 
 def test_extract_references_dedupes() -> None:
@@ -45,3 +54,48 @@ def test_link_related_assets_matches_explicit_figure_refs() -> None:
     ]
     matched = link_related_assets(["Fig. 5.1"], assets)
     assert [asset.asset_id for asset in matched] == ["a1"]
+
+
+def test_extract_title_prefers_exam_headers() -> None:
+    """A leading ``EXAM I`` line is used as the title."""
+    exam_lines = (
+        "EXAM I",
+        "February 25, 1999",
+        "Answer three of the following essay questions.",
+    )
+    document = "\n".join(exam_lines)
+    assert extract_title(document, "fallback") == "EXAM I"
+
+
+def test_extract_title_handles_cover_sheet() -> None:
+    """A cover sheet's title lines are joined into a single title."""
+    cover_lines = (
+        "MARB 401",
+        "PHYSIOLOGICAL ECOLOGY",
+        "OF",
+        "MARINE MAMMALS",
+        "CLASS NOTES",
+        "SPRING 2000",
+    )
+    expected = "PHYSIOLOGICAL ECOLOGY OF MARINE MAMMALS"
+    assert extract_title("\n".join(cover_lines), "fallback") == expected
+
+
+def test_classify_document_kinds() -> None:
+    """Each sample text and file name yields the expected kind label."""
+    samples = (
+        ("EXAM II\nApril 6, 1999\n", "Exam II-99.doc", "exam"),
+        ("FINAL EXAM SPRING 1999\nAnswer 3 questions\n", "final exam.991.doc", "final_exam"),
+        (
+            "MARB 401\nPHYSIOLOGICAL ECOLOGY\nOF\nMARINE MAMMALS\nCLASS NOTES\n",
+            "COVER.doc",
+            "cover_notes",
+        ),
+        (
+            "SPRING 2000\nMARB 401\nPhysiological Ecology of Marine Mammals\n",
+            "Syllabus 401.001.doc",
+            "syllabus",
+        ),
+    )
+    for text, file_name, expected_kind in samples:
+        assert classify_document(text, Path(file_name)) == expected_kind