Add doclift claim tournament framework
This commit is contained in:
parent
8201dd83ee
commit
54ed7568b6
|
|
@ -0,0 +1,30 @@
|
||||||
|
# Doclift Claim Tournament
|
||||||
|
|
||||||
|
This benchmark is a small evaluation harness for comparing multiple
|
||||||
|
doclift prose-claim extraction strategies before changing the default
|
||||||
|
GroundRecall import behavior.
|
||||||
|
|
||||||
|
Current tracks:
|
||||||
|
|
||||||
|
- `conservative`: prefers higher precision and sentence-level claims.
|
||||||
|
- `broad`: allows paragraph-level claims and shorter sentence candidates to
|
||||||
|
improve recall.
|
||||||
|
|
||||||
|
Judge criteria:
|
||||||
|
|
||||||
|
- maximize F1 against the benchmark gold claims
|
||||||
|
- prefer higher recall when F1 ties
|
||||||
|
- penalize meta or identity-claim noise
|
||||||
|
- prefer predicted claim counts close to the gold-set size
|
||||||
|
|
||||||
|
Fixture location:
|
||||||
|
|
||||||
|
- `tests/fixtures/doclift_claim_eval/`
|
||||||
|
|
||||||
|
Primary entrypoint:
|
||||||
|
|
||||||
|
- `groundrecall.doclift_claim_tournament.evaluate_doclift_claim_tracks(...)`
|
||||||
|
|
||||||
|
This is intentionally small and deterministic. It is meant to support an
|
||||||
|
iterative tournament workflow, not to serve as a full evaluation platform by
|
||||||
|
itself.
|
||||||
|
|
@ -0,0 +1,175 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from .groundrecall_source_adapters.doclift_bundle import DocliftBundleSourceAdapter
|
||||||
|
|
||||||
|
|
||||||
|
# Token pattern for overlap scoring: lowercase alphanumeric runs only, so
# punctuation and case differences never affect claim matching.
_TOKEN_RE = re.compile(r"[a-z0-9]+")
# Lowercase substrings that mark a predicted claim as meta/identity noise
# (statements about the import itself) rather than document content.
_META_PATTERNS = (
    "is a web_article in the imported doclift bundle",
    "is a bibliography_topic in the imported doclift bundle",
    "this essay has been transferred here",
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ClaimTrackScore:
    """Per-document evaluation result for a single extraction strategy."""

    strategy: str
    predicted_claims: list[str]
    gold_claims: list[str]
    matches: int
    precision: float
    recall: float
    f1: float
    meta_noise: int

    def as_dict(self) -> dict[str, Any]:
        """JSON-ready view of the score.

        Claim lists are shallow-copied so callers cannot mutate this score
        through the returned payload.
        """
        payload: dict[str, Any] = {
            "strategy": self.strategy,
            "predicted_claims": list(self.predicted_claims),
            "gold_claims": list(self.gold_claims),
            "matches": self.matches,
            "precision": self.precision,
            "recall": self.recall,
            "f1": self.f1,
            "meta_noise": self.meta_noise,
        }
        return payload
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_tokens(text: str) -> set[str]:
    """Lowercase *text* and return its set of alphanumeric tokens."""
    return {token for token in _TOKEN_RE.findall(text.lower())}


def _claim_overlap(a: str, b: str) -> float:
    """Jaccard similarity between the token sets of two claim strings.

    Returns 0.0 when either side has no tokens at all.
    """
    tokens_a = _normalize_tokens(a)
    tokens_b = _normalize_tokens(b)
    if not tokens_a or not tokens_b:
        return 0.0
    shared = tokens_a & tokens_b
    combined = tokens_a | tokens_b
    return len(shared) / len(combined)


def _is_meta_noise(text: str) -> bool:
    """True when *text* contains any known meta/identity-claim pattern."""
    lowered = text.lower()
    for pattern in _META_PATTERNS:
        if pattern in lowered:
            return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def _score_track(predicted_claims: list[str], gold_claims: list[str], strategy: str) -> ClaimTrackScore:
    """Score one strategy's predictions against the gold claims.

    Greedy one-to-one matching: each prediction, in order, claims its
    best-overlapping still-unmatched gold claim, and counts as a match when
    that overlap reaches the 0.34 threshold (roughly one-third of tokens
    shared).
    """
    consumed_gold: set[int] = set()
    matches = 0
    for candidate in predicted_claims:
        best_gold = None
        best_overlap = 0.0
        for gold_index, gold in enumerate(gold_claims):
            if gold_index in consumed_gold:
                continue
            overlap = _claim_overlap(candidate, gold)
            if overlap > best_overlap:
                best_overlap = overlap
                best_gold = gold_index
        if best_gold is not None and best_overlap >= 0.34:
            consumed_gold.add(best_gold)
            matches += 1

    precision = matches / len(predicted_claims) if predicted_claims else 0.0
    recall = matches / len(gold_claims) if gold_claims else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision and recall else 0.0
    meta_noise = sum(1 for claim in predicted_claims if _is_meta_noise(claim))
    return ClaimTrackScore(
        strategy=strategy,
        predicted_claims=predicted_claims,
        gold_claims=gold_claims,
        matches=matches,
        precision=precision,
        recall=recall,
        f1=f1,
        meta_noise=meta_noise,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _winner_key(score: ClaimTrackScore) -> tuple[float, float, float, float]:
|
||||||
|
return (
|
||||||
|
score.f1,
|
||||||
|
score.recall,
|
||||||
|
-float(score.meta_noise),
|
||||||
|
-abs(len(score.predicted_claims) - len(score.gold_claims)),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_doclift_claim_tracks(bundle_root: str | Path, benchmark_path: str | Path) -> dict[str, Any]:
    """Run the claim-extraction tournament over a doclift bundle.

    Loads the bundle manifest and a benchmark file of gold claims, extracts
    claims per document with each strategy (``conservative`` and ``broad``),
    scores every track, and aggregates a judge summary with a single winner.

    Raises ``KeyError`` when a benchmark document id is absent from the
    manifest; propagates I/O and JSON errors from unreadable files.
    """
    root = Path(bundle_root)
    benchmark_payload = json.loads(Path(benchmark_path).read_text(encoding="utf-8"))
    manifest_payload = json.loads((root / "manifest.json").read_text(encoding="utf-8"))
    adapter = DocliftBundleSourceAdapter()
    manifest_documents = {
        str(item.get("document_id")): item
        for item in manifest_payload.get("documents", [])
        if isinstance(item, dict)
    }

    per_document: list[dict[str, Any]] = []
    totals_by_strategy: dict[str, dict[str, float]] = {
        "conservative": {"matches": 0.0, "predicted": 0.0, "gold": 0.0, "meta_noise": 0.0},
        "broad": {"matches": 0.0, "predicted": 0.0, "gold": 0.0, "meta_noise": 0.0},
    }

    for entry in benchmark_payload.get("documents", []):
        document_id = str(entry["document_id"])
        document = manifest_documents[document_id]
        gold_claims = [str(item).strip() for item in entry.get("gold_claims", []) if str(item).strip()]
        track_scores: list[ClaimTrackScore] = []
        for strategy in ("conservative", "broad"):
            predicted = adapter.extract_document_claims(root, document, strategy=strategy, limit=6)
            score = _score_track(predicted, gold_claims, strategy)
            track_scores.append(score)
            totals = totals_by_strategy[strategy]
            totals["matches"] += score.matches
            totals["predicted"] += len(score.predicted_claims)
            totals["gold"] += len(score.gold_claims)
            totals["meta_noise"] += score.meta_noise
        # max() keeps the first maximal element, so ties go to "conservative".
        best = max(track_scores, key=_winner_key)
        per_document.append(
            {
                "document_id": document_id,
                "title": str(document.get("title") or ""),
                "winner": best.strategy,
                "tracks": [score.as_dict() for score in track_scores],
            }
        )

    judge_summary: dict[str, Any] = {"tracks": {}}
    for strategy, totals in totals_by_strategy.items():
        precision = totals["matches"] / totals["predicted"] if totals["predicted"] else 0.0
        recall = totals["matches"] / totals["gold"] if totals["gold"] else 0.0
        f1 = (2 * precision * recall / (precision + recall)) if precision and recall else 0.0
        judge_summary["tracks"][strategy] = {
            "matches": int(totals["matches"]),
            "predicted_claims": int(totals["predicted"]),
            "gold_claims": int(totals["gold"]),
            "precision": precision,
            "recall": recall,
            "f1": f1,
            "meta_noise": int(totals["meta_noise"]),
        }

    # Winner ordering mirrors _winner_key: f1, then recall, then fewer meta
    # claims, then claim-count closeness to the gold-set size.
    judge_summary["winner"] = max(
        judge_summary["tracks"].items(),
        key=lambda item: (
            item[1]["f1"],
            item[1]["recall"],
            -float(item[1]["meta_noise"]),
            -abs(item[1]["predicted_claims"] - item[1]["gold_claims"]),
        ),
    )[0]
    judge_summary["criteria"] = [
        "maximize f1 against gold claims",
        "prefer higher recall when f1 ties",
        "penalize meta/identity claim noise",
        "prefer predicted claim counts close to gold-set size",
    ]

    return {
        "bundle_root": str(root),
        "benchmark_path": str(benchmark_path),
        "per_document": per_document,
        "judge_summary": judge_summary,
    }
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import re
|
||||||
from hashlib import sha256
|
from hashlib import sha256
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
@ -10,10 +11,30 @@ from .base import DiscoveredImportSource, StructuredImportRows, register_source_
|
||||||
class DocliftBundleSourceAdapter:
|
class DocliftBundleSourceAdapter:
|
||||||
name = "doclift_bundle"
|
name = "doclift_bundle"
|
||||||
|
|
||||||
|
    # Splits prose into sentences at terminal punctuation followed by whitespace.
    _PROSE_SENTENCE_SPLIT = re.compile(r"(?<=[.!?])\s+")
    # Lowercase line prefixes identifying blog/site chrome (share widgets,
    # footers, navigation) that must never become claim candidates.
    _METADATA_PREFIXES = (
        "posted by",
        "share to ",
        "email this",
        "blogthis",
        "labels:",
        "post a comment",
        "older post",
        "newer post",
        "subscribe to",
        "copyright",
        "[last update",
        "this essay has been transferred here",
    )
|
||||||
|
|
||||||
def _resolve_bundle_path(self, base: Path, value: str | Path | None) -> Path:
|
def _resolve_bundle_path(self, base: Path, value: str | Path | None) -> Path:
|
||||||
if value is None:
|
if value is None:
|
||||||
return Path()
|
return Path()
|
||||||
|
if isinstance(value, str) and not value.strip():
|
||||||
|
return Path()
|
||||||
path = Path(value)
|
path = Path(value)
|
||||||
|
if not str(path):
|
||||||
|
return Path()
|
||||||
if path.is_absolute():
|
if path.is_absolute():
|
||||||
return path
|
return path
|
||||||
return base / path
|
return base / path
|
||||||
|
|
@ -39,6 +60,131 @@ class DocliftBundleSourceAdapter:
|
||||||
return [chunk for chunk in payload if isinstance(chunk, dict)]
|
return [chunk for chunk in payload if isinstance(chunk, dict)]
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
def _load_markdown_text(self, base: Path, document: dict) -> str:
|
||||||
|
markdown_path = self._resolve_bundle_path(base, document.get("markdown_path", ""))
|
||||||
|
if not markdown_path.exists():
|
||||||
|
return ""
|
||||||
|
return markdown_path.read_text(encoding="utf-8")
|
||||||
|
|
||||||
|
def _normalize_inline_text(self, value: str) -> str:
|
||||||
|
text = value.replace("\xa0", " ")
|
||||||
|
text = re.sub(r"\[[^\]]+\]\([^)]+\)", "", text)
|
||||||
|
text = re.sub(r"\[[^\]]+\]", "", text)
|
||||||
|
text = re.sub(r"\s+", " ", text)
|
||||||
|
return text.strip()
|
||||||
|
|
||||||
|
def _looks_like_metadata_line(self, value: str) -> bool:
|
||||||
|
lowered = value.strip().lower()
|
||||||
|
if not lowered:
|
||||||
|
return True
|
||||||
|
if any(lowered.startswith(prefix) for prefix in self._METADATA_PREFIXES):
|
||||||
|
return True
|
||||||
|
if lowered in {"home", "sandwalk", "comments", "recent comments", "loading..."}:
|
||||||
|
return True
|
||||||
|
if "property='og:" in lowered or lowered.startswith("http"):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _is_claim_candidate(self, cleaned: str, *, title: str = "", strategy: str = "conservative") -> bool:
|
||||||
|
lowered = cleaned.lower()
|
||||||
|
normalized_title = self._normalize_inline_text(title).lower()
|
||||||
|
min_length = 70 if strategy == "conservative" else 40
|
||||||
|
if len(cleaned) < min_length:
|
||||||
|
return False
|
||||||
|
if strategy == "conservative" and len(cleaned) > 360:
|
||||||
|
return False
|
||||||
|
if strategy == "broad" and len(cleaned) > 520:
|
||||||
|
return False
|
||||||
|
if any(lowered.startswith(prefix) for prefix in self._METADATA_PREFIXES):
|
||||||
|
return False
|
||||||
|
if normalized_title and lowered == normalized_title:
|
||||||
|
return False
|
||||||
|
if cleaned.count(" ") < 8:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
def _extract_claim_sentences_from_paragraphs(
|
||||||
|
self,
|
||||||
|
paragraphs: list[str],
|
||||||
|
*,
|
||||||
|
title: str = "",
|
||||||
|
limit: int = 4,
|
||||||
|
strategy: str = "conservative",
|
||||||
|
) -> list[str]:
|
||||||
|
claims: list[str] = []
|
||||||
|
seen: set[str] = set()
|
||||||
|
for paragraph in paragraphs:
|
||||||
|
normalized_paragraph = self._normalize_inline_text(paragraph)
|
||||||
|
if len(normalized_paragraph) < 80:
|
||||||
|
continue
|
||||||
|
if strategy == "broad":
|
||||||
|
paragraph_key = normalized_paragraph.lower()
|
||||||
|
if self._is_claim_candidate(normalized_paragraph, title=title, strategy=strategy) and paragraph_key not in seen:
|
||||||
|
seen.add(paragraph_key)
|
||||||
|
claims.append(normalized_paragraph)
|
||||||
|
if len(claims) >= limit:
|
||||||
|
return claims
|
||||||
|
for sentence in self._PROSE_SENTENCE_SPLIT.split(normalized_paragraph):
|
||||||
|
cleaned = self._normalize_inline_text(sentence)
|
||||||
|
lowered = cleaned.lower()
|
||||||
|
if not self._is_claim_candidate(cleaned, title=title, strategy=strategy):
|
||||||
|
continue
|
||||||
|
if lowered in seen:
|
||||||
|
continue
|
||||||
|
seen.add(lowered)
|
||||||
|
claims.append(cleaned)
|
||||||
|
if len(claims) >= limit:
|
||||||
|
return claims
|
||||||
|
return claims
|
||||||
|
|
||||||
|
def _extract_claim_sentences(self, markdown_text: str, *, title: str = "", limit: int = 4, strategy: str = "conservative") -> list[str]:
|
||||||
|
paragraphs: list[str] = []
|
||||||
|
current: list[str] = []
|
||||||
|
for raw_line in markdown_text.splitlines():
|
||||||
|
line = raw_line.strip()
|
||||||
|
if not line:
|
||||||
|
if current:
|
||||||
|
paragraphs.append(" ".join(current))
|
||||||
|
current = []
|
||||||
|
continue
|
||||||
|
if line.startswith("#") or line.startswith("![") or line.startswith("|"):
|
||||||
|
continue
|
||||||
|
if self._looks_like_metadata_line(line):
|
||||||
|
continue
|
||||||
|
if len(line) < 40:
|
||||||
|
continue
|
||||||
|
current.append(line)
|
||||||
|
if current:
|
||||||
|
paragraphs.append(" ".join(current))
|
||||||
|
if strategy == "broad":
|
||||||
|
broad_claims = self._extract_claim_sentences_from_paragraphs(
|
||||||
|
paragraphs,
|
||||||
|
title=title,
|
||||||
|
limit=max(limit * 2, limit),
|
||||||
|
strategy="broad",
|
||||||
|
)
|
||||||
|
if len(broad_claims) >= limit:
|
||||||
|
return broad_claims[:limit]
|
||||||
|
return broad_claims
|
||||||
|
return self._extract_claim_sentences_from_paragraphs(
|
||||||
|
paragraphs,
|
||||||
|
title=title,
|
||||||
|
limit=limit,
|
||||||
|
strategy="conservative",
|
||||||
|
)
|
||||||
|
|
||||||
|
def extract_document_claims(
|
||||||
|
self,
|
||||||
|
base: Path,
|
||||||
|
document: dict,
|
||||||
|
*,
|
||||||
|
strategy: str = "conservative",
|
||||||
|
limit: int = 4,
|
||||||
|
) -> list[str]:
|
||||||
|
markdown_text = self._load_markdown_text(base, document)
|
||||||
|
title = str(document.get("title") or "")
|
||||||
|
return self._extract_claim_sentences(markdown_text, title=title, limit=limit, strategy=strategy)
|
||||||
|
|
||||||
def discover(self, root: str | Path) -> list[DiscoveredImportSource]:
|
def discover(self, root: str | Path) -> list[DiscoveredImportSource]:
|
||||||
base = Path(root)
|
base = Path(root)
|
||||||
rows: list[DiscoveredImportSource] = []
|
rows: list[DiscoveredImportSource] = []
|
||||||
|
|
@ -108,7 +254,7 @@ class DocliftBundleSourceAdapter:
|
||||||
artifact_id = artifact_by_path.get(str(relative_markdown), "")
|
artifact_id = artifact_by_path.get(str(relative_markdown), "")
|
||||||
figures_path = self._resolve_bundle_path(base, document.get("figures_path", ""))
|
figures_path = self._resolve_bundle_path(base, document.get("figures_path", ""))
|
||||||
figure_payload = {}
|
figure_payload = {}
|
||||||
if figures_path.exists():
|
if figures_path.is_file():
|
||||||
figure_payload = json.loads(figures_path.read_text(encoding="utf-8"))
|
figure_payload = json.loads(figures_path.read_text(encoding="utf-8"))
|
||||||
source_path = str(figure_payload.get("source_path") or document.get("source_path") or relative_markdown)
|
source_path = str(figure_payload.get("source_path") or document.get("source_path") or relative_markdown)
|
||||||
source_path_kind = str(figure_payload.get("source_path_kind") or document.get("source_path_kind") or "source_root_relative")
|
source_path_kind = str(figure_payload.get("source_path_kind") or document.get("source_path_kind") or "source_root_relative")
|
||||||
|
|
@ -144,22 +290,7 @@ class DocliftBundleSourceAdapter:
|
||||||
"current_status": "draft",
|
"current_status": "draft",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
claim_rows.append(
|
document_claim_ids: list[str] = []
|
||||||
{
|
|
||||||
"claim_id": f"clm_doclift_{index}",
|
|
||||||
"import_id": context.import_id,
|
|
||||||
"claim_text": f"{title} is a {document.get('document_kind', 'document')} in the imported doclift bundle.",
|
|
||||||
"claim_kind": "summary",
|
|
||||||
"source_observation_ids": [observation_id],
|
|
||||||
"supporting_fragment_ids": [],
|
|
||||||
"concept_ids": [concept_id],
|
|
||||||
"contradicts_claim_ids": [],
|
|
||||||
"supersedes_claim_ids": [],
|
|
||||||
"confidence_hint": 0.85,
|
|
||||||
"grounding_status": "grounded",
|
|
||||||
"current_status": "triaged",
|
|
||||||
}
|
|
||||||
)
|
|
||||||
for chunk_index, chunk in enumerate(self._load_chunks(base, document), start=1):
|
for chunk_index, chunk in enumerate(self._load_chunks(base, document), start=1):
|
||||||
chunk_text = str(chunk.get("text") or "").strip()
|
chunk_text = str(chunk.get("text") or "").strip()
|
||||||
if not chunk_text:
|
if not chunk_text:
|
||||||
|
|
@ -209,9 +340,10 @@ class DocliftBundleSourceAdapter:
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
if chunk_role in {"claim", "summary"}:
|
if chunk_role in {"claim", "summary"}:
|
||||||
|
claim_id = f"clm_doclift_{index}_{chunk_index}"
|
||||||
claim_rows.append(
|
claim_rows.append(
|
||||||
{
|
{
|
||||||
"claim_id": f"clm_doclift_{index}_{chunk_index}",
|
"claim_id": claim_id,
|
||||||
"import_id": context.import_id,
|
"import_id": context.import_id,
|
||||||
"claim_text": chunk_text,
|
"claim_text": chunk_text,
|
||||||
"claim_kind": "statement" if chunk_role == "claim" else "summary",
|
"claim_kind": "statement" if chunk_role == "claim" else "summary",
|
||||||
|
|
@ -225,6 +357,69 @@ class DocliftBundleSourceAdapter:
|
||||||
"current_status": "triaged",
|
"current_status": "triaged",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
document_claim_ids.append(claim_id)
|
||||||
|
if not document_claim_ids and str(document.get("document_kind") or "").strip() in {"web_article", "document"}:
|
||||||
|
for derived_index, claim_text in enumerate(self.extract_document_claims(base, document, strategy="conservative"), start=1):
|
||||||
|
derived_observation_id = f"obs_doclift_{index}_derived_{derived_index}"
|
||||||
|
claim_id = f"clm_doclift_{index}_derived_{derived_index}"
|
||||||
|
observation_rows.append(
|
||||||
|
{
|
||||||
|
"observation_id": derived_observation_id,
|
||||||
|
"import_id": context.import_id,
|
||||||
|
"artifact_id": artifact_id,
|
||||||
|
"role": "claim",
|
||||||
|
"text": claim_text,
|
||||||
|
"origin_path": relative_markdown,
|
||||||
|
"origin_section": title,
|
||||||
|
"line_start": 0,
|
||||||
|
"line_end": 0,
|
||||||
|
"source_url": source_path,
|
||||||
|
"metadata": {
|
||||||
|
"source_path_kind": source_path_kind,
|
||||||
|
"derived_from": "markdown_sentence",
|
||||||
|
},
|
||||||
|
"grounding_status": "grounded",
|
||||||
|
"support_kind": "direct_source",
|
||||||
|
"confidence_hint": 0.65,
|
||||||
|
"current_status": "draft",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
claim_rows.append(
|
||||||
|
{
|
||||||
|
"claim_id": claim_id,
|
||||||
|
"import_id": context.import_id,
|
||||||
|
"claim_text": claim_text,
|
||||||
|
"claim_kind": "statement",
|
||||||
|
"source_observation_ids": [derived_observation_id],
|
||||||
|
"supporting_fragment_ids": [],
|
||||||
|
"concept_ids": [concept_id],
|
||||||
|
"contradicts_claim_ids": [],
|
||||||
|
"supersedes_claim_ids": [],
|
||||||
|
"confidence_hint": 0.65,
|
||||||
|
"grounding_status": "grounded",
|
||||||
|
"current_status": "triaged",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
document_claim_ids.append(claim_id)
|
||||||
|
if not document_claim_ids:
|
||||||
|
fallback_claim_id = f"clm_doclift_{index}"
|
||||||
|
claim_rows.append(
|
||||||
|
{
|
||||||
|
"claim_id": fallback_claim_id,
|
||||||
|
"import_id": context.import_id,
|
||||||
|
"claim_text": f"{title} is a {document.get('document_kind', 'document')} in the imported doclift bundle.",
|
||||||
|
"claim_kind": "summary",
|
||||||
|
"source_observation_ids": [observation_id],
|
||||||
|
"supporting_fragment_ids": [],
|
||||||
|
"concept_ids": [concept_id],
|
||||||
|
"contradicts_claim_ids": [],
|
||||||
|
"supersedes_claim_ids": [],
|
||||||
|
"confidence_hint": 0.85,
|
||||||
|
"grounding_status": "grounded",
|
||||||
|
"current_status": "triaged",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
document_claim_ids.append(fallback_claim_id)
|
||||||
if previous_concept_id is not None:
|
if previous_concept_id is not None:
|
||||||
relation_rows.append(
|
relation_rows.append(
|
||||||
{
|
{
|
||||||
|
|
@ -233,7 +428,7 @@ class DocliftBundleSourceAdapter:
|
||||||
"source_id": previous_concept_id,
|
"source_id": previous_concept_id,
|
||||||
"target_id": concept_id,
|
"target_id": concept_id,
|
||||||
"relation_type": "references",
|
"relation_type": "references",
|
||||||
"evidence_ids": [f"clm_doclift_{index}"],
|
"evidence_ids": document_claim_ids[:1],
|
||||||
"current_status": "draft",
|
"current_status": "draft",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -460,6 +460,7 @@ def _build_import_review_payload(session: ReviewSession, import_dir: Path) -> di
|
||||||
concept_reviews.append(
|
concept_reviews.append(
|
||||||
{
|
{
|
||||||
"concept_id": concept.concept_id,
|
"concept_id": concept.concept_id,
|
||||||
|
"label": concept.title,
|
||||||
"title": concept.title,
|
"title": concept.title,
|
||||||
"status": concept.status,
|
"status": concept.status,
|
||||||
"description": concept.description,
|
"description": concept.description,
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,18 @@
|
||||||
|
{
|
||||||
|
"documents": [
|
||||||
|
{
|
||||||
|
"document_id": "intro-essay",
|
||||||
|
"gold_claims": [
|
||||||
|
"Evolution is a change in the gene pool of a population over time.",
|
||||||
|
"Populations evolve, but individual organisms do not evolve during their lifetimes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"document_id": "drift-essay",
|
||||||
|
"gold_claims": [
|
||||||
|
"Random genetic drift is a fundamental and important part of evolution.",
|
||||||
|
"Neutral and slightly deleterious alleles can be fixed in a population by random genetic drift."
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
12
tests/fixtures/doclift_claim_eval/documents/drift-essay/document.chunks.json
vendored
Normal file
12
tests/fixtures/doclift_claim_eval/documents/drift-essay/document.chunks.json
vendored
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
{
|
||||||
|
"chunks": [
|
||||||
|
{
|
||||||
|
"chunk_id": "drift-essay-body-1",
|
||||||
|
"role": "body",
|
||||||
|
"section": "Drift Essay",
|
||||||
|
"text": "This essay has been transferred here from an old server that has been decommissioned. Random genetic drift is a fundamental and important part of evolution. Neutral and slightly deleterious alleles can be fixed in a population by random genetic drift.",
|
||||||
|
"line_start": 1,
|
||||||
|
"line_end": 5
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,9 @@
|
||||||
|
# Drift Essay
|
||||||
|
|
||||||
|
This essay has been transferred here from an old server that has been decommissioned.
|
||||||
|
|
||||||
|
Random genetic drift is a fundamental and important part of evolution.
|
||||||
|
|
||||||
|
Neutral and slightly deleterious alleles can be fixed in a population by random genetic drift.
|
||||||
|
|
||||||
|
Posted by Example Author
|
||||||
12
tests/fixtures/doclift_claim_eval/documents/intro-essay/document.chunks.json
vendored
Normal file
12
tests/fixtures/doclift_claim_eval/documents/intro-essay/document.chunks.json
vendored
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
{
|
||||||
|
"chunks": [
|
||||||
|
{
|
||||||
|
"chunk_id": "intro-essay-body-1",
|
||||||
|
"role": "body",
|
||||||
|
"section": "Intro Essay",
|
||||||
|
"text": "Evolution is a change in the gene pool of a population over time. Populations evolve. Individual organisms do not evolve, because they retain the same genes throughout their lives.",
|
||||||
|
"line_start": 1,
|
||||||
|
"line_end": 4
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,9 @@
|
||||||
|
# Intro Essay
|
||||||
|
|
||||||
|
Introduction to Evolutionary Biology
|
||||||
|
|
||||||
|
Evolution is a change in the gene pool of a population over time.
|
||||||
|
|
||||||
|
Populations evolve. Individual organisms do not evolve, because they retain the same genes throughout their lives.
|
||||||
|
|
||||||
|
Posted by Example Author
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
{
|
||||||
|
"documents": [
|
||||||
|
{
|
||||||
|
"document_id": "intro-essay",
|
||||||
|
"title": "Intro Essay",
|
||||||
|
"document_kind": "web_article",
|
||||||
|
"output_dir": "documents/intro-essay",
|
||||||
|
"markdown_path": "documents/intro-essay/document.md",
|
||||||
|
"chunks_path": "documents/intro-essay/document.chunks.json"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"document_id": "drift-essay",
|
||||||
|
"title": "Drift Essay",
|
||||||
|
"document_kind": "web_article",
|
||||||
|
"output_dir": "documents/drift-essay",
|
||||||
|
"markdown_path": "documents/drift-essay/document.md",
|
||||||
|
"chunks_path": "documents/drift-essay/document.chunks.json"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,30 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from groundrecall.doclift_claim_tournament import evaluate_doclift_claim_tracks
|
||||||
|
|
||||||
|
|
||||||
|
def _fixture_root() -> Path:
|
||||||
|
return Path(__file__).parent / "fixtures" / "doclift_claim_eval"
|
||||||
|
|
||||||
|
|
||||||
|
def test_doclift_claim_tournament_scores_two_tracks() -> None:
    """Both strategies run over the fixture bundle and produce non-empty tracks."""
    fixture_root = _fixture_root()
    outcome = evaluate_doclift_claim_tracks(fixture_root, fixture_root / "benchmark.json")

    summary = outcome["judge_summary"]
    assert summary["winner"] in {"conservative", "broad"}
    assert set(summary["tracks"]) == {"conservative", "broad"}
    assert len(outcome["per_document"]) == 2
    intro_entry = next(entry for entry in outcome["per_document"] if entry["document_id"] == "intro-essay")
    assert intro_entry["tracks"][0]["predicted_claims"]
    assert intro_entry["tracks"][1]["predicted_claims"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_doclift_claim_tournament_broad_track_improves_recall_on_fixture() -> None:
    """On the fixture bundle the broad track never loses recall to conservative."""
    fixture_root = _fixture_root()
    outcome = evaluate_doclift_claim_tracks(fixture_root, fixture_root / "benchmark.json")
    track_summaries = outcome["judge_summary"]["tracks"]

    assert track_summaries["broad"]["recall"] >= track_summaries["conservative"]["recall"]
    assert track_summaries["broad"]["matches"] >= track_summaries["conservative"]["matches"]
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
|
|
@ -286,9 +287,66 @@ def test_doclift_bundle_import_generates_structured_concepts(tmp_path: Path) ->
|
||||||
concept_ids = {item["concept_id"] for item in result.concepts}
|
concept_ids = {item["concept_id"] for item in result.concepts}
|
||||||
assert "concept::lecture-1" in concept_ids
|
assert "concept::lecture-1" in concept_ids
|
||||||
claim_ids = {item["claim_id"] for item in result.claims}
|
claim_ids = {item["claim_id"] for item in result.claims}
|
||||||
assert "clm_doclift_1" in claim_ids
|
|
||||||
assert "clm_doclift_1_1" in claim_ids
|
assert "clm_doclift_1_1" in claim_ids
|
||||||
|
assert "clm_doclift_1" not in claim_ids
|
||||||
assert result.observations[0]["source_url"] == "legacy/lecture-1.doc"
|
assert result.observations[0]["source_url"] == "legacy/lecture-1.doc"
|
||||||
assert len(result.fragments) == 2
|
assert len(result.fragments) == 2
|
||||||
assert result.fragments[0]["metadata"]["source_kind"] == "doclift_chunk"
|
assert result.fragments[0]["metadata"]["source_kind"] == "doclift_chunk"
|
||||||
assert result.claims[1]["supporting_fragment_ids"] == ["frag_doclift_1_1"]
|
claim_by_id = {item["claim_id"]: item for item in result.claims}
|
||||||
|
assert claim_by_id["clm_doclift_1_1"]["supporting_fragment_ids"] == ["frag_doclift_1_1"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_doclift_bundle_import_derives_claims_from_prose_when_chunks_are_body_only(tmp_path: Path) -> None:
    """Body-only chunk bundles must yield prose-derived claims, not identity noise."""
    bundle_root = tmp_path / "doclift_bundle_prose"
    essay_dir = bundle_root / "documents" / "essay-1"
    essay_dir.mkdir(parents=True)

    manifest_text = (
        '{\n'
        ' "documents": [\n'
        ' {\n'
        ' "document_id": "essay-1",\n'
        ' "title": "Drift Essay",\n'
        ' "document_kind": "web_article",\n'
        ' "output_dir": "documents/essay-1",\n'
        ' "markdown_path": "documents/essay-1/document.md"\n'
        ' }\n'
        ' ]\n'
        '}\n'
    )
    (bundle_root / "manifest.json").write_text(manifest_text, encoding="utf-8")

    markdown_lines = [
        "# Drift Essay",
        "",
        "Random genetic drift can dominate allele-frequency change in small populations.",
        "This matters because many alleles are fixed or lost without any adaptive advantage.",
        "",
        "Posted by Example Author",
    ]
    (essay_dir / "document.md").write_text("\n".join(markdown_lines), encoding="utf-8")

    chunk_payload = {
        "chunks": [
            {
                "chunk_id": "essay-1-body-1",
                "role": "body",
                "section": "Drift Essay",
                "text": "Random genetic drift can dominate allele-frequency change in small populations.",
                "line_start": 1,
                "line_end": 2,
            }
        ]
    }
    (essay_dir / "document.chunks.json").write_text(json.dumps(chunk_payload), encoding="utf-8")

    result = run_groundrecall_import(bundle_root, mode="quick", import_id="doclift-prose-test")
    claim_texts = [row["claim_text"] for row in result.claims]

    assert any(
        "Random genetic drift can dominate allele-frequency change in small populations." in text
        for text in claim_texts
    )
    assert not any(text == "Drift Essay is a web_article in the imported doclift bundle." for text in claim_texts)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue