Explain claim support suggestions

This commit is contained in:
welsberr 2026-05-07 20:46:47 -04:00
parent 0f76e86000
commit e0199f77d4
2 changed files with 63 additions and 0 deletions

View File

@ -41,6 +41,7 @@ NON_CLAIM_START_PATTERN = re.compile(
re.IGNORECASE,
)
# Matches a DOI-shaped string: the literal prefix "10.", 4 to 9 digits, a slash,
# then one or more characters from the suffix class, delimited by word
# boundaries. re.IGNORECASE lets the A-Z range also match lowercase letters.
DOI_PATTERN = re.compile(r"\b10\.\d{4,9}/[-._;()/:A-Z0-9]+\b", re.IGNORECASE)
# Matches runs of four or more lowercase ASCII letters or digits. The pattern
# has no IGNORECASE flag, so input must be lowercased before matching
# (_meaningful_tokens does this); shorter runs are never returned as tokens.
TOKEN_PATTERN = re.compile(r"[a-z0-9]{4,}")
@dataclass(slots=True)
@ -146,6 +147,13 @@ def analyze_support_gaps(
"journal": str(entry.fields.get("journal") or entry.fields.get("booktitle") or ""),
"source_label": source_label,
"score": round(float(score), 4),
"reason": _build_reference_reason(
claim.text,
title=title,
journal=str(entry.fields.get("journal") or entry.fields.get("booktitle") or ""),
source_label=source_label,
is_primary=entry is verification.entry,
),
}
)
@ -332,6 +340,43 @@ def _normalize_doi(value: str) -> str:
return value.strip().lower()
def _build_reference_reason(
    claim_text: str,
    *,
    title: str,
    journal: str,
    source_label: str,
    is_primary: bool,
) -> str:
    """Compose a short explanation of why a reference was suggested for a claim.

    The result is two sentences joined by a single space: a headline that
    says whether this is the primary or an alternate candidate, followed by
    exactly one supporting detail. The detail is the first that applies, in
    this order: vocabulary shared between claim and title, vocabulary shared
    between claim and venue, the OpenAlex search label, the Crossref search
    label, and finally a generic fallback.

    Args:
        claim_text: Text of the claim being supported.
        title: Title of the candidate reference.
        journal: Venue string of the candidate reference.
        source_label: Label describing where the candidate came from; only
            its prefix is inspected.
        is_primary: Whether the candidate is the top match.

    Returns:
        The human-readable reason string.
    """
    # Tokenize all three inputs up front, exactly once each.
    claim_vocab = _meaningful_tokens(claim_text)
    title_vocab = _meaningful_tokens(title)
    venue_vocab = _meaningful_tokens(journal)

    if is_primary:
        headline = "Top candidate match."
    else:
        headline = "Alternate candidate retained after verification."

    # Show at most three shared terms, in alphabetical order.
    shared_preview = ", ".join(sorted(claim_vocab.intersection(title_vocab))[:3])
    if shared_preview:
        return f"{headline} Shares claim terms: {shared_preview}."

    if not claim_vocab.isdisjoint(venue_vocab):
        return f"{headline} Venue terms overlap with the claim topic."

    if source_label.startswith("openalex:search:"):
        return f"{headline} Returned from topic-oriented OpenAlex search for this claim."

    if source_label.startswith("crossref:search:"):
        return f"{headline} Returned from Crossref search for this claim."

    return f"{headline} Returned by the bibliography verifier for this claim."
def _meaningful_tokens(value: str) -> set[str]:
    """Return the distinct content-bearing tokens found in *value*.

    The text is lowercased, split into tokens with TOKEN_PATTERN, and a
    fixed list of common filler words is then removed from the result.
    """
    filler_words = (
        "this",
        "that",
        "with",
        "from",
        "their",
        "there",
        "into",
        "about",
        "through",
        "using",
    )
    found = set(TOKEN_PATTERN.findall(value.lower()))
    return found.difference(filler_words)
def _build_note(markers: list[str], titles: list[str]) -> str | None:
if not markers:
return "No existing inline citation markers detected for this claim."

View File

@ -64,6 +64,7 @@ J. J. Hopfield, David W. Tank
assert suggestion["existing_reference_titles"] == ["Neural computation of decisions in optimization problems"]
assert suggestion["suggested_references"][0]["title"] == "A Better Support Paper"
assert suggestion["needs_support_score"] > 0
assert suggestion["suggested_references"][0]["reason"].startswith("Top candidate match.")
titles = [item["title"] for item in suggestion["suggested_references"]]
assert "Neural computation of decisions in optimization problems" not in titles
@ -171,3 +172,20 @@ doi: 10.1000/existing
suggested_titles = [item["title"] for item in payload["suggestions"][0]["suggested_references"]]
assert "A Better Support Paper Retitled" not in suggested_titles
assert "A Different Support Paper" in suggested_titles
# Checks that every suggested reference carries a "reason" string, and that
# the wording distinguishes the primary candidate from an alternate one.
def test_analyze_support_gaps_includes_reason_for_alternate_candidates():
# FakeVerifier is defined elsewhere in this test module; presumably it yields
# one primary and one alternate entry for the claim -- confirm against its
# definition.
verifier = FakeVerifier()
# Fixture document: a paragraph with a claim sentence citing [1], followed by
# a reference list containing that single entry.
text = """
Computational research touching on movement of agents spans many different fields. Movement strategies in artificial life systems can improve resource exploitation under selection pressures [1].
References
[[1]]Earlier Cited Paper
"""
payload = analyze_support_gaps(text, verifier=verifier, max_claims=3, min_claim_chars=40)
# Only the first suggestion in the payload is inspected.
suggestion = payload["suggestions"][0]
# Exactly two references are expected, so the unpacking below is safe.
assert len(suggestion["suggested_references"]) == 2
primary, alternate = suggestion["suggested_references"]
# The first reference must be labelled as the top match and the second as an
# alternate; only the headline sentence of each reason is checked.
assert "Top candidate match." in primary["reason"]
assert "Alternate candidate retained after verification." in alternate["reason"]