Source code for uchrom.auto_discovery.evidence

"""Evidence summaries for auto-discovery notebooks and reports."""

from __future__ import annotations

import math
import re
from dataclasses import dataclass
from typing import Any, Mapping

from .ideas import DiscoveryIdea


@dataclass(frozen=True)
class EvidenceConclusion:
    """Immutable, display-ready verdict about one explored discovery idea.

    The record keeps the question "did the notebook run and verify?"
    (``notebook_status``) separate from "does the statistical evidence back
    the hypothesis?" (``hypothesis_status`` / ``direction_status``).
    """

    # Label describing the notebook verification outcome.
    notebook_status: str
    # Label describing how the evidence bears on the hypothesis.
    hypothesis_status: str
    # Label comparing the observed effect direction with the expected one.
    direction_status: str
    # Finite p-value of the hypothesis test, or None when unavailable.
    p_value: float | None
    # Finite effect size, or None when unavailable.
    effect_size: float | None
    # One- or two-sentence plain-language explanation of the verdict.
    summary: str
def classify_hypothesis_evidence(
    idea: DiscoveryIdea | Mapping[str, Any],
    verification: Mapping[str, Any] | None,
    *,
    alpha: float = 0.05,
) -> EvidenceConclusion:
    """Classify whether a verified notebook supports the biological hypothesis.

    ``verification.status == "pass"`` means U-Chrom could rerun the notebook
    and validate required fields, finite outputs, and an explicit statistical
    test. This function separately classifies the hypothesis evidence from
    p-value and effect direction so "verified" is not confused with
    "biologically true".

    Args:
        idea: The explored idea, either as a ``DiscoveryIdea`` or as a mapping
            accepted by ``DiscoveryIdea.from_dict``. Only its
            ``expected_direction`` is read here.
        verification: Verification record of the notebook run. Keys read:
            ``status``, ``p_value``, ``effect_size`` (falling back to
            ``parameter_value``), and ``hypothesis_test_status``. ``None`` is
            treated as an empty record.
        alpha: Significance threshold; ``p_value <= alpha`` counts as
            significant. The threshold is also quoted in the summary text.

    Returns:
        An ``EvidenceConclusion`` whose ``hypothesis_status`` is one of
        ``"Inconclusive"``, ``"Contradicted"``, ``"Not supported"``,
        ``"Supported"`` or ``"Borderline"``.
    """
    if not isinstance(idea, DiscoveryIdea):
        idea = DiscoveryIdea.from_dict(idea)
    verification = dict(verification or {})

    notebook_status = (
        "Notebook verified"
        if verification.get("status") == "pass"
        else f"Notebook {verification.get('status', 'not verified')}"
    )
    p_value = _finite_float_or_none(verification.get("p_value"))
    effect_size = _finite_float_or_none(
        verification.get("effect_size", verification.get("parameter_value"))
    )

    # An explicit "insufficient data" flag overrides any numbers reported.
    if verification.get("hypothesis_test_status") == "insufficient_data":
        return EvidenceConclusion(
            notebook_status=notebook_status,
            hypothesis_status="Inconclusive",
            direction_status="Insufficient data",
            p_value=p_value,
            effect_size=effect_size,
            summary=(
                "The notebook ran, but the selected subset was too small or too "
                "degenerate for the requested statistical claim."
            ),
        )

    expected_sign = _expected_direction_sign(idea.expected_direction)
    observed_sign = _sign(effect_size)
    direction_status = _direction_status(expected_sign, observed_sign)

    if p_value is None:
        return EvidenceConclusion(
            notebook_status=notebook_status,
            hypothesis_status="Inconclusive",
            direction_status=direction_status,
            p_value=None,
            effect_size=effect_size,
            summary=(
                "The notebook did not expose a finite p-value, so the idea "
                "cannot be interpreted as statistical support."
            ),
        )

    significant = p_value <= alpha
    # Quote the threshold that was actually applied instead of a fixed 0.05,
    # so the summary never contradicts the decision for a non-default alpha.
    threshold_text = f"p <= {alpha:g}"

    if direction_status == "Opposite direction" and significant:
        hypothesis_status = "Contradicted"
        summary = (
            "The hypothesis test is significant, but the observed effect is in "
            "the opposite direction from the idea."
        )
    elif direction_status == "Opposite direction":
        hypothesis_status = "Not supported"
        summary = (
            "The observed effect points opposite to the expected direction and "
            "does not provide statistical support in this subset."
        )
    elif significant:
        # NOTE(review): this branch is also reached when the direction is
        # "Near zero effect" or "Direction not classified"; the wording below
        # then overstates directional agreement. Left as-is pending a decision.
        hypothesis_status = "Supported"
        summary = (
            "The observed effect is consistent with the expected direction and "
            f"passes the nominal {threshold_text} threshold."
        )
    elif p_value <= 0.10:
        # Fixed exploratory band above alpha; unreachable when alpha >= 0.10.
        hypothesis_status = "Borderline"
        summary = (
            "The observed effect is compatible with the expected direction, but "
            f"does not pass the nominal {threshold_text} threshold."
        )
    else:
        hypothesis_status = "Not supported"
        summary = (
            "The statistical test does not support the idea in this linked "
            "Takei subset under the notebook's operational definition."
        )

    return EvidenceConclusion(
        notebook_status=notebook_status,
        hypothesis_status=hypothesis_status,
        direction_status=direction_status,
        p_value=p_value,
        effect_size=effect_size,
        summary=summary,
    )
def structured_conclusion_markdown(
    idea: DiscoveryIdea | Mapping[str, Any],
    verification: Mapping[str, Any] | None,
) -> str:
    """Render the standard "Final interpretation" Markdown section.

    Args:
        idea: A ``DiscoveryIdea`` or a mapping accepted by
            ``DiscoveryIdea.from_dict``.
        verification: Verification record of the notebook run; ``None`` is
            treated as an empty record.

    Returns:
        Markdown text made of a heading followed by seven paragraphs, each
        separated by one blank line, ending with a single newline.
    """
    if not isinstance(idea, DiscoveryIdea):
        idea = DiscoveryIdea.from_dict(idea)
    verification = dict(verification or {})
    verdict = classify_hypothesis_evidence(idea, verification)

    # Statistical fields, formatted for display.
    method_text = verification.get("test_method", "not reported")
    parameter_text = _format_number(verification.get("parameter_value"))
    p_text = _format_p(verdict.p_value)
    effect_text = _format_number(verdict.effect_size)
    observed_text = _format_number(verification.get("observed_statistic"))
    if observed_text == "not reported":
        # Fall back to the parameter value when no statistic was recorded.
        observed_text = parameter_text

    # Names of the checks that passed, alphabetically.
    check_results = verification.get("checks") or {}
    passed_names = sorted(
        name for name, outcome in check_results.items() if outcome == "pass"
    )
    passed_text = ", ".join(passed_names)
    if not passed_text:
        passed_text = "not reported"

    # The first note, when present, replaces the generic caveat.
    note_list = verification.get("notes") or []
    if note_list:
        caveat_text = str(note_list[0])
    else:
        caveat_text = (
            "Interpret this as an exploratory result for the linked Takei subset, "
            "not as a population-level biological proof."
        )

    modality_text = ", ".join(idea.modalities) or "not specified"
    cell_type_text = ", ".join(idea.cell_types) or "not specified"
    # Show at most eight required fields and mark truncation.
    field_text = ", ".join(idea.required_fields[:8])
    if len(idea.required_fields) > 8:
        field_text += ", ..."

    paragraphs = [
        "## Final interpretation",
        f"**Hypothesis.** {idea.biological_hypothesis}",
        (
            f"**Exploration.** The notebook operationalized the idea as "
            f"`{idea.computable_parameter}` using modalities `{modality_text}` in "
            f"cell type(s) `{cell_type_text}`. Required data fields checked: "
            f"`{field_text}`."
        ),
        (
            f"**Statistical evidence.** U-Chrom runner status: "
            f"**{verdict.notebook_status}**. Test: {method_text}. "
            f"Observed statistic: {observed_text}; effect size: {effect_text}; "
            f"parameter value: {parameter_text}; p-value: {p_text}."
        ),
        (
            f"**Conclusion.** **{verdict.hypothesis_status}** "
            f"({verdict.direction_status}). {verdict.summary}"
        ),
        (
            "**What verification means.** `Notebook verified` means the run "
            "passed schema/data checks, produced finite numeric output, and "
            "included an explicit p-value/effect-size hypothesis test. It does "
            "not mean the biological hypothesis is automatically correct."
        ),
        f"**Checks passed.** {passed_text}.",
        f"**Main caveat.** {caveat_text}",
    ]
    # One blank line between paragraphs and a single trailing newline.
    return "\n\n".join(paragraphs) + "\n"
def _expected_direction_sign(text: str) -> int | None: lower = str(text).lower() if "positive" in lower: return 1 if "negative" in lower: return -1 positive_tokens = [ "larger", "higher", "greater", "increase", "enrich", "stronger", "more", "exceed", ] negative_tokens = [ "smaller", "lower", "less", "decrease", "reduced", "closer", "compact", ] if any(token in lower for token in positive_tokens): return 1 if any(token in lower for token in negative_tokens): return -1 return None def _direction_status(expected_sign: int | None, observed_sign: int | None) -> str: if expected_sign is None or observed_sign is None: return "Direction not classified" if observed_sign == 0: return "Near zero effect" if expected_sign == observed_sign: return "Expected direction" return "Opposite direction" def _finite_float_or_none(value: Any) -> float | None: try: out = float(value) except (TypeError, ValueError): return None if not math.isfinite(out): return None return out def _sign(value: float | None) -> int | None: if value is None: return None if abs(value) < 1e-12: return 0 return 1 if value > 0 else -1 def _format_p(value: float | None) -> str: if value is None: return "not reported" if value < 1e-4: return f"{value:.2e}" return f"{value:.4g}" def _format_number(value: Any) -> str: out = _finite_float_or_none(value) if out is None: return "not reported" if abs(out) >= 100 or (abs(out) < 1e-3 and out != 0): return f"{out:.3e}" return f"{out:.4g}"