Source code for uchrom.auto_discovery.evidence

"""Evidence summaries for auto-discovery notebooks and reports."""

from __future__ import annotations

import math
from dataclasses import dataclass
from typing import Any, Mapping

from .ideas import DiscoveryIdea



[docs]
@dataclass(frozen=True)
class EvidenceConclusion:
    """Human-readable evidence classification for one explored idea."""

    notebook_status: str
    hypothesis_status: str
    direction_status: str
    p_value: float | None
    effect_size: float | None
    summary: str




[docs]
def classify_hypothesis_evidence(
    idea: DiscoveryIdea | Mapping[str, Any],
    verification: Mapping[str, Any] | None,
    *,
    alpha: float = 0.05,
) -> EvidenceConclusion:
    """Classify whether a verified notebook supports the biological hypothesis.

    ``verification.status == "pass"`` means U-Chrom could rerun the notebook and
    validate required fields, finite outputs, and an explicit statistical test.
    This function separately classifies the hypothesis evidence from p-value and
    effect direction so "verified" is not confused with "biologically true".

    Args:
        idea: A ``DiscoveryIdea`` or a mapping that ``DiscoveryIdea.from_dict``
            accepts. Only ``expected_direction`` is read here.
        verification: Verification record. The keys read are ``status``,
            ``p_value``, ``effect_size`` (falling back to ``parameter_value``)
            and ``hypothesis_test_status``. ``None`` is treated as empty.
        alpha: Significance threshold; ``p_value <= alpha`` counts as
            significant. The threshold quoted in the summary follows this value.

    Returns:
        An ``EvidenceConclusion`` holding the notebook status, hypothesis
        verdict, direction classification, the finite p-value and effect size
        (or ``None``), and a plain-language summary.
    """
    if not isinstance(idea, DiscoveryIdea):
        idea = DiscoveryIdea.from_dict(idea)
    verification = dict(verification or {})

    # A missing, None, or empty status is reported as "not verified" rather
    # than leaking "None" into the report text.
    run_status = verification.get("status") or "not verified"
    notebook_status = (
        "Notebook verified" if run_status == "pass" else f"Notebook {run_status}"
    )
    p_value = _finite_float_or_none(verification.get("p_value"))
    effect_size = _finite_float_or_none(
        verification.get("effect_size", verification.get("parameter_value"))
    )

    # An explicit "insufficient_data" flag overrides any numbers reported.
    if verification.get("hypothesis_test_status") == "insufficient_data":
        return EvidenceConclusion(
            notebook_status=notebook_status,
            hypothesis_status="Inconclusive",
            direction_status="Insufficient data",
            p_value=p_value,
            effect_size=effect_size,
            summary=(
                "The notebook ran, but the selected subset was too small or too "
                "degenerate for the requested statistical claim."
            ),
        )

    expected_sign = _expected_direction_sign(idea.expected_direction)
    observed_sign = _sign(effect_size)
    direction_status = _direction_status(expected_sign, observed_sign)

    if p_value is None:
        return EvidenceConclusion(
            notebook_status=notebook_status,
            hypothesis_status="Inconclusive",
            direction_status=direction_status,
            p_value=None,
            effect_size=effect_size,
            summary=(
                "The notebook did not expose a finite p-value, so the idea "
                "cannot be interpreted as statistical support."
            ),
        )

    # Quote the threshold actually applied instead of a hard-coded 0.05, so the
    # summary stays truthful when a caller passes a different alpha.
    threshold = f"p <= {alpha:g}"
    significant = p_value <= alpha
    opposite = direction_status == "Opposite direction"

    if opposite and significant:
        hypothesis_status = "Contradicted"
        summary = (
            "The hypothesis test is significant, but the observed effect is in "
            "the opposite direction from the idea."
        )
    elif opposite:
        hypothesis_status = "Not supported"
        summary = (
            "The observed effect points opposite to the expected direction and "
            "does not provide statistical support in this subset."
        )
    elif significant:
        # NOTE(review): this branch is also reached when the direction is
        # "Near zero effect" or "Direction not classified"; confirm that
        # "Supported" is the intended verdict for those cases.
        hypothesis_status = "Supported"
        summary = (
            "The observed effect is consistent with the expected direction and "
            f"passes the nominal {threshold} threshold."
        )
    elif p_value <= 0.10:
        hypothesis_status = "Borderline"
        summary = (
            "The observed effect is compatible with the expected direction, but "
            f"does not pass the nominal {threshold} threshold."
        )
    else:
        hypothesis_status = "Not supported"
        summary = (
            "The statistical test does not support the idea in this linked "
            "Takei subset under the notebook's operational definition."
        )
    return EvidenceConclusion(
        notebook_status=notebook_status,
        hypothesis_status=hypothesis_status,
        direction_status=direction_status,
        p_value=p_value,
        effect_size=effect_size,
        summary=summary,
    )




[docs]
def structured_conclusion_markdown(
    idea: DiscoveryIdea | Mapping[str, Any],
    verification: Mapping[str, Any] | None,
    *,
    alpha: float = 0.05,
) -> str:
    """Build the standard final interpretation Markdown for a notebook.

    Args:
        idea: A ``DiscoveryIdea`` or a mapping that ``DiscoveryIdea.from_dict``
            accepts.
        verification: Verification record. Besides the keys consumed by
            ``classify_hypothesis_evidence``, this reads ``test_method``,
            ``parameter_value``, ``observed_statistic``, ``checks`` and
            ``notes``. ``None`` is treated as empty.
        alpha: Significance threshold forwarded to
            ``classify_hypothesis_evidence``.

    Returns:
        A Markdown string starting with a ``## Final interpretation`` heading,
        followed by hypothesis, exploration, statistical evidence, conclusion,
        verification-meaning, checks and caveat paragraphs.
    """
    if not isinstance(idea, DiscoveryIdea):
        idea = DiscoveryIdea.from_dict(idea)
    verification = dict(verification or {})
    conclusion = classify_hypothesis_evidence(idea, verification, alpha=alpha)

    test_method = verification.get("test_method", "not reported")
    parameter_value = _format_number(verification.get("parameter_value"))
    p_value = _format_p(conclusion.p_value)
    effect = _format_number(conclusion.effect_size)
    observed = _format_number(verification.get("observed_statistic"))
    if observed == "not reported":
        # Fall back to the parameter value when no separate statistic exists.
        observed = parameter_value

    checks = verification.get("checks") or {}
    passed_checks = ", ".join(sorted(k for k, v in checks.items() if v == "pass"))
    if not passed_checks:
        passed_checks = "not reported"

    notes = verification.get("notes") or []
    if isinstance(notes, str):
        # A bare string would otherwise be indexed to its first character.
        notes = [notes]
    caveat = str(notes[0]) if notes else (
        "Interpret this as an exploratory result for the linked Takei subset, "
        "not as a population-level biological proof."
    )

    modalities = ", ".join(idea.modalities) or "not specified"
    cell_types = ", ".join(idea.cell_types) or "not specified"
    # Only the first eight fields are listed; use the same fallback as the
    # other lists so an empty list does not render as empty backticks.
    required = ", ".join(idea.required_fields[:8]) or "not specified"
    if len(idea.required_fields) > 8:
        required += ", ..."

    return "\n".join([
        "## Final interpretation",
        "",
        f"**Hypothesis.** {idea.biological_hypothesis}",
        "",
        (
            f"**Exploration.** The notebook operationalized the idea as "
            f"`{idea.computable_parameter}` using modalities `{modalities}` in "
            f"cell type(s) `{cell_types}`. Required data fields checked: "
            f"`{required}`."
        ),
        "",
        (
            f"**Statistical evidence.** U-Chrom runner status: "
            f"**{conclusion.notebook_status}**. Test: {test_method}. "
            f"Observed statistic: {observed}; effect size: {effect}; "
            f"parameter value: {parameter_value}; p-value: {p_value}."
        ),
        "",
        (
            f"**Conclusion.** **{conclusion.hypothesis_status}** "
            f"({conclusion.direction_status}). {conclusion.summary}"
        ),
        "",
        (
            "**What verification means.** `Notebook verified` means the run "
            "passed schema/data checks, produced finite numeric output, and "
            "included an explicit p-value/effect-size hypothesis test. It does "
            "not mean the biological hypothesis is automatically correct."
        ),
        "",
        f"**Checks passed.** {passed_checks}.",
        "",
        f"**Main caveat.** {caveat}",
        "",
    ])



def _expected_direction_sign(text: str) -> int | None:
    lower = str(text).lower()
    if "positive" in lower:
        return 1
    if "negative" in lower:
        return -1
    positive_tokens = [
        "larger",
        "higher",
        "greater",
        "increase",
        "enrich",
        "stronger",
        "more",
        "exceed",
    ]
    negative_tokens = [
        "smaller",
        "lower",
        "less",
        "decrease",
        "reduced",
        "closer",
        "compact",
    ]
    if any(token in lower for token in positive_tokens):
        return 1
    if any(token in lower for token in negative_tokens):
        return -1
    return None


def _direction_status(expected_sign: int | None, observed_sign: int | None) -> str:
    if expected_sign is None or observed_sign is None:
        return "Direction not classified"
    if observed_sign == 0:
        return "Near zero effect"
    if expected_sign == observed_sign:
        return "Expected direction"
    return "Opposite direction"


def _finite_float_or_none(value: Any) -> float | None:
    try:
        out = float(value)
    except (TypeError, ValueError):
        return None
    if not math.isfinite(out):
        return None
    return out


def _sign(value: float | None) -> int | None:
    if value is None:
        return None
    if abs(value) < 1e-12:
        return 0
    return 1 if value > 0 else -1


def _format_p(value: float | None) -> str:
    if value is None:
        return "not reported"
    if value < 1e-4:
        return f"{value:.2e}"
    return f"{value:.4g}"


def _format_number(value: Any) -> str:
    """Render an arbitrary value as a compact number string.

    Values that ``_finite_float_or_none`` cannot turn into a finite float
    become "not reported". Magnitudes of at least 100, or non-zero magnitudes
    below 1e-3, use three-decimal scientific notation; all others use four
    significant digits.
    """
    number = _finite_float_or_none(value)
    if number is None:
        return "not reported"
    magnitude = abs(number)
    # magnitude is non-negative, so 0 < magnitude is the same as number != 0.
    scientific = magnitude >= 100 or 0 < magnitude < 1e-3
    return format(number, ".3e" if scientific else ".4g")