from pathlib import Path
import json
import os
os.environ.setdefault('MPLBACKEND', 'Agg')
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg', force=True)
import matplotlib.pyplot as plt
from uchrom import ChromData
from uchrom.auto_discovery import DiscoveryIdea, review_idea_against_schema

# Idea specification under test, rebuilt from a literal dict via DiscoveryIdea.from_dict.
# The dict carries the title, hypothesis, computable parameter, analysis plan, modalities,
# rendered markdown, cell types, required fields, validation checks, expected direction,
# complexity, idea_id, and an empty metadata mapping.
IDEA = DiscoveryIdea.from_dict({'idea_title': 'Bergmann-specific LaminB1 peripheral anchoring signature', 'biological_hypothesis': 'Bergmann cells exhibit stronger LaminB1-associated peripheral chromatin positioning than Granule or Purkinje cells.', 'computable_parameter': 'Bergmann_LaminB1_peripheral_gap = median_gap_Bergmann - median_gap_nonBergmann, where each gap is median tracks.n_per_dist(um) for top-quartile LaminB1 spots minus bottom-quartile LaminB1 spots.', 'analysis_plan': 'Assign spots to cell types using spots.cell_id and cells.cell_type, define LaminB1 high and low quantiles within cell or cell-type strata, compute median tracks.n_per_dist(um) differences, aggregate to the single Bergmann-specific interaction parameter, and evaluate significance by permuting LaminB1 values within cells or permuting cell-type labels while reporting an empirical p-value.', 'modalities': ['chromatin_tracing', 'if_tracks', 'cell_metadata'], 'idea_markdown': '### Rationale\nBergmann cells may have a distinctive lamina-associated chromatin organization, visible as stronger LaminB1 enrichment near the nuclear periphery.\n\n### Data used\nUse spot-level LaminB1 intensity, nuclear periphery distance, traced spot-to-cell assignment, and cell-type labels.\n\n### Analysis sketch\nWithin each cell type, compare nuclear periphery distance for LaminB1-high versus LaminB1-low traced spots, then summarize the Bergmann-specific excess or depletion relative to other cell types.\n\n### Expected result\nIf LaminB1 marks lamina-proximal chromatin, LaminB1-high spots should be closer to the periphery, with the strongest effect in Bergmann cells.\n\n### Validation checks\nCheck field presence, at least three Bergmann cells, adequate spots per LaminB1 quantile, finite output, a permutation p-value, deterministic rerun, runtime budget, and shuffled-LaminB1 control.', 'cell_types': ['Bergmann', 'Granule', 'Purkinje'], 'required_fields': ['spots.cell_id', 'cells.cell_type', 'tracks.LaminB1', 
'tracks.n_per_dist(um)'], 'validation_checks': ['required_fields_exist', 'minimum_cell_count', 'minimum_spot_or_trace_count', 'finite_numeric_output', 'statistical_hypothesis_test_with_p_value', 'runtime_under_budget', 'deterministic_rerun', 'negative_control_or_permutation'], 'expected_direction': 'Negative interaction if smaller tracks.n_per_dist(um) means closer to the periphery: LaminB1-high spots should have lower periphery distance most strongly in Bergmann cells.', 'complexity': 3, 'idea_id': 'bergmann-specific-laminb1-peripheral-anchoring-s-509bbf93ca', 'metadata': {}})
# Input dataset and the directory that collects this run's artifacts.
H5CD_PATH = '/Users/weizexu/Projects/U-Chrom/tmp/takei_auto_discovery_doc/takei_doc_auto_subset.h5cd'
RUN_OUTPUT_DIR = Path('tmp/takei_auto_discovery_doc/run_pantheon_20_ideas_verified_agg')
RUN_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# An empty path means "no dataset": everything derived from it is then None.
if H5CD_PATH:
    cdata = ChromData.read(H5CD_PATH)
else:
    cdata = None

if cdata is None:
    schema = None
    adata = None
else:
    schema = cdata.discovery_schema
    adata = cdata.linked_adata

# Echo the idea id and, when data is loaded, its summary and agent-facing description.
print(IDEA.idea_id)
if cdata is not None:
    print(cdata)
    print(cdata.describe_for_agent(max_items=20))

bergmann-specific-laminb1-peripheral-anchoring-s-509bbf93ca
ChromData: n_spots=56036, n_traces=213, n_cells=9
  spots:   ['chrom', 'start', 'end', 'trace_id', 'cell_id', 'name']
  cells:   ['leiden', 'cell_type', 'x_centroid', 'y_centroid', 'z_centroid', 'nuc_volume_um3', 'doublet', 'batch', 'n_transcripts', 'n_genes_by_counts'] (9 cells)
  cellm:   {'umap': (9, 2)}
  tracks:  ['CPSF6', 'ATRX', 'H4K8ac', 'HDAC2', 'H3K9ac', 'H3K9me3', 'H3K9me2', 'RNAPIISer2-P', 'H3', 'H3K36me2', 'UBTF', 'LaminB1', 'RNAPIISer5-P', 'RYBP', 'HP1beta', 'RING1B', 'H2A.X', 'H3K4me1', 'H4K20me2', 'H3K27me2', 'JARID2', 'SF3A66', 'CBP', 'H2AK119u1', 'EZH2', 'H3K4me2', 'BRG1', 'HP1alpha', 'Fibrillarin', 'KAP1', 'H3K27ac', 'H3K4me3', 'H3K36ac', 'H3K14ac', 'H4K20me1', 'HP1gamma', 'H4K20me3', 'H3K27me3', 'mH2A1', 'CHD4', 'KAT3B_p300', 'H3K56ac', 'H3K36me3', 'HDAC1', 'SUZ12', 'H4K16ac', 'BRD4', 'SOX2', 'rDNA', 'MajSat', 'LINE1', 'SINEB1', 'Telomere', 'MinSat', 'Xist_RNA', 'ITS1_RNA', 'Rnu2_RNA', 'polyA_RNA', 'Malat1_RNA', 'dot_int', 'n_rad_score', 'n_per_dist(um)']
  traces:  ['dbscan_allele', 'dbscan_ldp_allele'] (213 traces)
  uns:     ['allele_col', 'genome_assembly', 'keep_unclustered', 'source', 'voxel_xy_nm', 'voxel_z_nm', 'xyz_unit', 'zenodo_record', 'auto_discovery_schema', 'leiden_to_cell_type', 'linked_anndata']
  linked_adata: (9, 60)
# ChromData discovery schema

dataset: takei2025_doc_subset_pantheon_20
genome: mm10
xyz_unit: um
shape: 56036 spots, 213 traces, 9 cells

modalities:
- cell_metadata: present; operations: cell_type_stratification, embedding_visualization
- chromatin_tracing: present; operations: chromosome_subset, cell_subset, trace_subset, pairwise_3d_distance, intra_chromatin_distance, inter_chromatin_distance
- if_tracks: present; operations: marker_high_low_bin_selection, marker_stratified_distance, per_cell_marker_summary, per_cell_type_marker_summary
- rna_expression: present; operations: gene_expression_lookup, expression_stratification, gene_marker_correlation, chromatin_expression_association

chroms: 20 [chr1, chr10, chr11, chr12, chr13, chr14, chr15, chr16, chr17, chr18, chr19, chr2, chr3, chr4, chr5, chr6, chr7, chr8, chr9, chrX]
cell_types: 3 [Bergmann=3, Granule=3, Purkinje=3]
tracks: 62 [CPSF6, ATRX, H4K8ac, HDAC2, H3K9ac, H3K9me3, H3K9me2, RNAPIISer2-P, H3, H3K36me2, UBTF, LaminB1, RNAPIISer5-P, RYBP, HP1beta, RING1B, H2A.X, H3K4me1, H4K20me2, H3K27me2 ...]
linked_adata: shape=[9, 60], X=csr_matrix
genes: 60 [Aldoc, Calb1, Cdh22, Drd3, Eomes, Ephb2, Foxj1, Gabra6, Gpr176, Grm1, Hspb1, Mrc1, Nefh, Npas3, Nptn, Olig1, Pcp2, Pcp4, Plcb3, Plcb4 ...]

known_missing:
- cellm['if_mean'] per-cell IF mean matrix
- raw RNA seqFISH spot geometry as a first-class ChromData component
- scRNA reference matrix for external expression comparison
- gene annotation cache for gene-neighborhood analyses

verification_required:
- required_fields_exist
- minimum_cell_count
- minimum_spot_or_trace_count
- finite_numeric_output
- statistical_hypothesis_test
- runtime_under_budget
- deterministic_rerun
- negative_control_or_permutation
- redundancy_against_existing_parameters

# Review the idea against the dataset schema; there is nothing to review without a schema.
if schema is None:
    review = None
else:
    review = review_idea_against_schema(IDEA, schema)
print(review.to_dict() if review is not None else None)
# Stop the run here when a review exists and was not accepted.
assert review is None or review.accepted, review.to_dict()

{'accepted': True, 'errors': [], 'warnings': ['multi-modal idea should include a cell_id_alignment validation check'], 'missing_fields': []}

# Lightweight data inspection: table shapes, required columns, cell-type counts, finite coverage.
required_spot_cols = ['cell_id']
required_cell_cols = ['cell_type']
required_track_cols = ['LaminB1', 'n_per_dist(um)']

for component in ('spots', 'cells', 'tracks'):
    print(f'{component} shape:', getattr(cdata, component).shape)

# Report, per table, whether each required column is present.
for label, component, wanted in (
    ('spot', 'spots', required_spot_cols),
    ('cell', 'cells', required_cell_cols),
    ('track', 'tracks', required_track_cols),
):
    print(
        f'required {label} cols present:',
        {col: col in getattr(cdata, component).columns for col in wanted},
    )

print('\ncell type counts:')
print(cdata.cells['cell_type'].value_counts(dropna=False).to_string())

# Spot-level preview: track values joined on the spot index, cell type joined via cell_id.
preview = (
    cdata.spots[['cell_id']]
    .join(cdata.tracks[required_track_cols])
    .join(cdata.cells[['cell_type']], on='cell_id')
)
# Fraction of rows whose value is a finite number after numeric coercion.
coverage = pd.Series({
    col: np.isfinite(pd.to_numeric(preview[col], errors='coerce')).mean()
    for col in ['LaminB1', 'n_per_dist(um)']
})
print('\nfinite coverage:')
print(coverage.to_string())
print('\npreview rows:')
display(preview.head(8))

spots shape: (56036, 6)
cells shape: (9, 10)
tracks shape: (56036, 62)
required spot cols present: {'cell_id': True}
required cell cols present: {'cell_type': True}
required track cols present: {'LaminB1': True, 'n_per_dist(um)': True}

cell type counts:
cell_type
Granule     3
Bergmann    3
Purkinje    3

finite coverage:
LaminB1           1.0
n_per_dist(um)    1.0

preview rows:
  cell_id  LaminB1  n_per_dist(um) cell_type
0  1_0_61 -1.46070    1.812949e-01  Bergmann
1  1_0_61 -0.92373    5.474538e-02  Bergmann
2  1_0_61 -1.53310    0.000000e+00  Bergmann
3  1_0_61 -1.14700   -8.881784e-16  Bergmann
4  1_0_61 -1.10470    3.141663e-02  Bergmann
5  1_0_61 -1.27970    6.554444e-02  Bergmann
6  1_0_61 -1.15900    1.290660e-01  Bergmann
7  1_0_61 -1.65370   -8.881784e-16  Bergmann

# Main exploratory statistical test for Bergmann-specific LaminB1 peripheral anchoring
import os
os.environ.setdefault('MPLBACKEND', 'Agg')
import matplotlib
matplotlib.use('Agg', force=True)
import matplotlib.pyplot as plt
from itertools import combinations
from pathlib import Path
import json

# Artifact locations: the *_REL paths are reported, the absolute ones are written to.
ROOT = Path('/Users/weizexu/Projects/U-Chrom')
RESULT_REL = Path('tmp/takei_auto_discovery_doc/run_pantheon_20_ideas_verified_agg/bergmann-specific-laminb1-peripheral-anchoring-s-509bbf93ca_result.csv')
FIG_REL = Path('tmp/takei_auto_discovery_doc/run_pantheon_20_ideas_verified_agg/bergmann-specific-laminb1-peripheral-anchoring-s-509bbf93ca_statistical_summary.png')
RESULT_PATH, FIG_PATH = ROOT / RESULT_REL, ROOT / FIG_REL
RESULT_PATH.parent.mkdir(parents=True, exist_ok=True)

# Spot-level table built only from the required fields: cell_id, the two numeric
# tracks (coerced to numbers), and the cell type looked up through cell_id.
df = cdata.spots[['cell_id']].assign(
    LaminB1=pd.to_numeric(cdata.tracks['LaminB1'], errors='coerce'),
    n_per_dist_um=pd.to_numeric(cdata.tracks['n_per_dist(um)'], errors='coerce'),
).join(cdata.cells[['cell_type']], on='cell_id')

# Keep the three cell types of interest and rows where both measurements are finite.
keep = (
    df['cell_type'].isin(['Bergmann', 'Granule', 'Purkinje'])
    & np.isfinite(df['LaminB1'])
    & np.isfinite(df['n_per_dist_um'])
)
df = df[keep].copy()

# For every cell, contrast n_per_dist_um between its top and bottom LaminB1 quartiles.
cell_rows = []
for cell_id, spots_in_cell in df.groupby('cell_id', sort=True):
    lamin = spots_in_cell['LaminB1']
    distance = spots_in_cell['n_per_dist_um']
    q25, q75 = (float(lamin.quantile(level)) for level in (0.25, 0.75))
    # Quartile membership is inclusive of the cut points on both sides.
    low = distance[lamin <= q25]
    high = distance[lamin >= q75]
    if low.empty or high.empty:
        continue
    median_low, median_high = float(np.median(low)), float(np.median(high))
    cell_rows.append({
        'cell_id': cell_id,
        'cell_type': str(spots_in_cell['cell_type'].iloc[0]),
        'n_spots': int(len(spots_in_cell)),
        'q25_LaminB1': q25,
        'q75_LaminB1': q75,
        'n_low_quartile': int(len(low)),
        'n_high_quartile': int(len(high)),
        'median_periphery_dist_low_LaminB1_um': median_low,
        'median_periphery_dist_high_LaminB1_um': median_high,
        'gap_high_minus_low_um': float(median_high - median_low),
    })

cell_table = pd.DataFrame(cell_rows)
if cell_table.empty:
    raise RuntimeError('No cells had finite LaminB1 and n_per_dist(um) values in both quartiles.')

# Cell-level inputs to the Bergmann vs non-Bergmann comparison.
berg_mask = cell_table['cell_type'].eq('Bergmann').to_numpy()
gaps = cell_table['gap_high_minus_low_um'].to_numpy(dtype=float)
n_berg = int(berg_mask.sum())
n_total = int(len(cell_table))

test_method = 'one-sided exact cell-label permutation test'
null_hypothesis = 'Bergmann cell labels are exchangeable with non-Bergmann labels for per-cell LaminB1 high-minus-low periphery-distance gaps.'
alternative_hypothesis = 'Bergmann cells have a more negative LaminB1 high-minus-low periphery-distance gap than non-Bergmann cells, indicating stronger peripheral anchoring for LaminB1-high spots.'

# The test needs at least one cell in each group and finite gaps throughout.
has_both_groups = n_berg >= 1 and n_total > n_berg
if not (has_both_groups and np.all(np.isfinite(gaps))):
    obs_stat = float(np.nanmean(gaps)) if len(gaps) else 0.0
    null_stats = np.array([0.0])
    p_value = 1.0
    hypothesis_test_status = 'insufficient_data'
    test_note = 'insufficient finite cell-level gaps for label-permutation comparison'
else:
    # Statistic: mean per-cell gap of Bergmann cells minus mean per-cell gap of the rest.
    obs_stat = float(gaps[berg_mask].mean() - gaps[~berg_mask].mean())
    indices = np.arange(n_total)
    # Enumerate every way to label n_berg of the cells as Bergmann.
    null_stats = np.asarray(
        [
            float(gaps[chosen].mean() - gaps[~chosen].mean())
            for chosen in (
                np.isin(indices, picked) for picked in combinations(indices, n_berg)
            )
        ],
        dtype=float,
    )
    # One-sided: share of assignments at least as negative as the observed one.
    p_value = float(np.mean(null_stats <= obs_stat))
    hypothesis_test_status = 'pass'
    test_note = f'exact enumeration of {len(null_stats)} cell-label assignments preserving n_Bergmann={n_berg}'

effect_size = obs_stat

# Per-cell rows plus the (constant) test-level fields, persisted as CSV.
result_table = cell_table.assign(
    group=np.where(cell_table['cell_type'].eq('Bergmann'), 'Bergmann', 'non-Bergmann'),
    observed_statistic=obs_stat,
    effect_size=effect_size,
    p_value=p_value,
    test_method=test_method,
    hypothesis_test_status=hypothesis_test_status,
)
result_table.to_csv(RESULT_PATH, index=False)

# JSON-serialisable summary; numbers are cast to plain Python types.
analysis_summary = dict(
    idea_id=IDEA.idea_id,
    parameter_name='Bergmann_LaminB1_peripheral_gap',
    parameter_value=float(obs_stat),
    observed_statistic=float(obs_stat),
    effect_size=float(effect_size),
    p_value=float(p_value),
    test_method=test_method,
    hypothesis_test_status=hypothesis_test_status,
    null_hypothesis=null_hypothesis,
    alternative_hypothesis=alternative_hypothesis,
    n_rows=int(len(df)),
    n_selected_cells=int(n_total),
    n_bergmann_cells=int(n_berg),
    n_nonbergmann_cells=int(n_total - n_berg),
    n_permutations_or_exact_assignments=int(len(null_stats)),
    expected_direction='negative',
    result_path=str(RESULT_REL),
    statistical_figure_path=str(FIG_REL),
    notes=[
        test_note,
        'Gap is median n_per_dist(um) among top-quartile LaminB1 spots minus bottom-quartile LaminB1 spots within each cell.',
    ],
)

# Two-panel figure: per-cell gaps by group (left) and observed statistic vs null (right).
plt.close('all')
fig, axes = plt.subplots(1, 2, figsize=(11, 4.2), constrained_layout=True)
fig.patch.set_facecolor('white')
gap_ax, null_ax = axes
for panel in (gap_ax, null_ax):
    panel.set_facecolor('white')

# Left panel: one marker per cell, a black bar at each group's mean.
plot_df = result_table.copy()
groups = ['Bergmann', 'non-Bergmann']
for position, group_name in enumerate(groups):
    in_group = plot_df['group'] == group_name
    group_gaps = plot_df.loc[in_group, 'gap_high_minus_low_um'].to_numpy(float)
    n_points = len(group_gaps)
    # Spread markers slightly sideways so overlapping cells stay visible.
    if n_points > 1:
        offsets = np.linspace(-0.05, 0.05, n_points)
    else:
        offsets = np.array([0.0])
    gap_ax.scatter(
        np.full(n_points, position) + offsets,
        group_gaps,
        s=55,
        label=f'{group_name} cells (n={n_points})',
        alpha=0.9,
    )
    if n_points:
        gap_ax.hlines(np.mean(group_gaps), position - 0.22, position + 0.22, colors='black', linewidth=2)
gap_ax.axhline(0, color='0.55', linewidth=1, linestyle='--')
gap_ax.set_xticks(range(len(groups)), groups)
gap_ax.set_ylabel('Per-cell median distance gap\nHigh LaminB1 - Low LaminB1 (um)')
gap_ax.set_title('Cell-level LaminB1/periphery contrast')
gap_ax.legend(frameon=False, fontsize=8, loc='best')

# Right panel: histogram of the null statistics with the observed value marked.
n_distinct = len(np.unique(np.round(null_stats, 8)))
n_bins = min(20, max(5, n_distinct))
null_ax.hist(null_stats, bins=n_bins, color='#b8cbe6', edgecolor='white', label='label-permutation null')
null_ax.axvline(obs_stat, color='#b22222', linewidth=2.5, label='observed Bergmann - non-Bergmann')
null_ax.axvline(0, color='0.45', linewidth=1, linestyle='--')
null_ax.set_xlabel('Mean gap difference: Bergmann - non-Bergmann (um)')
null_ax.set_ylabel('Number of label assignments')
null_ax.set_title('Hypothesis-test evidence')
annotation = f'{test_method}\np={p_value:.3g}; effect={effect_size:.4g} um\nn cells={n_total}; exact assignments={len(null_stats)}'
annotation_box = dict(boxstyle='round', facecolor='white', edgecolor='0.8', alpha=0.95)
null_ax.text(0.03, 0.97, annotation, transform=null_ax.transAxes, va='top', ha='left', fontsize=8.5, bbox=annotation_box)
null_ax.legend(frameon=False, fontsize=8, loc='lower right')

fig.suptitle('Bergmann-specific LaminB1 peripheral anchoring signature', fontsize=12)
fig.savefig(FIG_PATH, dpi=180, bbox_inches='tight')
display(fig)
plt.close(fig)

# Show the per-cell table and the machine-readable summary.
print('result_table:')
display(result_table)
print('\nanalysis_summary:')
print(json.dumps(analysis_summary, indent=2))

<Figure size 1100x420 with 2 Axes>
result_table:
   cell_id  ... hypothesis_test_status
0  1_0_116  ...                   pass
1   1_0_26  ...                   pass
2   1_0_34  ...                   pass
3   1_0_37  ...                   pass
4   1_0_42  ...                   pass
5   1_0_47  ...                   pass
6   1_0_61  ...                   pass
7   1_0_63  ...                   pass
8   1_0_69  ...                   pass

[9 rows x 16 columns]

analysis_summary:
{
  "idea_id": "bergmann-specific-laminb1-peripheral-anchoring-s-509bbf93ca",
  "parameter_name": "Bergmann_LaminB1_peripheral_gap",
  "parameter_value": 0.49836185532374777,
  "observed_statistic": 0.49836185532374777,
  "effect_size": 0.49836185532374777,
  "p_value": 0.9880952380952381,
  "test_method": "one-sided exact cell-label permutation test",
  "hypothesis_test_status": "pass",
  "null_hypothesis": "Bergmann cell labels are exchangeable with non-Bergmann labels for per-cell LaminB1 high-minus-low periphery-distance gaps.",
  "alternative_hypothesis": "Bergmann cells have a more negative LaminB1 high-minus-low periphery-distance gap than non-Bergmann cells, indicating stronger peripheral anchoring for LaminB1-high spots.",
  "n_rows": 56036,
  "n_selected_cells": 9,
  "n_bergmann_cells": 3,
  "n_nonbergmann_cells": 6,
  "n_permutations_or_exact_assignments": 84,
  "expected_direction": "negative",
  "result_path": "tmp/takei_auto_discovery_doc/run_pantheon_20_ideas_verified_agg/bergmann-specific-laminb1-peripheral-anchoring-s-509bbf93ca_result.csv",
  "statistical_figure_path": "tmp/takei_auto_discovery_doc/run_pantheon_20_ideas_verified_agg/bergmann-specific-laminb1-peripheral-anchoring-s-509bbf93ca_statistical_summary.png",
  "notes": [
    "exact enumeration of 84 cell-label assignments preserving n_Bergmann=3",
    "Gap is median n_per_dist(um) among top-quartile LaminB1 spots minus bottom-quartile LaminB1 spots within each cell."
  ]
}

# Every check the idea declares starts as 'not_run'; notes collects failure messages.
checks = dict.fromkeys(IDEA.validation_checks, 'not_run')
notes = []
# The hypothesis-test check is tracked even when the idea did not list it.
if 'statistical_hypothesis_test' not in checks:
    checks['statistical_hypothesis_test'] = 'not_run'

def _check_keys(prefix):
    return [key for key in checks if key == prefix or key.startswith(prefix + ':')]

def _set_check(prefix, value):
    """Record ``value`` for every check matching ``prefix``.

    When no existing check matches, a new entry named ``prefix`` is created.
    """
    targets = _check_keys(prefix) or [prefix]
    for name in targets:
        checks[name] = value

def _check_status(prefix):
    """Collapse the statuses of all checks matching ``prefix`` into one value.

    Returns None when nothing matches, 'fail' when any match failed, 'pass'
    when all matches passed, and otherwise the status of the first match.
    """
    statuses = [checks[name] for name in _check_keys(prefix)]
    if not statuses:
        return None
    if any(state == 'fail' for state in statuses):
        return 'fail'
    if any(state != 'pass' for state in statuses):
        return statuses[0]
    return 'pass'

# Required fields pass only when the schema review ran and accepted the idea.
_set_check('required_fields_exist', 'pass' if review is not None and review.accepted else 'fail')
if _check_keys('cell_id_alignment'):
    # With no linked table there is nothing to align, so the check passes.
    aligned = True
    if cdata is not None and adata is not None:
        # Compare ids whenever both tables exist. Tables of different length
        # cannot line up row for row, so a length mismatch is a failure
        # rather than a skipped comparison that silently passes.
        aligned = list(map(str, cdata.cells.index)) == list(map(str, adata.obs_names))
    _set_check('cell_id_alignment', 'pass' if aligned else 'fail')
if _check_keys('minimum_cell_count'):
    # Prefer the count reported by the analysis; fall back to the result
    # table, then to the dataset's cell table, then to zero.
    n_cells = analysis_summary.get('n_selected_cells')
    if n_cells is None:
        if 'cell_type' in getattr(result_table, 'columns', []):
            n_cells = len(result_table)
        elif cdata is not None and getattr(cdata, 'n_cells', 0):
            n_cells = len(cdata.cells)
        else:
            n_cells = 0
    enough_cells = n_cells >= 1
    _set_check('minimum_cell_count', 'pass' if enough_cells else 'fail')
if _check_keys('minimum_spot_or_trace_count'):
    n_rows = analysis_summary.get('n_rows')
    if n_rows is None:
        n_rows = 0 if result_table is None else len(result_table)
    enough_rows = n_rows >= 1
    _set_check('minimum_spot_or_trace_count', 'pass' if enough_rows else 'fail')
if _check_keys('finite_numeric_output'):
    # The headline parameter must be present and a finite number.
    value = analysis_summary.get('parameter_value')
    value_is_finite = value is not None and np.isfinite(value)
    _set_check('finite_numeric_output', 'pass' if value_is_finite else 'fail')
if _check_keys('statistical_hypothesis_test'):
    # Read every reported test attribute back out of the summary.
    (
        p_value,
        test_method,
        null_hypothesis,
        alternative_hypothesis,
        observed_statistic,
        effect_size,
    ) = (
        analysis_summary.get(field)
        for field in (
            'p_value',
            'test_method',
            'null_hypothesis',
            'alternative_hypothesis',
            'observed_statistic',
            'effect_size',
        )
    )
    hypothesis_test_status = analysis_summary.get('hypothesis_test_status', 'pass')

    # Anything that cannot be turned into a float counts as NaN.
    coerced = []
    for raw in (p_value, observed_statistic, effect_size):
        try:
            coerced.append(float(raw))
        except Exception:
            coerced.append(np.nan)
    p_float, stat_float, effect_float = coerced

    texts_present = all(
        text is not None and str(text).strip() != ''
        for text in (test_method, null_hypothesis, alternative_hypothesis)
    )
    numbers_valid = (
        np.isfinite(p_float)
        and 0.0 <= p_float <= 1.0
        and np.isfinite(stat_float)
        and np.isfinite(effect_float)
    )
    has_required_test = (
        texts_present
        and numbers_valid
        and hypothesis_test_status != 'insufficient_data'
    )
    # The result table must also expose the p-value and the method.
    if result_table is None or not hasattr(result_table, 'columns'):
        has_required_test = False
    else:
        has_required_test = has_required_test and 'p_value' in result_table.columns and 'test_method' in result_table.columns

    _set_check('statistical_hypothesis_test', 'pass' if has_required_test else 'fail')
    if not has_required_test:
        notes.append('statistical_hypothesis_test failed: analysis_summary must include null_hypothesis, alternative_hypothesis, test_method, observed_statistic, effect_size, finite p_value in [0,1], and result_table columns p_value/test_method')
if _check_keys('negative_control_or_permutation'):
    # Keyword scan over the test method, the summary's key names, and the
    # result-table column names; any control-like token counts as evidence.
    test_method_text = str(analysis_summary.get('test_method', '')).lower()
    summary_keys_text = ' '.join(str(key).lower() for key in analysis_summary)
    if result_table is not None and hasattr(result_table, 'columns'):
        result_columns_text = ' '.join(str(col).lower() for col in result_table.columns)
    else:
        result_columns_text = ''
    control_text = f'{test_method_text} {summary_keys_text} {result_columns_text}'
    control_tokens = ('permutation', 'randomization', 'shuffle', 'negative_control', 'null_distribution', 'control')
    has_control_or_permutation = any(token in control_text for token in control_tokens)
    if has_control_or_permutation:
        _set_check('negative_control_or_permutation', 'pass')
    else:
        _set_check('negative_control_or_permutation', 'not_implemented')
# Control-style checks that never ran are reported as not implemented.
for check, state in list(checks.items()):
    if state != 'not_run':
        continue
    if 'negative_control' in check or check.endswith('_control'):
        checks[check] = 'not_implemented'

# The run fails when any mandatory check failed or the analysis produced no rows.
required_for_pass = ['required_fields_exist', 'minimum_cell_count', 'finite_numeric_output', 'statistical_hypothesis_test']
failed_required = [name for name in required_for_pass if _check_status(name) == 'fail']
notes.extend(f'{name} failed' for name in failed_required)
status = 'fail' if failed_required else 'pass'

n_rows_for_status = analysis_summary.get('n_rows')
if n_rows_for_status is None:
    n_rows_for_status = 0 if result_table is None else len(result_table)
if n_rows_for_status == 0:
    status = 'fail'
    notes.append('analysis produced no result rows')

# Final report: verdict, per-check statuses, and headline values copied from the summary.
verification = {'idea_id': IDEA.idea_id, 'status': status, 'checks': checks}
verification.update(
    (field, analysis_summary.get(field))
    for field in ('parameter_value', 'p_value', 'test_method', 'effect_size', 'result_path')
)
verification['notes'] = notes + analysis_summary.get('notes', [])
print(json.dumps(verification, indent=2))

{
  "idea_id": "bergmann-specific-laminb1-peripheral-anchoring-s-509bbf93ca",
  "status": "pass",
  "checks": {
    "required_fields_exist": "pass",
    "minimum_cell_count": "pass",
    "minimum_spot_or_trace_count": "pass",
    "finite_numeric_output": "pass",
    "statistical_hypothesis_test_with_p_value": "not_run",
    "runtime_under_budget": "not_run",
    "deterministic_rerun": "not_run",
    "negative_control_or_permutation": "pass",
    "statistical_hypothesis_test": "pass"
  },
  "parameter_value": 0.49836185532374777,
  "p_value": 0.9880952380952381,
  "test_method": "one-sided exact cell-label permutation test",
  "effect_size": 0.49836185532374777,
  "result_path": "tmp/takei_auto_discovery_doc/run_pantheon_20_ideas_verified_agg/bergmann-specific-laminb1-peripheral-anchoring-s-509bbf93ca_result.csv",
  "notes": [
    "exact enumeration of 84 cell-label assignments preserving n_Bergmann=3",
    "Gap is median n_per_dist(um) among top-quartile LaminB1 spots minus bottom-quartile LaminB1 spots within each cell."
  ]
}

Auto-discovery idea: Bergmann-specific LaminB1 peripheral anchoring signature

Rationale

Data used

Analysis sketch

Expected result

Validation checks

Graphical abstract

Required data checks

Exploration

Critique and compact analysis plan

Statistical figure

Runner verification summary

Final interpretation

Final interpretation