import os
# Run from the project checkout so the relative 'tmp/...' paths used below
# resolve. NOTE(review): the absolute path is specific to one machine.
project_root = '/Users/weizexu/Projects/U-Chrom'
os.chdir(project_root)
print('cwd', os.getcwd())

cwd /Users/weizexu/Projects/U-Chrom

from pathlib import Path
import json
import os
os.environ.setdefault('MPLBACKEND', 'Agg')
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg', force=True)
import matplotlib.pyplot as plt
from uchrom import ChromData
from uchrom.auto_discovery import DiscoveryIdea, review_idea_against_schema

# The discovery idea under test, rebuilt from its serialized dict form. The
# 'validation_checks' list is what the verification cell at the end of the
# notebook uses to seed its `checks` dict; 'idea_id' names the output files.
IDEA = DiscoveryIdea.from_dict({'idea_title': 'Granule-cell HP1alpha heterochromatin clustering in 3D traces', 'biological_hypothesis': 'Granule cells have spatially clustered HP1alpha-rich heterochromatin domains within chromosome traces.', 'computable_parameter': 'Granule_HP1alpha_clustering_ratio = median_observed_pairwise_distance_HP1alphaHigh / median_permuted_pairwise_distance_matchedSpots for HP1alpha-high spots within Granule trace-chromosome groups.', 'analysis_plan': 'Subset to Granule cells, stratify spots by trace_id and chrom, select top-quartile HP1alpha spots where enough spots exist, compute median pairwise Euclidean distances among selected spots, generate matched random spot sets within the same trace-chromosome groups, and test whether the observed/permuted ratio is below 1 using a permutation-derived p-value.', 'modalities': ['chromatin_tracing', 'if_tracks', 'cell_metadata'], 'idea_markdown': '### Rationale\nGranule cells may concentrate HP1alpha-marked heterochromatin into compact 3D clusters within traced chromosomes.\n\n### Data used\nUse 3D spot coordinates, trace and chromosome labels, Granule cell identity, and spot-level HP1alpha intensity.\n\n### Analysis sketch\nFor Granule traces, identify HP1alpha-high spots and compare their within-trace pairwise 3D distances to distances from matched random spot sets.\n\n### Expected result\nHP1alpha-high spots should be closer together than expected by chance if Granule heterochromatin forms compact spatial domains.\n\n### Validation checks\nConfirm field availability, sufficient Granule cells and trace-level HP1alpha-high spots, finite pairwise distances, empirical p-value, runtime limit, deterministic random seed, and matched permutation control.', 'cell_types': ['Granule'], 'required_fields': ['coords', 'spots.cell_id', 'spots.trace_id', 'spots.chrom', 'cells.cell_type', 'tracks.HP1alpha'], 'validation_checks': ['required_fields_exist', 'minimum_cell_count', 'minimum_spot_or_trace_count', 
'finite_numeric_output', 'statistical_hypothesis_test_with_p_value', 'runtime_under_budget', 'deterministic_rerun', 'negative_control_or_permutation'], 'expected_direction': 'Ratio below 1, indicating HP1alpha-high spots are more spatially clustered than matched random spots in Granule traces.', 'complexity': 4, 'idea_id': 'granule-cell-hp1alpha-heterochromatin-clustering-3401a4f59b', 'metadata': {}})
# Input dataset and output directory, both relative to the working directory.
H5CD_PATH = 'tmp/takei_auto_discovery_doc/takei_doc_auto_subset.h5cd'
RUN_OUTPUT_DIR = Path('tmp/takei_auto_discovery_doc/run_pantheon_20_ideas_verified_agg')
RUN_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
# Load the dataset only when a path is configured; otherwise cdata, schema and
# adata all stay None.
cdata = ChromData.read(H5CD_PATH) if H5CD_PATH else None
schema = cdata.discovery_schema if cdata is not None else None
adata = cdata.linked_adata if cdata is not None else None
print(IDEA.idea_id)
if cdata is not None:
    # Print the object's own repr, then its agent-facing description.
    print(cdata)
    print(cdata.describe_for_agent(max_items=20))

granule-cell-hp1alpha-heterochromatin-clustering-3401a4f59b
ChromData: n_spots=56036, n_traces=213, n_cells=9
  spots:   ['chrom', 'start', 'end', 'trace_id', 'cell_id', 'name']
  cells:   ['leiden', 'cell_type', 'x_centroid', 'y_centroid', 'z_centroid', 'nuc_volume_um3', 'doublet', 'batch', 'n_transcripts', 'n_genes_by_counts'] (9 cells)
  cellm:   {'umap': (9, 2)}
  tracks:  ['CPSF6', 'ATRX', 'H4K8ac', 'HDAC2', 'H3K9ac', 'H3K9me3', 'H3K9me2', 'RNAPIISer2-P', 'H3', 'H3K36me2', 'UBTF', 'LaminB1', 'RNAPIISer5-P', 'RYBP', 'HP1beta', 'RING1B', 'H2A.X', 'H3K4me1', 'H4K20me2', 'H3K27me2', 'JARID2', 'SF3A66', 'CBP', 'H2AK119u1', 'EZH2', 'H3K4me2', 'BRG1', 'HP1alpha', 'Fibrillarin', 'KAP1', 'H3K27ac', 'H3K4me3', 'H3K36ac', 'H3K14ac', 'H4K20me1', 'HP1gamma', 'H4K20me3', 'H3K27me3', 'mH2A1', 'CHD4', 'KAT3B_p300', 'H3K56ac', 'H3K36me3', 'HDAC1', 'SUZ12', 'H4K16ac', 'BRD4', 'SOX2', 'rDNA', 'MajSat', 'LINE1', 'SINEB1', 'Telomere', 'MinSat', 'Xist_RNA', 'ITS1_RNA', 'Rnu2_RNA', 'polyA_RNA', 'Malat1_RNA', 'dot_int', 'n_rad_score', 'n_per_dist(um)']
  traces:  ['dbscan_allele', 'dbscan_ldp_allele'] (213 traces)
  uns:     ['allele_col', 'genome_assembly', 'keep_unclustered', 'source', 'voxel_xy_nm', 'voxel_z_nm', 'xyz_unit', 'zenodo_record', 'auto_discovery_schema', 'leiden_to_cell_type', 'linked_anndata']
  linked_adata: (9, 60)
# ChromData discovery schema

dataset: takei2025_doc_subset_pantheon_20
genome: mm10
xyz_unit: um
shape: 56036 spots, 213 traces, 9 cells

modalities:
- cell_metadata: present; operations: cell_type_stratification, embedding_visualization
- chromatin_tracing: present; operations: chromosome_subset, cell_subset, trace_subset, pairwise_3d_distance, intra_chromatin_distance, inter_chromatin_distance
- if_tracks: present; operations: marker_high_low_bin_selection, marker_stratified_distance, per_cell_marker_summary, per_cell_type_marker_summary
- rna_expression: present; operations: gene_expression_lookup, expression_stratification, gene_marker_correlation, chromatin_expression_association

chroms: 20 [chr1, chr10, chr11, chr12, chr13, chr14, chr15, chr16, chr17, chr18, chr19, chr2, chr3, chr4, chr5, chr6, chr7, chr8, chr9, chrX]
cell_types: 3 [Bergmann=3, Granule=3, Purkinje=3]
tracks: 62 [CPSF6, ATRX, H4K8ac, HDAC2, H3K9ac, H3K9me3, H3K9me2, RNAPIISer2-P, H3, H3K36me2, UBTF, LaminB1, RNAPIISer5-P, RYBP, HP1beta, RING1B, H2A.X, H3K4me1, H4K20me2, H3K27me2 ...]
linked_adata: shape=[9, 60], X=csr_matrix
genes: 60 [Aldoc, Calb1, Cdh22, Drd3, Eomes, Ephb2, Foxj1, Gabra6, Gpr176, Grm1, Hspb1, Mrc1, Nefh, Npas3, Nptn, Olig1, Pcp2, Pcp4, Plcb3, Plcb4 ...]

known_missing:
- cellm['if_mean'] per-cell IF mean matrix
- raw RNA seqFISH spot geometry as a first-class ChromData component
- scRNA reference matrix for external expression comparison
- gene annotation cache for gene-neighborhood analyses

verification_required:
- required_fields_exist
- minimum_cell_count
- minimum_spot_or_trace_count
- finite_numeric_output
- statistical_hypothesis_test
- runtime_under_budget
- deterministic_rerun
- negative_control_or_permutation
- redundancy_against_existing_parameters

# Review the idea against the dataset schema. Without a schema there is
# nothing to review and `review` stays None.
if schema is None:
    review = None
else:
    review = review_idea_against_schema(IDEA, schema)
print(review.to_dict() if review is not None else None)
# Halt the notebook here when a review was produced and it rejected the idea.
assert review is None or review.accepted, review.to_dict()

{'accepted': True, 'errors': [], 'warnings': ['multi-modal idea should include a cell_id_alignment validation check'], 'missing_fields': []}

# Lightweight data inspection for fields, Granule coverage, and finite values
import numpy as np
import pandas as pd

# First rows of the spot table, shown below as a visual sanity check.
spots_preview = cdata.spots.head(5).copy()
coords_shape = cdata.coords.shape if hasattr(cdata, 'coords') else None
# Prefer the accessor when the object offers one; otherwise index the tracks.
if hasattr(cdata, 'get_track'):
    hp1 = cdata.get_track('HP1alpha')
else:
    hp1 = cdata.tracks['HP1alpha']
coords = np.asarray(cdata.coords)

# Cell-level view: how many cells per type, and which cell IDs are Granule.
cell_type_counts = cdata.cells['cell_type'].value_counts().to_dict()
is_granule_cell = cdata.cells['cell_type'].astype(str).eq('Granule')
granule_cell_ids = cdata.cells.index[is_granule_cell].astype(str).tolist()

# Spot-level masks: Granule membership plus finiteness of coordinates and of
# the HP1alpha track.
spot_cell_as_str = cdata.spots['cell_id'].astype(str)
granule_mask = spot_cell_as_str.isin(granule_cell_ids)
finite_coord_mask = np.isfinite(coords).all(axis=1)
finite_hp1_mask = np.isfinite(np.asarray(hp1, dtype=float))
usable_granule_mask = granule_mask & finite_coord_mask & finite_hp1_mask
granule_trace_chrom_pairs = cdata.spots.loc[granule_mask, ['trace_id','chrom']].drop_duplicates()

inspection_summary = {
    'coords_shape': coords_shape,
    'tracks_HP1alpha_len': int(len(hp1)),
    'cell_type_counts': cell_type_counts,
    'n_granule_cells': int(len(granule_cell_ids)),
    'n_granule_spots': int(granule_mask.sum()),
    'n_granule_finite_coords_hp1': int(usable_granule_mask.sum()),
    'n_granule_trace_chrom_groups': int(granule_trace_chrom_pairs.shape[0]),
}
print(json.dumps(inspection_summary, indent=2))
display(spots_preview)
print('HP1alpha preview:', np.asarray(hp1[:10], dtype=float))

{
  "coords_shape": [
    56036,
    3
  ],
  "tracks_HP1alpha_len": 56036,
  "cell_type_counts": {
    "Granule": 3,
    "Bergmann": 3,
    "Purkinje": 3
  },
  "n_granule_cells": 3,
  "n_granule_spots": 12085,
  "n_granule_finite_coords_hp1": 12085,
  "n_granule_trace_chrom_groups": 69
}
   chrom      start        end         trace_id cell_id        name
0  chr14   30425000   30450000  1_0_61_chr14_a2  1_0_61  chr14-1096
1   chr2   99725000   99750000   1_0_61_chr2_a2  1_0_61   chr2-3864
2  chr14   39625000   39650000  1_0_61_chr14_a1  1_0_61  chr14-1464
3   chr2   99950000   99975000   1_0_61_chr2_a2  1_0_61   chr2-3873
4  chr14  108450000  108475000  1_0_61_chr14_a1  1_0_61  chr14-4210
HP1alpha preview: [-0.5544  -1.2181  -1.1272  -1.2666  -1.1605  -0.88473 -0.88473 -1.3575
 -0.80291 -0.90291]

# Main compact exploration: Granule HP1alpha-high spatial clustering versus matched random spot sets
import os
os.environ.setdefault('MPLBACKEND', 'Agg')
os.chdir('/Users/weizexu/Projects/U-Chrom')
import json
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg', force=True)
import matplotlib.pyplot as plt
from IPython.display import display, Image

# Analysis knobs: number of random draws for the null distribution and the
# smallest trace/chromosome group that is still analysed.
n_permutations = 500
min_group_spots = 8
# Fixed seed so the random spot sets are the same on every run.
rng = np.random.default_rng(3401)

# Output locations. The *_rel strings are what gets reported; the *_abs paths
# are the same files anchored at the workspace root and are used for writing.
workspace = Path('/Users/weizexu/Projects/U-Chrom')
output_dir_abs = workspace.joinpath('tmp/takei_auto_discovery_doc/run_pantheon_20_ideas_verified_agg')
output_dir_abs.mkdir(parents=True, exist_ok=True)
result_path_rel = 'tmp/takei_auto_discovery_doc/run_pantheon_20_ideas_verified_agg/granule-cell-hp1alpha-heterochromatin-clustering-3401a4f59b_result.csv'
fig_path_rel = 'tmp/takei_auto_discovery_doc/run_pantheon_20_ideas_verified_agg/granule-cell-hp1alpha-heterochromatin-clustering-3401a4f59b_statistical_summary.png'
result_path_abs = workspace.joinpath(result_path_rel)
fig_path_abs = workspace.joinpath(fig_path_rel)

# Coordinates and the HP1alpha track as plain float arrays.
coords = np.asarray(cdata.coords, dtype=float)
hp1 = np.asarray(cdata.tracks['HP1alpha'], dtype=float)

# Work on a positionally re-indexed copy of the spot table. `_row` stores each
# spot's position so filtered rows can still be looked up in `coords`.
spots = cdata.spots.reset_index(drop=True).copy()
spots['_row'] = np.arange(len(spots))
spots['HP1alpha'] = hp1

# Cell IDs (as strings) of every cell labelled 'Granule'.
cells = cdata.cells.copy()
is_granule_cell = cells['cell_type'].astype(str).eq('Granule')
granule_cells = set(cells.index[is_granule_cell].astype(str))

# Keep Granule spots whose HP1alpha value and all coordinates are finite.
in_granule_cell = spots['cell_id'].astype(str).isin(granule_cells)
has_finite_values = np.isfinite(hp1) & np.isfinite(coords).all(axis=1)
mask = in_granule_cell & has_finite_values
kept_columns = ['_row', 'cell_id', 'trace_id', 'chrom', 'HP1alpha']
granule = spots.loc[mask, kept_columns].copy()

def median_pairwise_distance(row_indices):
    """Return the median Euclidean distance over all unordered spot pairs.

    ``row_indices`` are positional indices into the module-level ``coords``
    array. With fewer than two indices there is no pair to measure and NaN is
    returned.
    """
    selected = coords[np.asarray(row_indices, dtype=int)]
    n_points = len(selected)
    if n_points < 2:
        return np.nan
    # Enumerate each unordered pair (i < j) once and measure only those.
    first, second = np.triu_indices(n_points, k=1)
    deltas = selected[first] - selected[second]
    pair_distances = np.sqrt((deltas * deltas).sum(axis=1))
    return float(np.median(pair_distances))

# Collect the Granule trace/chromosome groups usable for the clustering test.
# `eligible_groups` keeps what the permutation step needs (the candidate rows
# and how many to draw); `group_rows` keeps the per-group observed statistics.
eligible_groups = []
group_rows = []
# observed=True restricts grouping to trace/chromosome combinations that
# actually occur. Passing it explicitly pins the behaviour for categorical key
# columns, whose pandas default is changing, and silences the FutureWarning
# that the bare call emits.
for (trace_id, chrom), g in granule.groupby(['trace_id', 'chrom'], sort=False, observed=True):
    n_total = len(g)
    if n_total < min_group_spots:
        continue
    # HP1alpha-high means at or above the group's own 75th percentile. Ties at
    # the threshold are all kept.
    q75 = float(np.quantile(g['HP1alpha'].to_numpy(), 0.75))
    high = g.loc[g['HP1alpha'] >= q75]
    k = len(high)
    # At least one pair is required. `high` is a subset of `g`, so k can never
    # exceed n_total and no upper-bound check is needed.
    if k < 2:
        continue
    obs_dist = median_pairwise_distance(high['_row'].to_numpy())
    if not np.isfinite(obs_dist):
        continue
    row_ids = g['_row'].to_numpy(dtype=int)
    eligible_groups.append({'trace_id': trace_id, 'chrom': chrom, 'row_ids': row_ids, 'k': int(k)})
    group_rows.append({
        'trace_id': trace_id,
        'chrom': chrom,
        # The cell ID is taken from the group's first row.
        'cell_id': str(g['cell_id'].iloc[0]),
        'n_spots': int(n_total),
        'n_hp1_high': int(k),
        'hp1_q75': q75,
        'observed_group_median_distance_um': obs_dist,
    })

# One row per eligible group. An empty table means the test cannot be run.
group_table = pd.DataFrame(group_rows)
if group_table.empty:
    observed_median = float('nan')
    null_medians = np.array([], dtype=float)
    p_value = float('nan')
    ratio = float('nan')
    effect_size = float('nan')
    hypothesis_test_status = 'insufficient_data'
    notes = ['No eligible Granule trace-chromosome groups after finite value and size filters.']
else:
    # Observed statistic: median across groups of each group's median
    # HP1alpha-high pairwise distance.
    observed_group_distances = group_table['observed_group_median_distance_um'].to_numpy(dtype=float)
    observed_median = float(np.median(observed_group_distances))

    # Null distribution: in every permutation, draw for each group a random
    # spot set of the same size from that group and recompute the statistic.
    null_medians = np.empty(n_permutations, dtype=float)
    for b in range(n_permutations):
        perm_group_distances = [
            median_pairwise_distance(rng.choice(eg['row_ids'], size=eg['k'], replace=False))
            for eg in eligible_groups
        ]
        null_medians[b] = np.nanmedian(perm_group_distances)

    null_median_center = float(np.median(null_medians))
    if null_median_center > 0:
        ratio = float(observed_median / null_median_center)
    else:
        ratio = float('nan')
    # One-sided empirical p-value with the +1 correction in both numerator and
    # denominator: share of null draws at or below the observed median.
    n_at_or_below = np.count_nonzero(null_medians <= observed_median)
    p_value = float((n_at_or_below + 1) / (null_medians.size + 1))
    effect_size = float(1.0 - ratio)
    if np.isfinite(p_value) and np.isfinite(effect_size):
        hypothesis_test_status = 'pass'
    else:
        hypothesis_test_status = 'insufficient_data'
    notes = []

# Plain-language statement of the test, reused in the table and the summary.
test_method = f'one-sided matched randomization test ({n_permutations} permutations; grouped by Granule trace_id and chrom)'
null_hypothesis = 'Within Granule trace-chromosome groups, HP1alpha-high spots have the same or larger median pairwise 3D distance than matched random spot sets of equal size.'
alternative_hypothesis = 'Within Granule trace-chromosome groups, HP1alpha-high spots have smaller median pairwise 3D distance than matched random spot sets, consistent with spatial clustering.'

# Counts and the null centre, computed once and shared by both outputs.
n_cells_selected = int(len(granule_cells))
n_spots_finite = int(len(granule))
n_groups_eligible = int(len(group_table))
have_null = len(null_medians) > 0
null_center = float(np.median(null_medians)) if have_null else None

# Single-row result table written to CSV. A missing null centre is stored as
# NaN here, whereas the JSON-style summary below carries None.
result_row = {
    'idea_id': IDEA.idea_id,
    'n_selected_cells': n_cells_selected,
    'n_granule_spots_finite': n_spots_finite,
    'n_eligible_trace_chrom_groups': n_groups_eligible,
    'observed_statistic': ratio,
    'observed_median_hp1high_distance_um': observed_median,
    'null_median_matched_distance_um': null_center if have_null else np.nan,
    'effect_size': effect_size,
    'p_value': p_value,
    'test_method': test_method,
    'hypothesis_test_status': hypothesis_test_status,
}
result_table = pd.DataFrame([result_row])
result_table.to_csv(result_path_abs, index=False)

# Summary dict read by the verification cell further down.
analysis_summary = {
    'idea_id': IDEA.idea_id,
    'parameter_name': 'Granule_HP1alpha_clustering_ratio',
    'parameter_value': ratio,
    'n_rows': int(len(result_table)),
    'n_selected_cells': n_cells_selected,
    'n_granule_spots_finite': n_spots_finite,
    'n_eligible_trace_chrom_groups': n_groups_eligible,
    'n_permutations': int(n_permutations),
    'null_hypothesis': null_hypothesis,
    'alternative_hypothesis': alternative_hypothesis,
    'test_method': test_method,
    'observed_statistic': ratio,
    'observed_median_hp1high_distance_um': observed_median,
    'null_median_matched_distance_um': null_center,
    'p_value': p_value,
    'effect_size': effect_size,
    'hypothesis_test_status': hypothesis_test_status,
    'expected_direction': 'ratio below 1 / positive effect_size indicates HP1alpha-high spatial clustering',
    'result_path': result_path_rel,
    'notes': notes,
}

# Summary figure: histogram of the permutation null (one median within-group
# distance per permutation) with the observed HP1alpha-high median and the
# null median marked. When no null distribution exists, a text placeholder is
# drawn instead.
fig, ax = plt.subplots(figsize=(7.2, 4.8), facecolor='white')
if len(null_medians):
    ax.hist(null_medians, bins=30, color='#9ecae1', edgecolor='#225ea8', alpha=0.85, label='Matched random spot sets')
    ax.axvline(observed_median, color='#d62728', linewidth=2.5, label='Observed HP1alpha-high')
    ax.axvline(float(np.median(null_medians)), color='#08519c', linewidth=2, linestyle='--', label='Null median')
    ax.text(
        0.98, 0.95,
        f'p = {p_value:.4f}\nratio = {ratio:.3f}\neffect = {effect_size:.3f}\ngroups = {len(group_table)}\n{n_permutations} permutations',
        transform=ax.transAxes,
        ha='right', va='top', fontsize=10,
        bbox=dict(boxstyle='round,pad=0.35', facecolor='white', edgecolor='0.7', alpha=0.95),
    )
    # Build the legend only in this branch. The placeholder branch draws no
    # labelled artists, and calling legend() without any makes matplotlib warn
    # and attach an empty legend box.
    ax.legend(frameon=False, loc='upper left')
else:
    ax.text(0.5, 0.5, 'Insufficient eligible groups for permutation test', ha='center', va='center', transform=ax.transAxes)
ax.set_title('Granule HP1alpha-high 3D clustering within trace-chromosome groups')
ax.set_xlabel('Median within-group pairwise 3D distance (µm)')
ax.set_ylabel('Permutation count')
ax.grid(axis='y', alpha=0.25)
fig.tight_layout()
fig.savefig(fig_path_abs, dpi=180, bbox_inches='tight')
# Close the figure once it is saved; it is re-displayed from the file later.
plt.close(fig)

# Show the headline table, the summary and the saved figure inline, then
# confirm that both output files exist on disk.
display(result_table)
print(json.dumps(analysis_summary, indent=2))
display(Image(filename=str(fig_path_abs)))
print('Saved result_table:', result_path_rel, 'exists=', result_path_abs.exists())
print('Saved statistical figure:', fig_path_rel, 'exists=', fig_path_abs.exists())
print('Group table preview:')
# With no eligible groups, `group_table` was built from an empty list and has
# no columns, so sorting by the distance column would raise KeyError. Show the
# empty frame as-is in that case.
if group_table.empty:
    display(group_table)
else:
    display(group_table.sort_values('observed_group_median_distance_um').head(10))

                                             idea_id  ...  hypothesis_test_status
0  granule-cell-hp1alpha-heterochromatin-clusteri...  ...                    pass

[1 rows x 11 columns]
{
  "idea_id": "granule-cell-hp1alpha-heterochromatin-clustering-3401a4f59b",
  "parameter_name": "Granule_HP1alpha_clustering_ratio",
  "parameter_value": 0.9058851843614169,
  "n_rows": 1,
  "n_selected_cells": 3,
  "n_granule_spots_finite": 12085,
  "n_eligible_trace_chrom_groups": 69,
  "n_permutations": 500,
  "null_hypothesis": "Within Granule trace-chromosome groups, HP1alpha-high spots have the same or larger median pairwise 3D distance than matched random spot sets of equal size.",
  "alternative_hypothesis": "Within Granule trace-chromosome groups, HP1alpha-high spots have smaller median pairwise 3D distance than matched random spot sets, consistent with spatial clustering.",
  "test_method": "one-sided matched randomization test (500 permutations; grouped by Granule trace_id and chrom)",
  "observed_statistic": 0.9058851843614169,
  "observed_median_hp1high_distance_um": 1.1437323657249223,
  "null_median_matched_distance_um": 1.2625577561809562,
  "p_value": 0.001996007984031936,
  "effect_size": 0.09411481563858315,
  "hypothesis_test_status": "pass",
  "expected_direction": "ratio below 1 / positive effect_size indicates HP1alpha-high spatial clustering",
  "result_path": "tmp/takei_auto_discovery_doc/run_pantheon_20_ideas_verified_agg/granule-cell-hp1alpha-heterochromatin-clustering-3401a4f59b_result.csv",
  "notes": []
}
<IPython.core.display.Image object>
Saved result_table: tmp/takei_auto_discovery_doc/run_pantheon_20_ideas_verified_agg/granule-cell-hp1alpha-heterochromatin-clustering-3401a4f59b_result.csv exists= True
Saved statistical figure: tmp/takei_auto_discovery_doc/run_pantheon_20_ideas_verified_agg/granule-cell-hp1alpha-heterochromatin-clustering-3401a4f59b_statistical_summary.png exists= True
Group table preview:
           trace_id  chrom  ...   hp1_q75  observed_group_median_distance_um
29  1_0_69_chr11_a1  chr11  ...  0.535790                           0.456101
23  1_0_42_chr18_a2  chr18  ...  0.799500                           0.487421
56   1_0_47_chrX_a1   chrX  ...  0.162600                           0.504782
33  1_0_69_chr16_a1  chr16  ...  1.021500                           0.577619
31  1_0_69_chr19_a1  chr19  ...  0.013698                           0.652603
34  1_0_69_chr14_a2  chr14  ...  1.164150                           0.679886
24  1_0_69_chr12_a1  chr12  ...  0.115382                           0.687803
38  1_0_69_chr19_a2  chr19  ...  0.356698                           0.725993
48   1_0_69_chr1_a1   chr1  ...  0.513025                           0.739511
58   1_0_47_chr7_a1   chr7  ...  1.517000                           0.782794

[10 rows x 7 columns]

tmp/takei_auto_discovery_doc/run_pantheon_20_ideas_verified_agg/notebooks/granule-cell-hp1alpha-heterochromatin-clustering-3401a4f59b.ipynb:51: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  ]

# Every check the idea asked for starts out as 'not_run'. The canonical
# statistical-test entry is added when the idea did not list it itself.
checks = dict.fromkeys(IDEA.validation_checks, 'not_run')
if 'statistical_hypothesis_test' not in checks:
    checks['statistical_hypothesis_test'] = 'not_run'
# Verification notes start empty.
notes = []

def _check_keys(prefix):
    """Return the keys of ``checks`` that belong to the check named ``prefix``.

    A key belongs to ``prefix`` when it is exactly ``prefix``, a scoped form
    ``prefix:<scope>``, or a suffixed variant ``prefix_<suffix>``. The last
    form lets idea-supplied names such as
    ``statistical_hypothesis_test_with_p_value`` follow the canonical
    ``statistical_hypothesis_test`` check instead of being left at 'not_run'.
    Keys are returned in the insertion order of ``checks``.
    """
    variant_starts = (prefix + ':', prefix + '_')
    return [key for key in checks if key == prefix or key.startswith(variant_starts)]

def _set_check(prefix, value):
    """Store ``value`` for every check key that ``_check_keys(prefix)`` returns.

    When no key matches, a new entry named exactly ``prefix`` is created.
    """
    matched = _check_keys(prefix)
    for name in matched or [prefix]:
        checks[name] = value

def _check_status(prefix):
    """Summarise the stored statuses of all check keys matching ``prefix``.

    Returns None when nothing matches, 'fail' when any matching entry failed,
    'pass' when every matching entry passed, and otherwise the status of the
    first matching entry.
    """
    statuses = [checks[name] for name in _check_keys(prefix)]
    if not statuses:
        return None
    if statuses.count('fail') > 0:
        return 'fail'
    if statuses.count('pass') == len(statuses):
        return 'pass'
    return statuses[0]

# Runner verification: fill in `checks` from the schema review and from
# `analysis_summary` / `result_table`, derive an overall status, and print the
# verification record as JSON. Each section runs only when a matching check
# key exists.

# Required fields pass only when a schema review exists and accepted the idea.
_set_check('required_fields_exist', 'pass' if review is not None and review.accepted else 'fail')
if _check_keys('cell_id_alignment'):
    # With no ChromData or no linked AnnData there is nothing to compare and
    # the check passes. Otherwise the cell IDs must match in the same order.
    # Comparing the full lists also fails when the two sides differ in length,
    # which previously slipped through as a pass.
    aligned = True
    if cdata is not None and adata is not None:
        aligned = list(map(str, cdata.cells.index)) == list(map(str, adata.obs_names))
    _set_check('cell_id_alignment', 'pass' if aligned else 'fail')
if _check_keys('minimum_cell_count'):
    # Prefer the count reported by the analysis; fall back to the result
    # table, then to the dataset's cell table.
    n_cells = analysis_summary.get('n_selected_cells')
    if n_cells is None and 'cell_type' in getattr(result_table, 'columns', []):
        n_cells = len(result_table)
    if n_cells is None:
        n_cells = len(cdata.cells) if cdata is not None and getattr(cdata, 'n_cells', 0) else 0
    _set_check('minimum_cell_count', 'pass' if n_cells >= 1 else 'fail')
if _check_keys('minimum_spot_or_trace_count'):
    # NOTE(review): this counts result-table rows, not spots or traces.
    n_rows = analysis_summary.get('n_rows')
    if n_rows is None:
        n_rows = len(result_table) if result_table is not None else 0
    _set_check('minimum_spot_or_trace_count', 'pass' if n_rows >= 1 else 'fail')
if _check_keys('finite_numeric_output'):
    value = analysis_summary.get('parameter_value')
    _set_check('finite_numeric_output', 'pass' if value is not None and np.isfinite(value) else 'fail')
if _check_keys('statistical_hypothesis_test'):
    # These assignments rebind the notebook-level names of the same spelling
    # to the values stored in the summary.
    p_value = analysis_summary.get('p_value')
    test_method = analysis_summary.get('test_method')
    null_hypothesis = analysis_summary.get('null_hypothesis')
    alternative_hypothesis = analysis_summary.get('alternative_hypothesis')
    observed_statistic = analysis_summary.get('observed_statistic')
    effect_size = analysis_summary.get('effect_size')
    hypothesis_test_status = analysis_summary.get('hypothesis_test_status', 'pass')
    # Best-effort float conversion: anything unconvertible becomes NaN and
    # then fails the finiteness tests below.
    try:
        p_float = float(p_value)
    except Exception:
        p_float = np.nan
    try:
        stat_float = float(observed_statistic)
    except Exception:
        stat_float = np.nan
    try:
        effect_float = float(effect_size)
    except Exception:
        effect_float = np.nan
    has_required_test = (
        test_method is not None
        and str(test_method).strip() != ''
        and null_hypothesis is not None
        and str(null_hypothesis).strip() != ''
        and alternative_hypothesis is not None
        and str(alternative_hypothesis).strip() != ''
        and np.isfinite(p_float)
        and 0.0 <= p_float <= 1.0
        and np.isfinite(stat_float)
        and np.isfinite(effect_float)
        and hypothesis_test_status != 'insufficient_data'
    )
    # The result table must also carry the p-value and the test method.
    if result_table is not None and hasattr(result_table, 'columns'):
        has_required_test = has_required_test and 'p_value' in result_table.columns and 'test_method' in result_table.columns
    else:
        has_required_test = False
    _set_check('statistical_hypothesis_test', 'pass' if has_required_test else 'fail')
    if not has_required_test:
        notes.append('statistical_hypothesis_test failed: analysis_summary must include null_hypothesis, alternative_hypothesis, test_method, observed_statistic, effect_size, finite p_value in [0,1], and result_table columns p_value/test_method')
if _check_keys('negative_control_or_permutation'):
    # Keyword scan: the check passes when the test method text, a summary key
    # or a result column mentions a permutation or control term.
    test_method_text = str(analysis_summary.get('test_method', '')).lower()
    summary_keys_text = ' '.join(str(key).lower() for key in analysis_summary.keys())
    result_columns_text = ''
    if result_table is not None and hasattr(result_table, 'columns'):
        result_columns_text = ' '.join(str(col).lower() for col in result_table.columns)
    control_text = ' '.join([test_method_text, summary_keys_text, result_columns_text])
    has_control_or_permutation = any(
        token in control_text
        for token in ['permutation', 'randomization', 'shuffle', 'negative_control', 'null_distribution', 'control']
    )
    _set_check(
        'negative_control_or_permutation',
        'pass' if has_control_or_permutation else 'not_implemented',
    )
# Control-style checks that were never evaluated are reported as
# 'not_implemented' rather than 'not_run'.
for check in list(checks):
    if checks[check] == 'not_run' and ('negative_control' in check or check.endswith('_control')):
        checks[check] = 'not_implemented'

# Overall status: any failure among the required checks, or an analysis with
# no result rows, fails the run. Checks left at 'not_run' do not.
required_for_pass = ['required_fields_exist', 'minimum_cell_count', 'finite_numeric_output', 'statistical_hypothesis_test']
status = 'pass'
for check in required_for_pass:
    if _check_status(check) == 'fail':
        status = 'fail'
        notes.append(f'{check} failed')
n_rows_for_status = analysis_summary.get('n_rows')
if n_rows_for_status is None:
    n_rows_for_status = len(result_table) if result_table is not None else 0
if n_rows_for_status == 0:
    status = 'fail'
    notes.append('analysis produced no result rows')

verification = {
    'idea_id': IDEA.idea_id,
    'status': status,
    'checks': checks,
    'parameter_value': analysis_summary.get('parameter_value'),
    'p_value': analysis_summary.get('p_value'),
    'test_method': analysis_summary.get('test_method'),
    'effect_size': analysis_summary.get('effect_size'),
    'result_path': analysis_summary.get('result_path'),
    # Verification notes first, then any notes recorded by the analysis.
    'notes': notes + analysis_summary.get('notes', []),
}
print(json.dumps(verification, indent=2))

{
  "idea_id": "granule-cell-hp1alpha-heterochromatin-clustering-3401a4f59b",
  "status": "pass",
  "checks": {
    "required_fields_exist": "pass",
    "minimum_cell_count": "pass",
    "minimum_spot_or_trace_count": "pass",
    "finite_numeric_output": "pass",
    "statistical_hypothesis_test_with_p_value": "not_run",
    "runtime_under_budget": "not_run",
    "deterministic_rerun": "not_run",
    "negative_control_or_permutation": "pass",
    "statistical_hypothesis_test": "pass"
  },
  "parameter_value": 0.9058851843614169,
  "p_value": 0.001996007984031936,
  "test_method": "one-sided matched randomization test (500 permutations; grouped by Granule trace_id and chrom)",
  "effect_size": 0.09411481563858315,
  "result_path": "tmp/takei_auto_discovery_doc/run_pantheon_20_ideas_verified_agg/granule-cell-hp1alpha-heterochromatin-clustering-3401a4f59b_result.csv",
  "notes": []
}

Auto-discovery idea: Granule-cell HP1alpha heterochromatin clustering in 3D traces

Rationale

Data used

Analysis sketch

Expected result

Validation checks

Graphical abstract

Required data checks

Exploration

Critique and compact plan

Statistical figure

Runner verification summary

Final interpretation

Final interpretation