TAD calling¶
uchrom.strc.tad.call_tads_by_pval is a GPU-accelerated reimplementation
of the ArcFISH TAD caller (Yu et al. 2025). For each candidate boundary
position it asks: is the variance inside the window smaller than the
variance across the window? A per-axis F-test answers this and ACAT
combines the three axes into one p-value per bin. Local minima in that
p-value are scored as TAD boundaries; the hierarchical option emits
nested TAD levels by repeatedly dropping the weakest boundary.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from uchrom import ChromData
from uchrom.strc.tad import call_tads_by_pval, TADCallerParams
from uchrom.fea import mean_distance_matrix
from uchrom.pl import plot_distance_matrix
Load the example FOF-CT¶
Uses the Takei et al. 2021 Nature mESC chromatin-tracing dataset (4DN 4DNFIHF3JCBY); auto-downloaded from the public 4DN S3 bucket on first run, cached under example-data/.
from pathlib import Path
import os
TAKEI_URL = (
'https://4dn-open-data-public.s3.amazonaws.com/fourfront-webprod/'
'wfoutput/e699334e-fb34-4a0e-8ef6-670b2099831a/4DNFIHF3JCBY.csv'
)
def _repo_root() -> Path:
# Walk up until we find the repo marker (pyproject.toml); fall back to
# cwd if nothing matches. Avoid matching on ``example-data/`` alone —
# prior notebook runs may have synthesised a ``tutorials/example-data/``.
for p in [Path.cwd(), *Path.cwd().parents]:
if (p / 'pyproject.toml').exists():
return p
return Path.cwd()
def find_fofct() -> str:
root = _repo_root()
candidates = [
root / 'example-data' / '4DNFIHF3JCBY.csv',
Path.home() / 'Downloads' / 'ArcFISH_data' / '4DNFIHF3JCBY.csv',
Path.home() / 'Downloads' / 'ArcFISH_data' / '4DNFIHF3JCBY.h5cd',
Path.home() / 'Downloads' / '4DNFIHF3JCBY.csv',
]
for c in candidates:
if c.exists():
return str(c)
# Fall back to an earlier synthetic build if present
cand = root / 'example-data' / 'fofct_core.csv'
if cand.exists():
return str(cand)
# Download
dest = root / 'example-data' / '4DNFIHF3JCBY.csv'
dest.parent.mkdir(parents=True, exist_ok=True)
print(f'[data] downloading Takei 2021 (~22 MB) → {dest}')
import urllib.request
tmp = dest.with_suffix(dest.suffix + '.part')
urllib.request.urlretrieve(TAKEI_URL, tmp)
tmp.rename(dest)
return str(dest)
FOFCT = find_fofct()
print(FOFCT)
cd = ChromData.read(FOFCT) if FOFCT.endswith(".h5cd") else ChromData.from_fofct(FOFCT)
cd
Run the TAD caller¶
chrom = cd.spots['chrom'].astype(str).iloc[0]
params = TADCallerParams(
window_bp=1e5, # boundary window (bp)
fdr_cutoff=0.1,
hierarchical=True,
max_levels=3,
)
tads = call_tads_by_pval(cd, chrom=chrom, params=params, device='auto', verbose=True)
print(f'{len(tads)} TAD entries across {tads["level"].nunique()} levels')
tads.head(15)
Overlay TAD blocks on the median distance matrix¶
A good TAD call should outline the low-distance blocks along the diagonal of the population-median pairwise distance matrix — the classic Hi-C TAD-view. We draw each TAD as a rectangle along the diagonal.
from matplotlib.patches import Rectangle
df = cd.get_chrom(chrom).to_dataframe()
dmat, bin_ids, n_traces = mean_distance_matrix(df, chrom=chrom)
B = len(bin_ids)
mid_bp = np.array([(s + e) * 0.5 for (s, e) in bin_ids])
def bin_of(bp):
return int(np.argmin(np.abs(mid_bp - bp)))
def draw_tad_rects(ax, tad_df, **kw):
for _, r in tad_df.iterrows():
a = bin_of(r['start'])
b = bin_of(r['end']) - 1 # inclusive end bin
if b - a < 1:
continue
ax.add_patch(Rectangle(
(a - 0.5, a - 0.5), b - a + 1, b - a + 1,
fill=False, **kw,
))
# Top-level TADs only
top_level = tads[tads['level'] == tads['level'].min()]
fig = plot_distance_matrix(dmat, title=f'Median distance, top-level TADs ({len(top_level)})')
draw_tad_rects(fig.axes[0], top_level, edgecolor='white', linewidth=1.5)
plt.show()
Hierarchy — nested TADs¶
With hierarchical=True, level 1 keeps only the strongest boundaries (coarse TADs); deeper levels add weaker ones. Overlaying a different colour per level on the same heatmap makes the nesting obvious.
fig = plot_distance_matrix(dmat, title='Hierarchical TADs')
ax = fig.axes[0]
level_colors = {1: 'white', 2: 'cyan', 3: 'orange', 4: 'magenta'}
for lv in sorted(tads['level'].unique()):
draw_tad_rects(
ax, tads[tads['level'] == lv],
edgecolor=level_colors.get(lv, 'yellow'),
linewidth=1.5 if lv == 1 else 1.0,
linestyle='-' if lv == 1 else '--',
)
handles = [
plt.Line2D([0], [0], color=level_colors.get(lv, 'yellow'),
ls='-' if lv == 1 else '--',
label=f'level {lv} ({(tads["level"]==lv).sum()} TADs)')
for lv in sorted(tads['level'].unique())
]
ax.legend(handles=handles, loc='upper right', fontsize=9, framealpha=0.6)
plt.show()
Boundary score track¶
Behind the scenes each bin has a per-axis F-test p-value for the hypothesis “intra-window variance < inter-window variance”. Plot the −log10 p along the chromosome to see where boundaries are most confident. TAD corners should coincide with peaks.
# Recompute the per-bin statistic directly so we can plot it.
from uchrom.fea.arc import axis_variance_cube, filter_normalize, axis_weight
from uchrom.strc.tad.arc_pval import _boundary_pvals
cube = axis_variance_cube(cd, chrom=chrom, device='auto')
cube = filter_normalize(cube)
w = axis_weight(cd, chrom=chrom, device='auto')
pvals_track, _ = _boundary_pvals(
cube['norm_var'], cube['count'],
np.array([(s+e)*0.5 for s,e in cube['bin_ids']]),
w, window_bp=1e5,
)
neg_log_p = -np.log10(np.clip(np.where(np.isfinite(pvals_track), pvals_track, 1.0), 1e-300, 1.0))
fig, ax = plt.subplots(figsize=(9, 3))
ax.plot(np.arange(len(neg_log_p)), neg_log_p, 'k-', lw=1)
ax.set_xlabel('Bin index'); ax.set_ylabel('-log10 p (boundary)')
ax.set_title('TAD-boundary signal along the chromosome')
# Mark level-1 boundary bins
for _, r in top_level.iterrows():
for bp in (r['start'], r['end']):
k = bin_of(bp)
if 0 < k < B:
ax.axvline(k, color='red', alpha=0.5, lw=1)
plt.tight_layout()
plt.show()
Notes¶
The variance-normalisation preprocessing is shared with :func:
uchrom.strc.loop.call_loops_axiswise_f— you only pay for it once per chromosome if you cache.For a “flat” TAD partition set
hierarchical=Falseor just uselevel == 1.This caller complements the classical directionality-index method available in :func:
uchrom.strc.tad.get_domains; they agree on strong TADs and disagree on boundary-marginal ones.