Source code for uchrom.strc.tad.utils
# TAD utilities: BED file I/O and substructure operations
import numpy as np
import pandas as pd
[docs]
def load_tad_from_bed(bed_path):
    """Load TAD regions from a BED file.

    Only the first three columns (chrom, start, end) are used; any extra
    columns are ignored. Fields may be separated by tabs or by spaces.

    Blank lines, ``#`` comment lines, ``track``/``browser`` header lines and
    lines with fewer than three fields are skipped.

    Parameters
    ----------
    bed_path : path-like
        Path of the BED file to read.

    Returns
    -------
    list of tuple
        ``(chrom, start, end)`` tuples in file order, with ``start`` and
        ``end`` converted to ``int``.

    Raises
    ------
    ValueError
        If the start or end field of a data line is not an integer.
    """
    regions = []
    with open(bed_path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            # Split on any whitespace: BED allows space-delimited files, and
            # splitting on tabs alone would silently drop those lines.
            parts = line.split()
            # Header lines can contain three or more space-separated tokens,
            # so they must be rejected before the integer conversion below.
            if parts[0] in ('track', 'browser'):
                continue
            if len(parts) < 3:
                continue
            regions.append((parts[0], int(parts[1]), int(parts[2])))
    return regions
[docs]
def tad_regions_to_bin_indices(regions, bins_df, chrom=None):
    """Convert TAD genomic regions to bin indices.

    A bin belongs to a region when it is on the region's chromosome and lies
    entirely inside it (``bin.start >= region.start`` and
    ``bin.end <= region.end``). Regions with no such bin are omitted from the
    result.

    Parameters
    ----------
    regions : iterable of tuple
        ``(chrom, start, end)`` tuples.
    bins_df : pandas.DataFrame
        Bin table with ``chrom``, ``start`` and ``end`` columns.
    chrom : optional
        When not ``None``, only regions whose chromosome equals this value
        are converted.

    Returns
    -------
    list of tuple
        ``(start_idx, end_idx)`` pairs of Python ints, one per region that
        matched at least one bin. Both ends are inclusive and are row
        *positions* in ``bins_df`` (usable with ``bins_df.iloc`` and array
        slicing), not index labels.
    """
    bin_indices = []
    for rgn_chrom, rgn_start, rgn_end in regions:
        if chrom is not None and rgn_chrom != chrom:
            continue
        mask = (
            (bins_df['chrom'] == rgn_chrom)
            & (bins_df['start'] >= rgn_start)
            & (bins_df['end'] <= rgn_end)
        )
        # Use row positions rather than index labels: the pairs are consumed
        # positionally (``iloc`` / slicing), and labels differ from positions
        # whenever bins_df does not carry a default 0..n-1 index.
        positions = np.flatnonzero(mask.to_numpy())
        if positions.size:
            bin_indices.append((int(positions[0]), int(positions[-1])))
    return bin_indices
[docs]
def substructures_from_tads(tad_indices, structure_coords):
    """Cut one coordinate block per TAD out of a full structure.

    Parameters
    ----------
    tad_indices : iterable of tuple
        ``(start_idx, end_idx)`` pairs; the end index is inclusive.
    structure_coords : sliceable
        Coordinates of the full structure. Each slice must provide
        ``.copy()`` (presumably a NumPy array with one row per bin -- confirm
        against callers).

    Returns
    -------
    list
        Independent copies of ``structure_coords[start_idx:end_idx + 1]``,
        in the order of ``tad_indices``.
    """
    return [
        structure_coords[first:last + 1].copy()
        for first, last in tad_indices
    ]
[docs]
def merge_substructures(substructures, tad_indices, total_bins):
    """Reassemble per-TAD coordinate blocks into one full-length array.

    Parameters
    ----------
    substructures : iterable
        Coordinate blocks, paired by position with ``tad_indices``.
    tad_indices : iterable of tuple
        ``(start_idx, end_idx)`` pairs; the end index is inclusive.
    total_bins : int
        Number of rows of the merged array.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(total_bins, 3)``. Rows not covered by any TAD stay
        zero; where TADs overlap, the later one overwrites the earlier one.
    """
    merged = np.zeros((total_bins, 3))
    for bounds, block in zip(tad_indices, substructures):
        first, last = bounds
        merged[first:last + 1] = block
    return merged
[docs]
def save_tads_to_bed(tad_indices, bins_df, output_path):
    """Write TADs as three-column, tab-separated BED lines.

    Each TAD becomes one line: chromosome and start are taken from the bin
    at ``start_idx``, end from the bin at ``end_idx``. A confirmation message
    is printed once the file has been written.

    Parameters
    ----------
    tad_indices : iterable of tuple
        ``(start_idx, end_idx)`` row positions into ``bins_df`` (looked up
        with ``iloc``).
    bins_df : pandas.DataFrame
        Bin table with ``chrom``, ``start`` and ``end`` columns.
    output_path : path-like
        Destination file; opened in write mode, so existing content is
        replaced.
    """
    with open(output_path, 'w') as bed_file:
        for first_pos, last_pos in tad_indices:
            first_bin = bins_df.iloc[first_pos]
            chrom = first_bin['chrom']
            start = first_bin['start']
            end = bins_df.iloc[last_pos]['end']
            bed_file.write(f"{chrom}\t{start}\t{end}\n")
    print(f"TADs saved to {output_path}")