@@ -28,7 +28,9 @@ def get_genome_sizes(gs=None, genome=None, to_filter=None, to_keep=None):
2828 elif genome :
2929 genome_sizes = (pd .DataFrame (chromsizes (genome ))
3030 .T
31- .rename (columns = {0 :"chrom" , 1 :"len" }))
31+ .reset_index ()
32+ .rename (columns = {"index" :"chrom" , 0 :"start" , 1 :"end" })
33+ .assign (length = lambda x : x ["end" ] - x ["start" ]))[["chrom" , "length" ]]
3234 else :
3335 raise Exception ("Either gs or genome should be provided!" )
3436
@@ -74,7 +76,7 @@ def filter_chromosomes(coords, to_filter=None, to_keep=None):
7476 corods_out = coords
7577 return corods_out
7678
77- def make_random_shift (coords , L , buffer = 25 ):
79+ def make_random_shift (coords , L , buffer = 0 ):
7880 """
7981 This function takes as input a set of bed coordinates dataframe
8082 It finds the mid-point for each record or Interval in the bed file,
@@ -152,8 +154,8 @@ def random_coords(gs:str=None, genome:str=None, incl:BedTool=None, excl:BedTool=
152154 else :
153155 raise Exception ("Either gs or genome should be provided!" )
154156
155- if incl : shuffle_kwargs .update ({"incl" : incl })
156- if excl : shuffle_kwargs .update ({"excl" : excl })
157+ if incl : shuffle_kwargs .update ({"incl" : incl . fn })
158+ if excl : shuffle_kwargs .update ({"excl" : excl . fn })
157159
158160 return (BedTool ()
159161 .random (l = l , n = n , ** random_kwargs )
@@ -194,7 +196,7 @@ def intervals_loop(chrom, start, stride, l, size):
194196
195197 genome_sizes = get_genome_sizes (gs = gs , genome = genome , to_keep = chroms )
196198
197- genome_chops = pd .concat ([intervals_loop (i .Index , 0 , stride , l , i .len )
199+ genome_chops = pd .concat ([intervals_loop (i .chrom , 0 , stride , l , i .length )
198200 for i in genome_sizes .itertuples ()])
199201 genome_chops_bdt = BedTool .from_dataframe (genome_chops )
200202
@@ -282,8 +284,8 @@ def __init__(self, chrom, start, end, *args):
282284 def __str__ (self ) -> str :
283285 return f'Chromatin Info Inaccessible in region { self .chrom } :{ self .start } -{ self .end } '
284286
285- def extract_info (chrom , start , end , label , genome_pyfasta , bigwigs , target_bam , strand = "+" , transforms :dict = None ):
286- seq = genome_pyfasta [chrom ][int (start ):int (end )]
287+ def extract_info (chrom , start , end , label , genome_pyfaidx , bigwigs , target_bam , strand = "+" , transforms :dict = None ):
288+ seq = genome_pyfaidx [chrom ][int (start ):int (end )]. seq
287289 if strand == "-" :
288290 seq = rev_comp (seq )
289291 seq_array = dna2OneHot (seq )
0 commit comments