11import os
22import sys
33sys .path .insert (0 , "./" )
4+ import numpy as np
45import pandas as pd
56from seqchromloader import SeqChromDatasetByDataFrame , SeqChromDatasetByBed , SeqChromDatasetByWds , SeqChromDataModule
67from seqchromloader import dump_data_webdataset , convert_data_webdataset
8+ from seqchromloader import get_genome_sizes , make_random_shift , make_flank , random_coords , chop_genome
79
810import unittest
911import tempfile
1012import shutil
1113import pathlib as pl
1214import webdataset as wds
15+ from pybedtools import BedTool
1316
1417class Test (unittest .TestCase ):
1518 def setUp (self ) -> None :
@@ -29,6 +32,55 @@ def tearDownClass(cls) -> None:
def assertIsFile(self, path):
    """Fail with ``AssertionError`` unless *path* resolves to a regular file.

    Args:
        path: Anything accepted by ``pathlib.Path`` (str or path-like).

    Raises:
        AssertionError: If the resolved path is not an existing file.
    """
    resolved = pl.Path(path).resolve()
    if resolved.is_file():
        return
    raise AssertionError("File does not exist: %s" % str(path))
35+
def test_get_genome_sizes(self):
    """Check the chromosome filters of ``get_genome_sizes`` on mm10.

    Asserts that ``chr10`` is absent when it is passed in ``to_filter``, and
    that only ``chr11``/``chr19`` are present when they are passed in
    ``to_keep``.
    """
    genome_sizes_nochr10 = get_genome_sizes(genome="mm10", to_filter=["chr10"])
    genome_sizes_chr19chr11 = get_genome_sizes(genome="mm10", to_keep=["chr19", "chr11"])

    self.assertFalse(np.any(genome_sizes_nochr10.chrom.unique() == "chr10"))

    kept_chroms = list(genome_sizes_chr19chr11.chrom.unique())
    # Asserting directly on a list of membership flags can never fail for a
    # non-empty result (any non-empty list is truthy), so the flags are
    # reduced with all(). The explicit non-empty check keeps an empty
    # result a failure, as it was before.
    self.assertTrue(kept_chroms)
    self.assertTrue(all(chrom in ("chr11", "chr19") for chrom in kept_chroms))
42+
def test_make_random_shift(self):
    """Check ``make_random_shift`` over 1000 draws with ``L=10``.

    For every draw, asserts that each shifted window's midpoint lies within
    5 of the corresponding input midpoint and that each shifted window has
    width 10.
    """
    coords = pd.DataFrame({
        'chrom': ["chr1", "chr2"],
        'start': [30, 100],
        'end': [50, 150],
    })
    for _ in range(1000):
        shifted_window = make_random_shift(coords=coords, L=10)

        shifted_mid = (shifted_window.start + shifted_window.end) / 2
        original_mid = (coords.start + coords.end) / 2
        self.assertTrue(max(abs(shifted_mid - original_mid)) <= 5)

        widths = shifted_window.end - shifted_window.start
        self.assertTrue(np.all(widths == 10))
53+
def test_make_flank(self):
    """Check ``make_flank`` with ``L=20`` and ``d=30`` on two fixed windows.

    Asserts the returned starts are ``[60, 145]`` and the returned ends are
    ``[80, 165]``.
    """
    windows = pd.DataFrame({
        'chrom': ["chr1", "chr2"],
        'start': [30, 100],
        'end': [50, 150],
    })
    expected_starts = [60, 145]
    expected_ends = [80, 165]

    coords_flank = make_flank(windows, L=20, d=30)

    starts_match = np.all(coords_flank.start == expected_starts)
    self.assertTrue(starts_match)
    ends_match = np.all(coords_flank.end == expected_ends)
    self.assertTrue(ends_match)
63+
def test_random_coords(self):
    """Check ``random_coords`` against include and exclude regions on mm10.

    Asserts that the coordinates drawn with ``incl`` yield as many
    intersections with the regions as there are rows, and that the
    coordinates drawn with ``excl`` yield no intersections at all.
    """
    interval = BedTool().from_dataframe(pd.DataFrame({'chrom': ['chr1', 'chr3'],
                                                      'start': [0, 500],
                                                      'end': [50000, 20000]}))
    coords_incl = random_coords(genome="mm10", incl=interval)
    coords_excl = random_coords(genome="mm10", excl=interval)

    # The intersection count is a number of intervals, so it has to be
    # compared with the number of rows. DataFrame.size is rows x columns
    # and would only match for an empty result.
    n_overlaps_incl = BedTool().from_dataframe(coords_incl).intersect(interval).count()
    self.assertEqual(n_overlaps_incl, len(coords_incl))

    n_overlaps_excl = BedTool().from_dataframe(coords_excl).intersect(interval).count()
    self.assertEqual(n_overlaps_excl, 0)
73+
def test_chop_genome(self):
    """Check ``chop_genome`` with ``stride=1000`` and ``l=500`` on mm10.

    Asserts that consecutive window starts differ by the stride, that the
    windows chopped with ``incl`` yield as many intersections with the
    regions as there are rows, and that the windows chopped with ``excl``
    yield no intersections at all.
    """
    interval = BedTool().from_dataframe(pd.DataFrame({'chrom': ['chr2', 'chr12'],
                                                      'start': [0, 500],
                                                      'end': [50000, 20000]}))
    coords_incl = chop_genome(chroms=["chr2", "chr12"], genome="mm10", stride=1000, l=500, incl=interval)
    coords_excl = chop_genome(chroms=["chr2", "chr12"], genome="mm10", stride=1000, l=500, excl=interval)

    # Comparing a Python list with `== 1000` yields a single False, which
    # made these assertions fail unconditionally. np.diff gives the
    # consecutive differences as an array, so the comparison is
    # element-wise.
    # NOTE(review): this keeps the original row-to-row check. If the result
    # spans several chromosomes or has gaps left by excluded regions, the
    # difference at those rows may not equal the stride - confirm against
    # chop_genome's output ordering.
    self.assertTrue(np.all(np.diff(coords_incl.start.to_numpy()) == 1000))
    self.assertTrue(np.all(np.diff(coords_excl.start.to_numpy()) == 1000))

    # The intersection count is a number of intervals, so compare it with
    # the number of rows. DataFrame.size is rows x columns.
    n_overlaps_incl = BedTool().from_dataframe(coords_incl).intersect(interval).count()
    self.assertEqual(n_overlaps_incl, len(coords_incl))

    n_overlaps_excl = BedTool().from_dataframe(coords_excl).intersect(interval).count()
    self.assertEqual(n_overlaps_excl, 0)
3284
3385 def test_writer (self ):
3486 coords = pd .DataFrame ({
0 commit comments