Skip to content

Commit 9cc67dd

Browse files
committed
add test for utility functions
1 parent ebaa070 commit 9cc67dd

File tree

1 file changed

+52
-0
lines changed

1 file changed

+52
-0
lines changed

tests/test_writer_loader.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,18 @@
11
import os
22
import sys
33
sys.path.insert(0, "./")
4+
import numpy as np
45
import pandas as pd
56
from seqchromloader import SeqChromDatasetByDataFrame, SeqChromDatasetByBed, SeqChromDatasetByWds, SeqChromDataModule
67
from seqchromloader import dump_data_webdataset, convert_data_webdataset
8+
from seqchromloader import get_genome_sizes, make_random_shift, make_flank, random_coords, chop_genome
79

810
import unittest
911
import tempfile
1012
import shutil
1113
import pathlib as pl
1214
import webdataset as wds
15+
from pybedtools import BedTool
1316

1417
class Test(unittest.TestCase):
1518
def setUp(self) -> None:
@@ -29,6 +32,55 @@ def tearDownClass(cls) -> None:
2932
def assertIsFile(self, path):
    """Raise AssertionError unless *path* resolves to an existing regular file."""
    resolved = pl.Path(path).resolve()
    if resolved.is_file():
        return
    # The message reports the path as given by the caller, not the resolved one.
    raise AssertionError("File does not exist: %s" % str(path))
35+
36+
def test_get_genome_sizes(self):
    """Check that get_genome_sizes honours its to_filter and to_keep arguments."""
    genome_sizes_nochr10 = get_genome_sizes(genome="mm10", to_filter=["chr10"])
    genome_sizes_chr19chr11 = get_genome_sizes(genome="mm10", to_keep=["chr19", "chr11"])

    # The filtered chromosome must not appear in the result.
    self.assertNotIn("chr10", set(genome_sizes_nochr10.chrom.unique()))

    # Every remaining chromosome must be one of the requested ones. The check is
    # reduced with issubset(): passing a bare list to assertTrue would succeed
    # for any non-empty list, regardless of its contents.
    allowed = {"chr11", "chr19"}
    kept = set(genome_sizes_chr19chr11.chrom.unique())
    self.assertTrue(
        kept.issubset(allowed),
        "unexpected chromosomes kept: %s" % sorted(kept - allowed),
    )
42+
43+
def test_make_random_shift(self):
    """Shift two intervals 1000 times and check window width and centre drift."""
    intervals = pd.DataFrame(
        {
            "chrom": ["chr1", "chr2"],
            "start": [30, 100],
            "end": [50, 150],
        }
    )
    for _ in range(1000):
        shifted = make_random_shift(coords=intervals, L=10)

        # The centre of each shifted window may move at most 5 bp from the
        # centre of the interval it was derived from.
        shifted_mid = (shifted.start + shifted.end) / 2
        original_mid = (intervals.start + intervals.end) / 2
        drift = abs(shifted_mid - original_mid)
        self.assertTrue(max(drift) <= 5)

        # Every shifted window must be exactly 10 bp wide.
        widths = shifted.end - shifted.start
        self.assertTrue(np.all(widths == 10))
53+
54+
def test_make_flank(self):
    """Compare make_flank output with hand-computed windows for two intervals."""
    intervals = pd.DataFrame(
        {
            "chrom": ["chr1", "chr2"],
            "start": [30, 100],
            "end": [50, 150],
        }
    )
    flanks = make_flank(intervals, L=20, d=30)

    # Expected values are consistent with a 20 bp window centred 30 bp past
    # each interval midpoint (40 -> 70 and 125 -> 155).
    expected_starts = [60, 145]
    expected_ends = [80, 165]
    self.assertTrue(np.all(flanks.start == expected_starts))
    self.assertTrue(np.all(flanks.end == expected_ends))
63+
64+
def test_random_coords(self):
    """Check that random_coords respects the incl and excl region arguments."""
    interval = BedTool().from_dataframe(
        pd.DataFrame(
            {
                "chrom": ["chr1", "chr3"],
                "start": [0, 500],
                "end": [50000, 20000],
            }
        )
    )
    coords_incl = random_coords(genome="mm10", incl=interval)
    coords_excl = random_coords(genome="mm10", excl=interval)

    # Every sampled coordinate must overlap the included regions. u=True
    # reports each input feature at most once, so the count is comparable
    # with the number of rows. len() is used because DataFrame.size is
    # rows * columns and never equals the row count of a non-empty BED frame.
    n_overlapping = BedTool().from_dataframe(coords_incl).intersect(interval, u=True).count()
    self.assertEqual(n_overlapping, len(coords_incl))

    # No sampled coordinate may overlap the excluded regions.
    n_excluded_hits = BedTool().from_dataframe(coords_excl).intersect(interval).count()
    self.assertEqual(n_excluded_hits, 0)
73+
74+
def test_chop_genome(self):
    """Check chop_genome window spacing and its incl / excl region handling."""
    stride = 1000
    interval = BedTool().from_dataframe(
        pd.DataFrame(
            {
                "chrom": ["chr2", "chr12"],
                "start": [0, 500],
                "end": [50000, 20000],
            }
        )
    )
    coords_incl = chop_genome(chroms=["chr2", "chr12"], genome="mm10", stride=stride, l=500, incl=interval)
    coords_excl = chop_genome(chroms=["chr2", "chr12"], genome="mm10", stride=stride, l=500, excl=interval)

    def start_steps(coords):
        """Return gaps between consecutive window starts, computed per chromosome."""
        # Group by the first column: the frame is fed to BedTool as BED, so
        # that column holds the chromosome. Steps are taken within one
        # chromosome only; a difference across a chromosome boundary is
        # meaningless.
        per_chrom = [
            np.diff(np.sort(starts.to_numpy()))
            for _, starts in coords["start"].groupby(coords.iloc[:, 0])
        ]
        return np.concatenate(per_chrom) if per_chrom else np.array([], dtype=int)

    # The comparison is done on a NumPy array; comparing a Python list with an
    # int yields a single False, which made the original assertion unsatisfiable.
    # Each chromosome has one contiguous included region, so steps equal the stride.
    self.assertTrue(np.all(start_steps(coords_incl) == stride))

    # An excluded region may open a gap between surviving windows.
    # NOTE(review): assumes windows sit on a regular stride grid, so gaps stay
    # positive multiples of the stride - confirm against chop_genome.
    excl_steps = start_steps(coords_excl)
    self.assertTrue(np.all((excl_steps > 0) & (excl_steps % stride == 0)))

    # u=True counts each window at most once; len() gives the row count
    # (DataFrame.size would be rows * columns).
    n_overlapping = BedTool().from_dataframe(coords_incl).intersect(interval, u=True).count()
    self.assertEqual(n_overlapping, len(coords_incl))
    self.assertEqual(BedTool().from_dataframe(coords_excl).intersect(interval).count(), 0)
3284

3385
def test_writer(self):
3486
coords = pd.DataFrame({

0 commit comments

Comments
 (0)