Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 22 additions & 13 deletions src/crackling/Crackling.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ def processSequence(sequence):

duplicatePercent = round(numDuplicateGuides / numIdentifiedGuides * 100.0, 3)
printer(f'\tIdentified {numIdentifiedGuides:,} possible target sites in this file.')
- printer(f'\tOf these, {len(duplicateGuides):,} are not unique. These sites occur a total of {numDuplicateGuides} times.')
+ printer(f'\tOf these, {len(duplicateGuides):,} are not unique. These sites occur a total of {numDuplicateGuides:,} times.')
printer(f'\tRemoving {numDuplicateGuides:,} of {numIdentifiedGuides:,} ({duplicatePercent}%) guides.')
printer(f'\t{len(candidateGuides):,} distinct guides have been discovered so far.')

Expand All @@ -277,7 +277,7 @@ def processSequence(sequence):
for batchFile in guideBatchinator:
batchStartTime = time.time()

- printer(f'Processing batch file {(batchFileId+1):,} of {len(guideBatchinator)}')
+ printer(f'Processing batch file {(batchFileId+1):,} of {len(guideBatchinator):,}')

# Create new candidate guide dictionary
candidateGuides = {}
Expand Down Expand Up @@ -401,14 +401,17 @@ def processSequence(sequence):
errorCount = 0
notFoundCount = 0

- pgLength = int(configMngr['rnafold']['page-length'])
+ pgLength = min(
+ int(configMngr['input']['batch-size']),
+ int(configMngr['rnafold']['page-length'])
+ )

for pgIdx, pageCandidateGuides in Paginator(
filterCandidateGuides(candidateGuides, MODULE_MM10DB),
pgLength
):
if pgLength > 0:
- printer(f'\tProcessing page {(pgIdx+1)} ({pgLength:,} per page).')
+ printer(f'\tProcessing page {(pgIdx+1):,} (max {pgLength:,} per page).')

if os.path.exists(configMngr['rnafold']['output']):
os.remove(configMngr['rnafold']['output'])
Expand Down Expand Up @@ -501,10 +504,10 @@ def processSequence(sequence):
printer(f'\t{failedCount:,} of {testedCount:,} failed here.')

if errorCount > 0:
- printer(f'\t{errorCount} of {testedCount} erred here.')
+ printer(f'\t{errorCount:,} of {testedCount:,} erred here.')

if notFoundCount > 0:
- printer(f'\t{notFoundCount} of {testedCount} not found in RNAfold output.')
+ printer(f'\t{notFoundCount:,} of {testedCount:,} not found in RNAfold output.')

#########################################
## Calc mm10db result ##
Expand All @@ -529,9 +532,9 @@ def processSequence(sequence):
candidateGuides[target23]['acceptedByMm10db'] = CODE_ACCEPTED
acceptedCount += 1

- printer(f'\t{acceptedCount} accepted.')
+ printer(f'\t{acceptedCount:,} accepted.')

- printer(f'\t{failedCount} failed.')
+ printer(f'\t{failedCount:,} failed.')

del acceptedCount

Expand Down Expand Up @@ -606,15 +609,18 @@ def processSequence(sequence):
testedCount = 0
failedCount = 0

- pgLength = int(configMngr['bowtie2']['page-length'])
+ pgLength = min(
+ int(configMngr['input']['batch-size']),
+ int(configMngr['bowtie2']['page-length'])
+ )

for pgIdx, pageCandidateGuides in Paginator(
filterCandidateGuides(candidateGuides, MODULE_SPECIFICITY),
pgLength
):

if pgLength > 0:
- printer(f'\tProcessing page {(pgIdx+1)} ({pgLength:,} per page).')
+ printer(f'\tProcessing page {(pgIdx+1):,} (max {pgLength:,} per page).')

if os.path.exists(configMngr['bowtie2']['output']):
os.remove(configMngr['bowtie2']['output'])
Expand Down Expand Up @@ -732,15 +738,18 @@ def processSequence(sequence):
testedCount = 0
failedCount = 0

- pgLength = int(configMngr['offtargetscore']['page-length'])
+ pgLength = min(
+ int(configMngr['input']['batch-size']),
+ int(configMngr['offtargetscore']['page-length'])
+ )

for pgIdx, pageCandidateGuides in Paginator(
filterCandidateGuides(candidateGuides, MODULE_SPECIFICITY),
pgLength
):

if pgLength > 0:
- printer(f'\tProcessing page {(pgIdx+1)} ({pgLength:,} per page).')
+ printer(f'\tProcessing page {(pgIdx+1):,} (max {pgLength:,} per page).')

# prepare the list of candidate guides to score
guidesInPage = 0
Expand Down Expand Up @@ -873,7 +882,7 @@ def processSequence(sequence):
#########################################
printer('Done.')

- printer(f'{len(candidateGuides)} guides evaluated.')
+ printer(f'{len(candidateGuides):,} guides evaluated.')

printer('This batch ran in {} (dd hh:mm:ss) or {} seconds'.format(
time.strftime('%d %H:%M:%S', time.gmtime((time.time() - batchStartTime))),
Expand Down