Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions bin/proviral
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,19 @@ def cli():
@click.option(
'--run-hypermut/--no-hypermut', default=False
)
@click.option(
'--check-long-deletion/--ignore-long-deletion', default=False
)
@click.option(
'--include-small-orfs/--exclude-small-orfs', default=False)
@click.option(
'--working-folder',
default=os.getcwd()
)

def intact(input_file, subtype, include_packaging_signal,
include_rre, check_major_splice_donor_site, run_hypermut,
include_small_orfs, working_folder):
def intact(input_file, subtype, include_packaging_signal,
include_rre, check_major_splice_donor_site, run_hypermut,
check_long_deletion, include_small_orfs, working_folder):
"""
Check consensus sequences for intactness.
"""
Expand All @@ -69,7 +72,8 @@ def intact(input_file, subtype, include_packaging_signal,
try:
intact_sequences, non_intact_sequences, orfs, errors = it.intact(
folder, input_file, subtype, include_packaging_signal, include_rre,
check_major_splice_donor_site, run_hypermut, include_small_orfs
check_major_splice_donor_site, run_hypermut, check_long_deletion,
include_small_orfs
)
log.info('Intact sequences written to ' + intact_sequences)
log.info('Non-intact sequences written to ' + non_intact_sequences)
Expand Down
21 changes: 21 additions & 0 deletions intact/intact.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,22 @@ def isHypermut(aln):
#/end isHypermut


def has_long_deletion(sequence, alignment, min_length=8000):
    """
    Determines whether the query sequence has a long deletion in it.

    The check is purely length-based: a query whose sequence is shorter
    than `min_length` is reported as containing a long deletion.

    Keyword Args:

    sequence -- the query sequence; its `.seq` length is measured and its
                `.id` is attached to the reported error.
    alignment -- multiple sequence alignment object containing the
                 reference and query sequence. Not consulted by the
                 current length-based check; kept for call compatibility.
    min_length -- minimum sequence length below which the query is
                  flagged. Defaults to 8000.

    Returns:

    An IntactnessError tagged with LONGDELETION_ERROR when the query is
    shorter than `min_length`, otherwise None.
    """
    # NOTE: This is the same check that HIVSeqInR uses.
    if len(sequence.seq) >= min_length:
        return None

    return IntactnessError(sequence.id,
                           LONGDELETION_ERROR,
                           "Query sequence contains a long deletion.")
#/end has_long_deletion


def has_mutated_major_splice_donor_site(alignment,
Expand Down Expand Up @@ -554,6 +569,7 @@ def intact( working_dir,
include_rre,
check_major_splice_donor_site,
run_hypermut,
check_long_deletion,
include_small_orfs,
hxb2_forward_orfs = const.DEFAULT_FORWARD_ORFs,
hxb2_reverse_orfs = const.DEFAULT_REVERSE_ORFS,
Expand Down Expand Up @@ -667,6 +683,11 @@ def intact( working_dir,
if hypermutated is not None:
sequence_errors.append(hypermutated)

# check_long_deletion is an on/off flag (False when the check is disabled),
# so test its truth value: `is not None` is true for False as well and
# would run the long-deletion check unconditionally.
if check_long_deletion:
    long_deletion = has_long_deletion(sequence, alignment)
    # has_long_deletion returns None when no error was detected.
    if long_deletion is not None:
        sequence_errors.append(long_deletion)

orfs[sequence.id] = hxb2_found_orfs
if len(sequence_errors) == 0:
intact_sequences.append(sequence)
Expand Down