From 48986991ab5f8c29dae75ef540a21b1d79a3949b Mon Sep 17 00:00:00 2001
From: Vitaliy Mysak
Date: Wed, 31 May 2023 15:19:13 -0700
Subject: [PATCH] add --check-long-deletion option

---
Review notes (everything between the '---' line above and the first
'diff --git' line is ignored by `git am`):

 * Fixed: the long-deletion check in intact() was guarded by
   `check_long_deletion is not None`. The value comes from a boolean click
   flag (default False), so it is never None and the check ran even with
   --ignore-long-deletion. The guard is now a plain truthiness test.
 * The result test now uses `is not None`, matching the neighbouring
   `hypermutated` check.
 * NOTE(review): LONGDELETION_ERROR is referenced but not introduced by any
   hunk here -- confirm it is already defined in intact/intact.py.
 * NOTE(review): indentation and blank context lines were reconstructed from
   a whitespace-flattened copy of this patch; verify against the target tree
   (or apply with --ignore-whitespace). The post-image blob id on the
   intact/intact.py `index` line predates the fix above.

 bin/proviral     | 12 ++++++++----
 intact/intact.py | 21 +++++++++++++++++++++
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/bin/proviral b/bin/proviral
index 077fefc..f1d5b61 100755
--- a/bin/proviral
+++ b/bin/proviral
@@ -46,6 +46,9 @@ def cli():
 @click.option(
     '--run-hypermut/--no-hypermut', default=False
 )
+@click.option(
+    '--check-long-deletion/--ignore-long-deletion', default=False
+)
 @click.option(
     '--include-small-orfs/--exclude-small-orfs', default=False)
 @click.option(
@@ -53,9 +56,9 @@ def cli():
     default=os.getcwd()
 )
 
-def intact(input_file, subtype, include_packaging_signal,
-        include_rre, check_major_splice_donor_site, run_hypermut,
-        include_small_orfs, working_folder):
+def intact(input_file, subtype, include_packaging_signal,
+           include_rre, check_major_splice_donor_site, run_hypermut,
+           check_long_deletion, include_small_orfs, working_folder):
     """
     Check consensus sequences for intactness.
     """
@@ -69,7 +72,8 @@ def intact(input_file, subtype, include_packaging_signal,
     try:
         intact_sequences, non_intact_sequences, orfs, errors = it.intact(
             folder, input_file, subtype, include_packaging_signal, include_rre,
-            check_major_splice_donor_site, run_hypermut, include_small_orfs
+            check_major_splice_donor_site, run_hypermut, check_long_deletion,
+            include_small_orfs
         )
         log.info('Intact sequences written to ' + intact_sequences)
         log.info('Non-intact sequences written to ' + non_intact_sequences)
diff --git a/intact/intact.py b/intact/intact.py
index 97cfd83..25d851b 100644
--- a/intact/intact.py
+++ b/intact/intact.py
@@ -107,7 +107,22 @@ def isHypermut(aln):
 
 #/end isHypermut
 
+def has_long_deletion(sequence, alignment):
+    """
+    Determines whether the sequence has a long deletion in it.
+    Keyword Args:
 
+        sequence -- the query sequence.
+        alignment -- multiple sequence alignment object containing the
+                     reference and query sequence (currently unused).
+    """
+    # NOTE: This is the same check that HIVSeqInR uses.
+    if len(sequence.seq) < 8000:
+        return IntactnessError(sequence.id,
+                               LONGDELETION_ERROR,
+                               "Query sequence contains a long deletion.")
+    return None
+#/end has_long_deletion
 
 
 def has_mutated_major_splice_donor_site(alignment,
@@ -554,6 +569,7 @@ def intact( working_dir,
             include_rre,
             check_major_splice_donor_site,
             run_hypermut,
+            check_long_deletion,
             include_small_orfs,
             hxb2_forward_orfs = const.DEFAULT_FORWARD_ORFs,
             hxb2_reverse_orfs = const.DEFAULT_REVERSE_ORFS,
@@ -667,6 +683,11 @@ def intact( working_dir,
             if hypermutated is not None:
                 sequence_errors.append(hypermutated)
 
+        if check_long_deletion:
+            long_deletion = has_long_deletion(sequence, alignment)
+            if long_deletion is not None:
+                sequence_errors.append(long_deletion)
+
         orfs[sequence.id] = hxb2_found_orfs
         if len(sequence_errors) == 0:
             intact_sequences.append(sequence)