diff --git a/START_HERE/run_config.yml b/START_HERE/run_config.yml index 260b146a..c905184f 100644 --- a/START_HERE/run_config.yml +++ b/START_HERE/run_config.yml @@ -202,10 +202,6 @@ shared_memory: False ##-- Suppress all alignments if > exist (default: no limit) (option -m) --## #suppress_aln: 10 -##-- Trim bases from 5' (left) or 3' (right) end of reads (options --trim5 and --trim3) --## -#trim5: 0 -#trim3: 0 - ##-- Input quals are from GA Pipeline ver. < 1.3 (option --solexa-quals) --## #solexa: false @@ -221,9 +217,6 @@ shared_memory: False ######-------------------------------------------------------------------------------###### -##-- If True: show all parsed features in the counts csv, regardless of count/identity --## -counter_all_features: False - ##-- If True: counts are normalized by genomic hits (number of multi-alignments) --## counter_normalize_by_genomic_hits: True diff --git a/images/features_sheet_header.png b/images/features_sheet_header.png index 0a868931..204d2726 100644 Binary files a/images/features_sheet_header.png and b/images/features_sheet_header.png differ diff --git a/images/tiny-count_selection.png b/images/tiny-count_selection.png index 9de0aa68..0e99d592 100644 Binary files a/images/tiny-count_selection.png and b/images/tiny-count_selection.png differ diff --git a/tests/testdata/config_files/run_config_template.yml b/tests/testdata/config_files/run_config_template.yml index 04fda2f8..74cdd0b8 100644 --- a/tests/testdata/config_files/run_config_template.yml +++ b/tests/testdata/config_files/run_config_template.yml @@ -202,10 +202,6 @@ shared_memory: False ##-- Suppress all alignments if > exist (default: no limit) (option -m) --## #suppress_aln: 10 -##-- Trim bases from 5' (left) or 3' (right) end of reads (options --trim5 and --trim3) --## -#trim5: 0 -#trim3: 0 - ##-- Input quals are from GA Pipeline ver. < 1.3 (option --solexa-quals) --## #solexa: false @@ -221,9 +217,6 @@ shared_memory: False ######-------------------------------------------------------------------------------###### -##-- If True: show all parsed features in the counts csv, regardless of count/identity --## -counter_all_features: False - ##-- If True: counts are normalized by genomic hits (number of multi-alignments) --## counter_normalize_by_genomic_hits: True diff --git a/tiny/cwl/tools/tiny-count.cwl b/tiny/cwl/tools/tiny-count.cwl index f2a695a6..2b54b683 100644 --- a/tiny/cwl/tools/tiny-count.cwl +++ b/tiny/cwl/tools/tiny-count.cwl @@ -50,11 +50,6 @@ inputs: inputBinding: prefix: --stepvector - all_features: - type: boolean? - inputBinding: - prefix: --all-features - in_pipeline: type: boolean? inputBinding: diff --git a/tiny/cwl/workflows/tinyrna_wf.cwl b/tiny/cwl/workflows/tinyrna_wf.cwl index 582ded97..0380bae8 100644 --- a/tiny/cwl/workflows/tinyrna_wf.cwl +++ b/tiny/cwl/workflows/tinyrna_wf.cwl @@ -86,7 +86,6 @@ inputs: counter_diags: boolean? counter_decollapse: boolean? counter_stepvector: string? - counter_all_features: boolean? counter_normalize_by_feature_hits: boolean? counter_normalize_by_genomic_hits: boolean? @@ -214,7 +213,6 @@ steps: aligned_seqs: bowtie/sam_out gff_files: gff_files out_prefix: run_name - all_features: counter_all_features normalize_by_feature_hits: source: counter_normalize_by_feature_hits valueFrom: $(String(self)) # convert boolean -> string diff --git a/tiny/rna/counter/counter.py b/tiny/rna/counter/counter.py index 5e577cdc..e451941c 100644 --- a/tiny/rna/counter/counter.py +++ b/tiny/rna/counter/counter.py @@ -64,9 +64,7 @@ def get_args(): optional_args.add_argument('-sv', '--stepvector', choices=['Cython', 'HTSeq'], default='Cython', help='Select which StepVector implementation is used to find ' 'features overlapping an interval.') - optional_args.add_argument('-a', '--all-features', action='store_true', help=argparse.SUPPRESS) - #help='Represent all features in output counts table, ' - # 'even if they did not match in Stage 1 selection.') + optional_args.add_argument('-a', '--all-features', action='store_true', help=argparse.SUPPRESS) # deprecated optional_args.add_argument('-p', '--in-pipeline', action='store_true', help='Indicates that tiny-count was invoked as part of a pipeline run ' 'and that input files should be sourced as such.') diff --git a/tiny/rna/plotter.py b/tiny/rna/plotter.py index 1271d325..78b30e03 100644 --- a/tiny/rna/plotter.py +++ b/tiny/rna/plotter.py @@ -362,9 +362,9 @@ def load_dge_tables(comparisons: list, class_fillna: str) -> pd.DataFrame: if not comparison: raise ValueError("Could not find condition names in DGE filename: " + dgefile) if len(comparison) > 1: - print("Warning: multiple conditions matched in DGE filename. Using first match.") + print("Warning: multiple conditions matched in DGE filename. Using last match.") - comparison_name = "_vs_".join(comparison[0]) + comparison_name = "_vs_".join(comparison[-1]) table = set_counts_table_multiindex(pd.read_csv(dgefile), class_fillna) de_table[comparison_name] = table['padj'] diff --git a/tiny/rna/plotterlib.py b/tiny/rna/plotterlib.py index e5516765..08fba8ec 100644 --- a/tiny/rna/plotterlib.py +++ b/tiny/rna/plotterlib.py @@ -241,8 +241,8 @@ def scatter_grouped(self, count_x: pd.DataFrame, count_y: pd.DataFrame, *groups, has_outgroup = all(co.replace(0, pd.NA).dropna().any() for co in (count_x_out, count_y_out)) - # Determine which groups we are able to plot on log scale - plottable_groups = self.get_nonzero_group_indexes(count_x, count_y, groups) + # Make all counts log-compatible, or drop the group if it is zero in both conditions + plottable_groups = self.nonzero_group_indexes(count_x, count_y, groups, view_lims) plot_labels = [labels[i] for i in plottable_groups] plot_groups = [groups[i] for i in plottable_groups] group_it = iter(plot_groups) @@ -271,20 +271,34 @@ def scatter_grouped(self, count_x: pd.DataFrame, count_y: pd.DataFrame, *groups, return gscat @staticmethod - def get_nonzero_group_indexes(count_x, count_y, groups): - """When scatter plotting groups for two conditions on a log scale, if one - of the conditions has all zero counts for the group, then none of the group's - points are actually plotted due to the singularity at 0. We want to skip - plotting these groups and omit them from the legend.""" + def nonzero_group_indexes(count_x, count_y, groups, view_lims): + """When scatter plotting features on a log scale, if the feature has a count of + zero in either condition then it is omitted from the plot by default due to the + singularity at 0, but we want to represent them nonetheless. So, we set their count + in the zero condition to the lower plot limit so that they are plotted on the very + edge of the plot space. Otherwise, approximating zero for these features would + shrink the plot. + + We still want to omit features that are zero in both conditions, and if an entire + group consists of zero counts, its label should be omitted from the legend. This + is accomplished by omitting its index from the returned list.""" non_zero_groups = [] + minpos = min(view_lims) for i, group in enumerate(groups): x, y = count_x.loc[group], count_y.loc[group] x_is_zeros = x.replace(0, pd.NA).dropna().empty y_is_zeros = y.replace(0, pd.NA).dropna().empty - if not (x_is_zeros or y_is_zeros): + + if not (x_is_zeros and y_is_zeros): non_zero_groups.append(i) + # Replace counts that are zero in only one condition + x.loc[(x == 0) & (y != 0)] = minpos + y.loc[(y == 0) & (x != 0)] = minpos + count_x.loc[group] = x + count_y.loc[group] = y + return non_zero_groups @staticmethod diff --git a/tiny/templates/compatibility/run_config_compatibility.yml b/tiny/templates/compatibility/run_config_compatibility.yml index 91e30785..8b13fb8d 100644 --- a/tiny/templates/compatibility/run_config_compatibility.yml +++ b/tiny/templates/compatibility/run_config_compatibility.yml @@ -7,6 +7,10 @@ 1.4.0: + remove: + - counter_all_features + - trim5 + - trim3 rename: - counter_normalize_by_hits: counter_normalize_by_feature_hits add: diff --git a/tiny/templates/run_config_template.yml b/tiny/templates/run_config_template.yml index b8eb7c22..782378fd 100644 --- a/tiny/templates/run_config_template.yml +++ b/tiny/templates/run_config_template.yml @@ -202,10 +202,6 @@ shared_memory: False ##-- Suppress all alignments if > exist (default: no limit) (option -m) --## #suppress_aln: 10 -##-- Trim bases from 5' (left) or 3' (right) end of reads (options --trim5 and --trim3) --## -#trim5: 0 -#trim3: 0 - ##-- Input quals are from GA Pipeline ver. < 1.3 (option --solexa-quals) --## #solexa: false @@ -221,9 +217,6 @@ shared_memory: False ######-------------------------------------------------------------------------------###### -##-- If True: show all parsed features in the counts csv, regardless of count/identity --## -counter_all_features: False - ##-- If True: counts are normalized by genomic hits (number of multi-alignments) --## counter_normalize_by_genomic_hits: True