MontgomeryLab · taimontgomery · Jan 13, 2023 · Jan 11, 2023 · Jan 11, 2023 · Jan 11, 2023
diff --git a/START_HERE/run_config.yml b/START_HERE/run_config.yml
@@ -285,6 +285,10 @@ plot_vector_points: False
 plot_len_dist_min:
 plot_len_dist_max:
 
+##-- Optionally set the log2 min and/or max view limits for scatter_by_dge plots; auto if unset --##
+plot_dge_scatter_min:
+plot_dge_scatter_max:
+
 ##-- Use this label in class plots for counts assigned by rules lacking a classifier --##
 plot_unknown_class: "_UNKNOWN_"
 
@@ -322,7 +326,7 @@ dir_name_plotter: plots
 #
 ###########################################################################################
 
-version: 1.2.1
+version: 1.2.2
 
 ######--------------------------- DERIVED FROM PATHS FILE ---------------------------######
 #

diff --git a/dev/macos_pycharm_r_plugin_patch.py b/dev/macos_pycharm_r_plugin_patch.py
@@ -0,0 +1,46 @@
+
+"""Addresses the error 'RWrapper terminated, exit code: 134 ... rpath ...'
+
+This issue is specific to macOS. The R Language plugin for PyCharm crashes when R has been
+installed via Conda. The issue is described here: https://youtrack.jetbrains.com/issue/R-1271
+
+The issue is still present in PyCharm 2022.3.1.
+This patch will need to be applied every time the plugin is updated."""
+
+import platform
+import sys
+import os
+import re
+
+from glob import glob
+
+if platform.system() != 'Darwin':
+    sys.exit("This patch is only for macOS")
+
+# Use the most recently modified PyCharm directory (assumed to be the latest version)
+pycharm_dirs = glob(os.path.expanduser("~/Library/Application Support/JetBrains/PyCharm*"))
+if not pycharm_dirs:
+    sys.exit("No PyCharm directory was found in ~/Library/Application Support/JetBrains")
+latest_pycharm = max(pycharm_dirs, key=os.path.getmtime)
+
+# The replacement R function. identical() is used because `!=` yields NA (an error in `if`) when CONDA_EXE is unset
+patched_fn = """getLDLibraryPath <- function() {
+  conda_path <- Sys.getenv("CONDA_EXE", unset = NA)
+  if (!is.na(conda_path)) conda_path = dirname(dirname(conda_path))
+  r_path <- Sys.getenv("R_HOME", unset = NA)
+  if (!is.na(r_path)) r_path = dirname(dirname(dirname(dirname(r_path))))
+
+  if (get_os() == "osx" && !identical(r_path, conda_path)) Sys.getenv("DYLD_FALLBACK_LIBRARY_PATH")
+  else if (get_os() == "linux") Sys.getenv("LD_LIBRARY_PATH")
+  else ""
+}"""
+
+# Match lazily up to the first closing brace at the start of a line so that later definitions are untouched
+target_fn = re.compile(r"getLDLibraryPath <- function\(\) \{.*?^\}", re.DOTALL | re.MULTILINE)
+target_file = f"{latest_pycharm}/plugins/r-plugin/R/GetEnvVars.R"
+with open(target_file, "r+") as f:
+    patched, n_subs = target_fn.subn(lambda _: patched_fn, f.read(), count=1)
+    if n_subs == 0: sys.exit(f"getLDLibraryPath() was not found in {target_file}")
+    f.seek(0)         # Overwrite the file in place
+    f.write(patched)
+    f.truncate()
diff --git a/doc/Parameters.md b/doc/Parameters.md
@@ -235,6 +235,14 @@ The scatter plots produced by tiny-plot have rasterized points by default. This
 
 The min and/or max bounds for plotted lengths can be set with this option. See [tiny-plot's documentation](tiny-plot.md#length-bounds) for more information about how these values are determined if they aren't set.
 
+### Bounds for scatter_by_dge Plots
+| Run Config Key        | Commandline Argument |
+|-----------------------|----------------------|
+| plot_dge_scatter_min: | `--dge-min VALUE`    | 
+| plot_dge_scatter_max: | `--dge-max VALUE`    |
+
+The min and/or max bounds for DGE scatter plots can be set with this option. The value you provide should be a log2 count value and can be whole or fractional, e.g. `--dge-min 1.9` would produce a plot whose first tick mark is labeled 2 and would include points for feature counts as low as 3.74. Unspecified bounds are automatically calculated to fit the data, and will include the margin specified by the `axes.[x/y]margin` key in the [Plot Stylesheet](Configuration.md#plot-stylesheet-details).
+
 ### Labels for Class-related Plots
 | Run Config Key         | Commandline Argument |
 |------------------------|----------------------|
@@ -256,9 +264,9 @@ If an inclusive filter is used, then only the classes in the list, if present, a
 tiny-plot [-rc RAW_COUNTS] [-nc NORM_COUNTS] [-uc RULE_COUNTS]
           [-ss STAT] [-dge COMPARISON [COMPARISON ...]]
           [-len 5P_LEN [5P_LEN ...]] [-o PREFIX] [-pv VALUE]
-          [-s MPLSTYLE] [-v] [-ldi VALUE] [-lda VALUE] [-una LABEL]
-          [-unk LABEL] [-ic CLASS [CLASS ...] | -ec CLASS [CLASS ...]]
-          -p PLOT [PLOT ...]
+          [-s MPLSTYLE] [-v] [-ldi VALUE] [-lda VALUE] [-dgi VALUE]
+          [-dga VALUE] [-una LABEL] [-unk LABEL] [-ic CLASS [CLASS ...]
+          | -ec CLASS [CLASS ...]] -p PLOT [PLOT ...]
 
 This script produces basic static plots for publication as part of the tinyRNA
 workflow.
@@ -317,6 +325,10 @@ Optional arguments:
                         len_dist plots will start at this value
   -lda VALUE, --len-dist-max VALUE
                         len_dist plots will end at this value
+  -dgi VALUE, --dge-min VALUE
+                        scatter_by_dge plots will start at this log2 value
+  -dga VALUE, --dge-max VALUE
+                        scatter_by_dge plots will end at this log2 value
   -una LABEL, --unassigned-class LABEL
                         Use this label in class-related plots for unassigned
                         counts

diff --git a/doc/tiny-plot.md b/doc/tiny-plot.md
@@ -102,9 +102,14 @@ Differential gene expression between sample groups can be visualized with this p
     <img src="../images/plots/scatter_dge.jpg" width="80%" alt="sample_avg_scatter_by_dge"/>
 </p>
 
-#### Customization
-The P value cutoff can be changed using the [Run Config or commandline arguments](Parameters.md#p-value). The control condition is plotted on the x-axis, but it must be specified in your Samples Sheet prior to running an end-to-end or `tiny recount` analysis. If using `tiny replot`, is not possible to change a no-control experiment to a control experiment and have these changes reflected in these plots. This is because tiny-deseq.r must be aware of the control condition in order to perform the proper directional comparisons.
+#### P value Threshold
+The P value cutoff [can be changed](Parameters.md#p-value) (default: 0.05).
 
+#### Control Conditions
+The control condition is plotted on the x-axis, but it must be specified in your Samples Sheet prior to running an end-to-end or `tiny recount` analysis. If using `tiny replot`, it is not possible to change a no-control experiment to a control experiment and have these changes reflected in these plots. This is because tiny-deseq.r must be aware of the control condition in order to perform the proper directional comparisons.
+
+#### View Limits
+Both the lower and upper bound of the plot's axes [can be set manually](Parameters.md#bounds-for-scatter_by_dge-plots). Unspecified bounds are automatically calculated to fit the data.
 
 
 

diff --git a/setup.py b/setup.py
@@ -14,7 +14,7 @@
 AUTHOR = 'Kristen Brown, Alex Tate'
 PLATFORM = 'Unix'
 REQUIRES_PYTHON = '>=3.9.0'
-VERSION = '1.2.1'
+VERSION = '1.2.2'
 REQUIRED = []  # Required packages are installed via Conda's environment.yml
 
 

diff --git a/tests/testdata/config_files/run_config_template.yml b/tests/testdata/config_files/run_config_template.yml
@@ -285,12 +285,21 @@ plot_vector_points: False
 plot_len_dist_min:
 plot_len_dist_max:
 
+##-- Optionally set the log2 min and/or max view limits for scatter_by_dge plots; auto if unset --##
+plot_dge_scatter_min:
+plot_dge_scatter_max:
+
 ##-- Use this label in class plots for counts assigned by rules lacking a classifier --##
 plot_unknown_class: "_UNKNOWN_"
 
 ##-- Use this label in class plots for unassigned counts --##
 plot_unassigned_class: "_UNASSIGNED_"
 
+##-- Optionally filter the classes in class scatter plots --##
+plot_class_scatter_filter:
+  style: include  # Choose: include or exclude
+  classes: []     # Add classes between [ and ], separated by comma
+
 
 ######----------------------------- OUTPUT DIRECTORIES ------------------------------######
 #
@@ -317,7 +326,7 @@ dir_name_plotter: plots
 #
 ###########################################################################################
 
-version: 1.2
+version: 1.2.2
 
 ######--------------------------- DERIVED FROM PATHS FILE ---------------------------######
 #
@@ -367,4 +376,17 @@ run_deseq: True
 #
 # The following configuration settings are automatically derived from the Features Sheet
 #
-######-------------------------------------------------------------------------------######
+######-------------------------------------------------------------------------------######
+
+
+
+######--------------------------- DERIVED FROM RUN CONFIG ---------------------------######
+#
+# The following configuration settings are automatically derived from this file
+#
+######-------------------------------------------------------------------------------######
+
+##-- Utilized by tiny-plot --##
+# Filters for class scatter plots
+plot_class_scatter_filter_include: []
+plot_class_scatter_filter_exclude: []
diff --git a/tiny/cwl/tools/tiny-plot.cwl b/tiny/cwl/tools/tiny-plot.cwl
@@ -74,6 +74,18 @@ inputs:
       prefix: -lda
     doc: "The last length to plot in the range for len_dist plots"
 
+  dge_min:
+    type: double?
+    inputBinding:
+      prefix: -dgi
+    doc: "The log2 lower view limit in DGE scatter plots"
+
+  dge_max:
+    type: double?
+    inputBinding:
+      prefix: -dga
+    doc: "The log2 upper view limit in DGE scatter plots"
+
   unknown_class_label:
     type: string?
     inputBinding:

diff --git a/tiny/cwl/workflows/tinyrna_wf.cwl b/tiny/cwl/workflows/tinyrna_wf.cwl
@@ -99,6 +99,8 @@ inputs:
   plot_vector_points: boolean?
   plot_len_dist_min: int?
   plot_len_dist_max: int?
+  plot_dge_scatter_min: double?
+  plot_dge_scatter_max: double?
   plot_style_sheet: File?
   plot_pval: float?
   plot_unknown_class: string?
@@ -258,6 +260,8 @@ steps:
         pickValue: all_non_null
         valueFrom: |
           $(self.length ? self[0] : null)
+      dge_min: plot_dge_scatter_min
+      dge_max: plot_dge_scatter_max
       unknown_class_label: plot_unknown_class
       unassigned_class_label: plot_unassigned_class
       classes_include: plot_class_scatter_filter_include

diff --git a/tiny/rna/plotter.py b/tiny/rna/plotter.py
@@ -65,6 +65,10 @@ def get_args():
                                help='len_dist plots will start at this value')
     optional_args.add_argument('-lda', '--len-dist-max', metavar='VALUE', type=int,
                                help='len_dist plots will end at this value')
+    optional_args.add_argument('-dgi', '--dge-min', metavar='VALUE', type=float,
+                               help='scatter_by_dge plots will start at this log2 value')
+    optional_args.add_argument('-dga', '--dge-max', metavar='VALUE', type=float,
+                               help='scatter_by_dge plots will end at this log2 value')
     optional_args.add_argument('-una', '--unassigned-class', metavar='LABEL', default='_UNASSIGNED_',
                                help='Use this label in class-related plots for unassigned counts'),
     optional_args.add_argument('-unk', '--unknown-class', metavar='LABEL', default='_UNKNOWN_',
@@ -163,7 +167,7 @@ def get_len_dist_dict(files_list: list) -> DefaultDict[str, Dict[str, pd.DataFra
             # File does not appear to have been produced by the pipeline
             condition_and_rep = basename
 
-        subtype = "Assigned" if "assigned" in condition_and_rep else "Mapped"
+        subtype = "assigned" if "assigned" in condition_and_rep else "mapped"
         matrices[subtype][condition_and_rep] = pd.read_csv(file, index_col=0)
 
     return matrices
@@ -186,7 +190,7 @@ def class_charts(raw_class_counts: pd.DataFrame, mapped_reads: pd.Series, out_pr
 
     for library in raw_class_counts:
         chart = aqplt.barh_proportion(class_props[library], max_prop, scale, **kwargs)
-        chart.set_title("Percentage of Small RNAs by Class")
+        chart.set_title("Percentage of small RNAs by class")
         chart.set_ylabel("Class")
 
         # Save the plot
@@ -214,7 +218,7 @@ def rule_charts(rule_counts: pd.DataFrame, out_prefix: str, scale=2, **kwargs):
 
     for library, prop_df in rule_props.items():
         chart = aqplt.barh_proportion(prop_df, max_prop, scale, **kwargs)
-        chart.set_title("Percentage of Small RNAs by Matched Rule")
+        chart.set_title("Percentage of small RNAs by matched rule")
         chart.set_ylabel("Rule")
 
         # Save the plot
@@ -405,7 +409,7 @@ def scatter_by_dge_class(counts_avg_df, dges, output_prefix, view_lims, include=
     aqplt.set_dge_class_legend_style()
 
     for pair in dges:
-        ut, tr = pair.split("_vs_")  # untreated, treated
+        tr, ut = pair.split("_vs_")  # treated, untreated
         dge_classes = dges[dges[pair] < pval].groupby(level=1).groups
 
         labels, grp_args = zip(*dge_classes.items()) if dge_classes else ((), ())
@@ -442,7 +446,7 @@ def scatter_by_dge(counts_avg_df, dges, output_prefix, view_lims, pval=0.05):
 
     for pair in dges:
         grp_args = dges.index[dges[pair] < pval]
-        ut, tr = pair.split("_vs_")  # untreated, treated
+        tr, ut = pair.split("_vs_")  # treated, untreated
 
         labels = ['p < %g' % pval] if not grp_args.empty else []
         colors = aqplt.assign_class_colors(labels)
@@ -615,7 +619,7 @@ def setup(args: argparse.Namespace) -> dict:
         'sample_rep_dict': lambda: get_sample_rep_dict(fetched["norm_counts_df"]),
         'norm_counts_avg_df': lambda: get_sample_averages(fetched["norm_counts_df"], fetched["sample_rep_dict"]),
         'class_counts_df': lambda: get_class_counts(fetched["raw_counts_df"]),
-        'avg_view_lims': lambda: aqplt.get_scatter_view_lims(fetched["norm_counts_avg_df"]),
+        'avg_view_lims': lambda: aqplt.get_scatter_view_lims(fetched["norm_counts_avg_df"], args.dge_min, args.dge_max),
         'norm_view_lims': lambda: aqplt.get_scatter_view_lims(fetched["norm_counts_df"].select_dtypes(['number']))
     }
 

diff --git a/tiny/rna/plotterlib.py b/tiny/rna/plotterlib.py
@@ -77,9 +77,9 @@ def len_dist_bar(self, size_prop: pd.DataFrame, subtype: str, **kwargs) -> plt.A
             sizeb = size_prop.plot(kind='bar', stacked=True, reuse_plot=True, **kwargs)
             sizeb.tick_params(axis='x', labelsize=font_size, rotation=0)
             sizeb.set_ylim(0, np.max(np.sum(size_prop, axis=1)) + 0.025)
-            sizeb.set_title(f'Distribution of {subtype} Reads')
-            sizeb.set_ylabel('Proportion of Reads')
-            sizeb.set_xlabel('Length of Sequence')
+            sizeb.set_title(f'Distribution of {subtype} reads')
+            sizeb.set_ylabel('Proportion of reads')
+            sizeb.set_xlabel('Length of sequence')
 
         return sizeb
 
@@ -134,7 +134,7 @@ def barh_proportion(self, prop_ds: pd.Series, max_prop=1.0, scale=2, **kwargs) -
         # Create the plot and set plot attributes
         cbar = (prop_ds * 100).plot(kind='barh', ax=ax, color=bar_colors, sort_columns=False, **kwargs)
         cbar.xaxis.set_major_formatter(tix.PercentFormatter())
-        cbar.set_xlabel('Percentage of Reads')
+        cbar.set_xlabel('Percentage of reads')
         cbar.set_xlim(0, min([(max_prop * 100) + 10, 100]))
 
         # Remove irrelevant plot attributes
@@ -321,28 +321,54 @@ def set_dge_class_legend_style(self):
         scatter.set_position(orig_axes_pos.transformed(transFigure.inverted()))
 
     @staticmethod
-    def get_scatter_view_lims(counts_df: pd.DataFrame) -> Tuple[float, float]:
-        """Calculates scatter view limits for the counts dataframe"""
+    def get_scatter_view_lims(counts_df: pd.DataFrame, vmin: float = None, vmax: float = None) -> Tuple[float, float]:
+        """Calculates the (min, max) scatter view limits, in linear scale, for the counts dataframe
 
-        x0 = counts_df.min(axis='columns').where(lambda x: x != 0).dropna().min()
+        Args:
+            counts_df: A pandas dataframe of counts per feature
+            vmin: Optional log2 minimum view limit; calculated from the counts (plus margin) if None
+            vmax: Optional log2 maximum view limit; calculated from the counts (plus margin) if None
+        """
+
+        # For transforming values to/from log2 scale
+        transform = LogTransform(base=2)
+        inverse_trans = transform.inverted()
+
+        # Both limits were user-specified, no calculation necessary. A lone limit is merged with the calculated one below
+        if vmin is not None and vmax is not None:
+            return inverse_trans.transform([vmin, vmax])
+
+        # Obtain the minimum (nonzero) and maximum counts from the counts dataframe
+        x0 = counts_df.replace(0, pd.NA).min(axis="columns").dropna().min()
         x1 = counts_df.max().max()
         minpos = 1e-300
 
         if not np.isfinite([x0, x1]).all() or not isinstance(x0, np.float) or x1 <= 0:
-            print("The provided dataset contains invalid values.")
+            print("The provided dataset contains invalid values.", file=sys.stderr)
             return (minpos, minpos)
 
+        # Avoid log2(0) errors
         x0, x1 = (minpos if x0 <= 0 else x0,
                   minpos if x1 <= 0 else x1)
 
-        transform = LogTransform(base=2)
-        inverse_trans = transform.inverted()
+        # Get axes margin preferences from stylesheet (a set, so equal values collapse to one)
+        rc_mar = {mpl.rcParams.get(f"axes.{m}", 0)
+                  for m in ('xmargin', 'ymargin')}
+
+        margin = max(rc_mar)
+        if len(rc_mar) != 1:
+            print("Stylesheet values for axes.xmargin and axes.ymargin differ. "
+                  "The larger value will be chosen for the scatter plot margin.",
+                  file=sys.stderr)
 
+        # Calculate plot margins in log2 space
         x0t, x1t = transform.transform([x0, x1])
-        delta = (x1t - x0t) * mpl.rcParams.get('axes.xmargin', 0)
+        delta = (x1t - x0t) * margin
         if not np.isfinite(delta): delta = 0
 
-        return inverse_trans.transform([x0t - delta, x1t + delta])
+        if vmin is None: vmin = x0t - delta  # Fill in whichever limit the user left unset
+        if vmax is None: vmax = x1t + delta
+        return inverse_trans.transform([vmin, vmax])
 
     @staticmethod
     def set_square_scatter_view_lims(ax: plt.Axes, min_max=None):
@@ -392,7 +418,7 @@ def set_scatter_ticks(self, ax: plt.Axes, minor_ticks=False):
         """Intelligently creates major and minor ticks for a square scatter plot while avoiding crowding"""
 
         # Get tick locations corresponding to the current view limits
-        major_locs, ax_min, ax_max = self.get_fixed_majorticklocs(ax.viewLim.bounds)
+        major_locs, ax_min, ax_max = self.get_fixed_majorticklocs(ax.viewLim.extents)
 
         ax.xaxis.set_major_locator(tix.FixedLocator(major_locs))
         ax.yaxis.set_major_locator(tix.FixedLocator(major_locs))
@@ -403,7 +429,7 @@ def set_scatter_ticks(self, ax: plt.Axes, minor_ticks=False):
 
         for axis in [ax.xaxis, ax.yaxis]:
             # Only display every nth major tick label
-            n = int(np.log2(len(major_locs)) - 1)
+            n = int(np.log2(len(major_locs)) - 1) or 1
             ticks_displayed, last_idx = self.every_nth_label(axis, n)
 
             if minor_ticks: