From 74a92e947d49c9e0f05c9baf1f780a4dc170303a Mon Sep 17 00:00:00 2001
From: Alex Tate <0xalextate@gmail.com>
Date: Thu, 13 Oct 2022 17:04:03 -0700
Subject: [PATCH 1/9] Simplifying the user's procedure for activating
bowtie-build. Run Config's run_bowtie_build is now an automatically
configured value that is triggered by an empty ebwt prefix in the Paths
Sheet.
---
START_HERE/paths.yml | 5 ++---
START_HERE/run_config.yml | 5 +----
tiny/templates/paths.yml | 5 ++---
tiny/templates/run_config_template.yml | 7 ++-----
4 files changed, 7 insertions(+), 15 deletions(-)
diff --git a/START_HERE/paths.yml b/START_HERE/paths.yml
index c83a7607..cfdd6281 100644
--- a/START_HERE/paths.yml
+++ b/START_HERE/paths.yml
@@ -23,9 +23,8 @@ tmp_directory:
######-------------------------------- BOWTIE-BUILD ---------------------------------######
#
# To build bowtie indexes:
-# 1. Your Run Config file must contain run_bowtie_build: true
-# 2. Your reference genome file(s) must be listed under reference_genome_files (below)
-# 3. ebwt (below) must be an empty string, or ''
+# 1. Your reference genome file(s) must be listed under reference_genome_files (below)
+# 2. ebwt (below) must be empty (nothing after ":")
#
# Once your indexes have been built, this config file will be modified such
# that ebwt points to their location (prefix) within your Run Directory. This
diff --git a/START_HERE/run_config.yml b/START_HERE/run_config.yml
index fc48ef7c..e6489931 100644
--- a/START_HERE/run_config.yml
+++ b/START_HERE/run_config.yml
@@ -28,10 +28,6 @@ paths_config: ./paths.yml
##-- If none provided, the default of user_tinyrna will be used --##
run_name: tinyrna
-##-- If True: run bowtie-build before analyzing libraries --##
-##-- NOTE: this option may be ignored depending on your Paths file. See Paths file. --##
-run_bowtie_build: True
-
##-- Number of threads to use when a step supports multi-threading --##
##-- For best performance, this should be equal to your computer's processor core count --##
threads: 4
@@ -334,6 +330,7 @@ run_directory: ~
tmp_directory: ~
features_csv: { }
samples_csv: { }
+run_bowtie_build: false
reference_genome_files: [ ]
plot_style_sheet: ~
adapter_fasta: ~
diff --git a/tiny/templates/paths.yml b/tiny/templates/paths.yml
index 6d8ed972..c6553f71 100644
--- a/tiny/templates/paths.yml
+++ b/tiny/templates/paths.yml
@@ -23,9 +23,8 @@ tmp_directory:
######-------------------------------- BOWTIE-BUILD ---------------------------------######
#
# To build bowtie indexes:
-# 1. Your Run Config file must contain run_bowtie_build: true
-# 2. Your reference genome file(s) must be listed under reference_genome_files (below)
-# 3. ebwt (below) must be an empty string, or ''
+# 1. Your reference genome file(s) must be listed under reference_genome_files (below)
+# 2. ebwt (below) must be empty (nothing after ":")
#
# Once your indexes have been built, this config file will be modified such
# that ebwt points to their location (prefix) within your Run Directory. This
diff --git a/tiny/templates/run_config_template.yml b/tiny/templates/run_config_template.yml
index 71d85e99..86853a02 100644
--- a/tiny/templates/run_config_template.yml
+++ b/tiny/templates/run_config_template.yml
@@ -22,16 +22,12 @@
user: ~
run_date: ~
run_time: ~
-paths_config: ../../START_HERE/paths.yml
+paths_config: paths.yml
##-- The label for final outputs --##
##-- If none provided, the default of user_tinyrna will be used --##
run_name: my_first_run
-##-- If True: run bowtie-build before analyzing libraries --##
-##-- NOTE: this option may be ignored depending on your Paths file. See Paths file. --##
-run_bowtie_build: True
-
##-- Number of threads to use when a step supports multi-threading --##
##-- For best performance, this should be equal to your computer's processor core count --##
threads: 4
@@ -334,6 +330,7 @@ run_directory: ~
tmp_directory: ~
features_csv: { }
samples_csv: { }
+run_bowtie_build: false
reference_genome_files: [ ]
plot_style_sheet: ~
adapter_fasta: ~
From 129229c91ae8f174aaf3e1daeebda360637eaaa4 Mon Sep 17 00:00:00 2001
From: Alex Tate <0xalextate@gmail.com>
Date: Thu, 13 Oct 2022 17:05:13 -0700
Subject: [PATCH 2/9] Updating the CWL to collect long bowtie indexes (*.ebwtl)
if bowtie-build produces them
---
tiny/cwl/tools/bowtie-build.cwl | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tiny/cwl/tools/bowtie-build.cwl b/tiny/cwl/tools/bowtie-build.cwl
index 0658c987..7f4bfd09 100644
--- a/tiny/cwl/tools/bowtie-build.cwl
+++ b/tiny/cwl/tools/bowtie-build.cwl
@@ -83,7 +83,7 @@ outputs:
index_files:
type: File[]
outputBinding:
- glob: $(inputs.ebwt_base).*.ebwt
+ glob: $(inputs.ebwt_base).*.ebwt*
console_output:
type: stdout
\ No newline at end of file
From 05856b02e3cb5ba652646d5685bf6e9c25d92fac Mon Sep 17 00:00:00 2001
From: Alex Tate <0xalextate@gmail.com>
Date: Thu, 13 Oct 2022 17:14:15 -0700
Subject: [PATCH 3/9] Configuration.setup_ebwt_idx() has been updated for the
new bowtie-build activation procedure. If the user defines ebwt but index
files can't be found, and they also provided their reference genome files,
then the pipeline will automatically rebuild the indexes and update the Paths
Sheet at the end of the end-to-end run.
setup_ebwt_idx() has also been significantly refactored and cleaned up. It has been bugging me for a long time and it feels good to see it in better shape.
---
tiny/rna/configuration.py | 81 ++++++++++++++++++++++++++-------------
1 file changed, 54 insertions(+), 27 deletions(-)
diff --git a/tiny/rna/configuration.py b/tiny/rna/configuration.py
index dde86823..9e629c10 100644
--- a/tiny/rna/configuration.py
+++ b/tiny/rna/configuration.py
@@ -279,38 +279,65 @@ def setup_pipeline(self):
self.templates = resource_filename('tiny', 'templates/')
def setup_ebwt_idx(self):
- """Bowtie index files and prefix"""
-
- # Determine if bowtie-build should run, and set Bowtie index prefix accordingly
- bt_index_prefix = self.paths['ebwt']
- if self['run_bowtie_build'] and not bt_index_prefix:
- if not self['reference_genome_files']:
- raise ValueError(f"If {self.basename} contains 'run_bowtie_build: True', you "
- f"need to provide your reference genome files in {self.paths.basename}")
-
- # Outputs are saved in {run_directory}/bowtie-build, within which prefix is first genome file's basename
- first_genome_file = self.paths.from_here(self['reference_genome_files'][0]['path'])
- bt_index_prefix = self.prefix(os.path.join(
- self['run_directory'], "bowtie-build", os.path.basename(first_genome_file))
- )
-
- self['ebwt'] = self.paths['ebwt'] = bt_index_prefix
+ """Determines Bowtie index prefix and whether bowtie-build should run"""
+
+ # Empty values for ebwt (''/~/None) trigger bowtie-build
+ self['run_bowtie_build'] = not bool(self.paths['ebwt'])
+
+ if self['run_bowtie_build']:
+ # Set the prefix to the run directory outputs. This is necessary
+ # because workflow requires bt_index_files to be a populated list.
+ prefix = self.get_ebwt_prefix()
else:
- # bowtie-build should only run if 'run_bowtie_build' is True AND ebwt (index prefix) is undefined
- self['run_bowtie_build'] = False
- bt_index_prefix = self.paths.from_here(bt_index_prefix)
+ prefix = self.paths.from_here(self.paths['ebwt'])
- # Bowtie index files
- try:
- self['bt_index_files'] = [self.cwl_file(bt_index_prefix + postfix, verify=(not self['run_bowtie_build']))
- for postfix in ['.1.ebwt', '.2.ebwt', '.3.ebwt', '.4.ebwt', '.rev.1.ebwt', '.rev.2.ebwt']]
- except FileNotFoundError as e:
- sys.exit("The following file could not be found from the Bowtie index prefix defined in your Paths File:\n"
- "%s" % (e.filename,))
+ # verify_bowtie_build_outputs() will check if these end up being long indexes
+ self['bt_index_files'] = self.get_bt_index_files(prefix)
# When CWL copies bt_index_filex for the bowtie.cwl InitialWorkDirRequirement, it does not
# preserve the prefix path. What the workflow "sees" is the ebwt files at working dir root
- self["ebwt"] = os.path.basename(self["ebwt"])
+ self["ebwt"] = os.path.basename(prefix)
+
+    def get_ebwt_prefix(self):
+        """Determines the output prefix path for bowtie indexes that haven't been built yet. The basename
+        of the prefix path is simply the basename of the first reference genome sans file extension.
+        Raises ValueError if no reference genome files were provided."""
+        genome_files = self['reference_genome_files']
+        if not genome_files:
+            raise ValueError("If your Paths Sheet doesn't have a value for \"ebwt:\", then bowtie indexes "
+                             "will be built, but you'll need to provide your reference genome files under "
+                             '"reference_genome_files:" (also in your Paths Sheet)')
+
+        genome_basename = os.path.basename(genome_files[0]['path'])
+        return self.prefix(os.path.join( # prefix path:
+            self['run_directory'], self['dir_name_bt_build'], genome_basename
+        ))
+
+ def get_bt_index_files(self, prefix):
+ """Builds the list of expected bowtie index files from the ebwt prefix. If an index file
+ doesn't exist then they will be automatically rebuilt from the user's reference genomes.
+ File existence isn't checked if bowtie-build is already scheduled for this run."""
+
+ try:
+ verify_file_paths = not bool(self['run_bowtie_build'])
+ ext = "ebwt"
+
+ return [
+ self.cwl_file(f"{prefix}.{subext}.{ext}", verify=verify_file_paths)
+ for subext in ['1', '2', '3', '4', 'rev.1', 'rev.2']
+ ]
+ except FileNotFoundError as e:
+ problem = "The following Bowtie index file couldn't be found:\n\t%s\n\n" % (e.filename,)
+ rebuild = "Indexes will be built from your reference genome files during this run."
+ userfix = "Please either correct your ebwt prefix or add reference genomes in the Paths File."
+
+ if self['reference_genome_files']:
+ print(problem + rebuild, file=sys.stderr)
+ new_prefix = self.get_ebwt_prefix()
+ self['run_bowtie_build'] = True
+ return self.get_bt_index_files(new_prefix)
+ else:
+ sys.exit(problem + userfix)
def validate_inputs(self):
"""For now, only GFF files are validated here"""
From 875b205a9e4fde1f20653298036a8fdd26407479 Mon Sep 17 00:00:00 2001
From: Alex Tate <0xalextate@gmail.com>
Date: Thu, 13 Oct 2022 18:05:07 -0700
Subject: [PATCH 4/9] Adding execute_post_run_tasks() to Configuration. This
function calls others that verify bowtie-build outputs were produced, updates
index paths if long indexes were produced, then saves updates to the Paths
Sheet and Run Config.
This addresses a long-running problem where end-to-end runs with a bowtie-build step would not save the updated ebwt in the Paths Sheet if a downstream step produced an error. Now, the updated ebwt path is written to the Paths Sheet at the end of any run where at least bowtie-build ran successfully.
---
tiny/entry.py | 6 +-----
tiny/rna/configuration.py | 23 +++++++++++++++++++++++
2 files changed, 24 insertions(+), 5 deletions(-)
diff --git a/tiny/entry.py b/tiny/entry.py
index feccf61d..6dc6c44c 100644
--- a/tiny/entry.py
+++ b/tiny/entry.py
@@ -100,11 +100,7 @@ def run(tinyrna_cwl_path: str, config_file: str) -> None:
# Use the cwltool CWL runner via command line
return_code = run_cwltool_subprocess(config_object, workflow, run_directory)
- # If the workflow completed without errors, we want to update
- # the Paths Sheet to point to the new bowtie index prefix
- if config_object['run_bowtie_build'] and return_code == 0:
- paths_sheet_filename = config_object.paths.inf
- config_object.paths.write_processed_config(paths_sheet_filename)
+ config_object.execute_post_run_tasks(return_code)
@report_execution_time("Pipeline resume runtime")
diff --git a/tiny/rna/configuration.py b/tiny/rna/configuration.py
index 9e629c10..bdd9d777 100644
--- a/tiny/rna/configuration.py
+++ b/tiny/rna/configuration.py
@@ -11,6 +11,7 @@
from collections import Counter, OrderedDict
from datetime import datetime
from typing import Union, Any
+from glob import glob
from tiny.rna.counter.validation import GFFValidator
@@ -353,6 +354,28 @@ def validate_inputs(self):
genomes=self.paths['reference_genome_files'],
alignments=None # Used in tiny-count standalone runs
).validate()
+
+ def execute_post_run_tasks(self, return_code):
+ if self['run_bowtie_build']:
+ self.verify_bowtie_build_outputs()
+
+    def verify_bowtie_build_outputs(self):
+        """Ensures that bowtie indexes were produced before saving the new ebwt prefix to the Paths File.
+        If large indexes were produced, paths under bt_index_files are updated in the processed Run Config.
+        Raises FileNotFoundError if an expected large index (*.ebwtl) is missing from the bowtie-build outputs."""
+        indexes = glob(os.path.join(self['run_directory'], self['dir_name_bt_build'], "*.ebwt*"))
+        large_indexes = [f for f in indexes if f.endswith(".ebwtl")]
+
+        # Update Paths File
+        if indexes:
+            self.paths.write_processed_config(self.paths.inf)
+
+        # Update Run Config (explicit check rather than assert, which is stripped under python -O)
+        if large_indexes:
+            for expected in self['bt_index_files']:
+                expected['path'] += "l"
+                if expected['path'] not in large_indexes: raise FileNotFoundError(expected['path'])
+            self.write_processed_config()
def save_run_profile(self, config_file_name=None) -> str:
"""Saves Samples Sheet and processed run config to the Run Directory for record keeping"""
From 3154f76f15bbcec13968d9decb67c68ffb02861c Mon Sep 17 00:00:00 2001
From: Alex Tate <0xalextate@gmail.com>
Date: Thu, 13 Oct 2022 18:30:39 -0700
Subject: [PATCH 5/9] Misc bugfixes addressing changes made in this issue
---
tiny/rna/configuration.py | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/tiny/rna/configuration.py b/tiny/rna/configuration.py
index bdd9d777..31da0969 100644
--- a/tiny/rna/configuration.py
+++ b/tiny/rna/configuration.py
@@ -288,16 +288,16 @@ def setup_ebwt_idx(self):
if self['run_bowtie_build']:
# Set the prefix to the run directory outputs. This is necessary
# because workflow requires bt_index_files to be a populated list.
- prefix = self.get_ebwt_prefix()
+ self.paths['ebwt'] = self.get_ebwt_prefix()
else:
- prefix = self.paths.from_here(self.paths['ebwt'])
+ self.paths['ebwt'] = self.paths.from_here(self.paths['ebwt'])
# verify_bowtie_build_outputs() will check if these end up being long indexes
- self['bt_index_files'] = self.get_bt_index_files(prefix)
+ self['bt_index_files'] = self.get_bt_index_files()
# When CWL copies bt_index_filex for the bowtie.cwl InitialWorkDirRequirement, it does not
# preserve the prefix path. What the workflow "sees" is the ebwt files at working dir root
- self["ebwt"] = os.path.basename(prefix)
+ self['ebwt'] = os.path.basename(self.paths['ebwt'])
def get_ebwt_prefix(self):
"""Determines the output prefix path for bowtie indexes that haven't been built yet. The basename
@@ -314,13 +314,14 @@ def get_ebwt_prefix(self):
self['run_directory'], self['dir_name_bt_build'], genome_basename
))
- def get_bt_index_files(self, prefix):
+ def get_bt_index_files(self):
"""Builds the list of expected bowtie index files from the ebwt prefix. If an index file
doesn't exist then they will be automatically rebuilt from the user's reference genomes.
File existence isn't checked if bowtie-build is already scheduled for this run."""
try:
verify_file_paths = not bool(self['run_bowtie_build'])
+ prefix = self.paths['ebwt']
ext = "ebwt"
return [
@@ -334,9 +335,9 @@ def get_bt_index_files(self, prefix):
if self['reference_genome_files']:
print(problem + rebuild, file=sys.stderr)
- new_prefix = self.get_ebwt_prefix()
+ self.paths['ebwt'] = self.get_ebwt_prefix()
self['run_bowtie_build'] = True
- return self.get_bt_index_files(new_prefix)
+ return self.get_bt_index_files()
else:
sys.exit(problem + userfix)
From 0476e0459f473c380a9f2df987aef25b5c87ab25 Mon Sep 17 00:00:00 2001
From: Alex Tate <0xalextate@gmail.com>
Date: Thu, 13 Oct 2022 18:32:24 -0700
Subject: [PATCH 6/9] Misc small improvements to clarity of code and comments,
plus a stability fix for cases where ["reference_genome_files"] contains
empty list items
---
tiny/rna/configuration.py | 20 ++++++++++++--------
1 file changed, 12 insertions(+), 8 deletions(-)
diff --git a/tiny/rna/configuration.py b/tiny/rna/configuration.py
index 31da0969..ea02ac30 100644
--- a/tiny/rna/configuration.py
+++ b/tiny/rna/configuration.py
@@ -45,7 +45,7 @@ def __init__(self, config_file: str):
def __getitem__(self, key: str) -> Any:
return self.get(key)
- def __setitem__(self, key: str, val: Union[str, list, dict, bool]) -> Union[str, list, dict, bool]:
+ def __setitem__(self, key: str, val: Union[str, list, dict, bool, None]) -> Union[str, list, dict, bool, None]:
return self.set(key, val)
def __contains__(self, key: str) -> bool:
@@ -169,14 +169,14 @@ def __init__(self, config_file: str, validate_inputs=False):
self.paths = self.load_paths_config()
self.process_paths_sheet()
-
+
self.setup_pipeline()
self.setup_per_file()
self.setup_ebwt_idx()
self.process_sample_sheet()
self.process_feature_sheet()
if validate_inputs: self.validate_inputs()
-
+
def load_paths_config(self):
"""Constructs a sub-configuration object containing workflow file preferences"""
path_sheet = self.from_here(self['paths_config'])
@@ -194,11 +194,12 @@ def to_cwl_file_class(input_file_path):
self['run_directory'] = self.paths.from_here(self.paths['run_directory'])
# Configurations that need to be converted from string to a CWL File object
- self['samples_csv'] = to_cwl_file_class(self.paths.from_here(self.paths['samples_csv']))
- self['features_csv'] = to_cwl_file_class(self.paths.from_here(self.paths['features_csv']))
+ self['samples_csv'] = to_cwl_file_class(self.paths['samples_csv'])
+ self['features_csv'] = to_cwl_file_class(self.paths['features_csv'])
self['reference_genome_files'] = [
- to_cwl_file_class(self.paths.from_here(genome))
+ to_cwl_file_class(genome)
for genome in self.paths['reference_genome_files']
+ if genome is not None
]
def process_sample_sheet(self):
@@ -280,7 +281,10 @@ def setup_pipeline(self):
self.templates = resource_filename('tiny', 'templates/')
def setup_ebwt_idx(self):
- """Determines Bowtie index prefix and whether bowtie-build should run"""
+ """Determines Bowtie index prefix and whether bowtie-build should run.
+ self['ebwt'] is used for the bowtie commandline argument (see note below)
+ self.paths['ebwt'] is the actual prefix path
+ """
# Empty values for ebwt (''/~/None) trigger bowtie-build
self['run_bowtie_build'] = not bool(self.paths['ebwt'])
@@ -295,7 +299,7 @@ def setup_ebwt_idx(self):
# verify_bowtie_build_outputs() will check if these end up being long indexes
self['bt_index_files'] = self.get_bt_index_files()
- # When CWL copies bt_index_filex for the bowtie.cwl InitialWorkDirRequirement, it does not
+ # When CWL copies bt_index_files for the bowtie.cwl InitialWorkDirRequirement, it does not
# preserve the prefix path. What the workflow "sees" is the ebwt files at working dir root
self['ebwt'] = os.path.basename(self.paths['ebwt'])
From 98462e754c4669997e8246316616cb206ad763dc Mon Sep 17 00:00:00 2001
From: Alex Tate <0xalextate@gmail.com>
Date: Thu, 13 Oct 2022 18:36:03 -0700
Subject: [PATCH 7/9] Adding unit tests for new bowtie index handling procedure
---
tests/unit_tests_configuration.py | 139 ++++++++++++++++++++++++++++++
1 file changed, 139 insertions(+)
create mode 100644 tests/unit_tests_configuration.py
diff --git a/tests/unit_tests_configuration.py b/tests/unit_tests_configuration.py
new file mode 100644
index 00000000..a3d56821
--- /dev/null
+++ b/tests/unit_tests_configuration.py
@@ -0,0 +1,139 @@
+import contextlib
+import io
+import os
+import unittest
+from unittest.mock import patch, mock_open, call
+
+from tiny.rna.configuration import Configuration
+
+
+class ConfigurationTests(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.root_cfg_dir = os.path.abspath("../tiny/templates")
+        cls.run_config = cls.root_cfg_dir + "/run_config_template.yml"
+        cls.paths = cls.root_cfg_dir + "/paths.yml"
+        # Prefix that get_ebwt_prefix() is expected to produce for the template configuration
+        cls.default_prefix = os.path.join(
+            cls.root_cfg_dir,
+            Configuration(cls.run_config)['run_directory'],
+            "bowtie-build/ram1"
+        )
+        cls.maxDiff = 1522  # upper bound on the length of diffs printed by failed assertions
+
+ """============ Helper functions ============"""
+
+ def config_with(self, prefs):
+ config = Configuration(self.run_config)
+ for key, val in prefs.items():
+ config[key] = val
+ return config
+
+ def bt_idx_files_from_prefix(self, prefix):
+ return [
+ {'path': f"{prefix}.{subext}.ebwt", 'class': 'File'}
+ for subext in ['1', '2', '3', '4', 'rev.1', 'rev.2']
+ ]
+
+ """================ Tests =================="""
+
+ """Does get_ebwt_prefix() produce the expected prefix path?"""
+
+ def test_get_ebwt_prefix(self):
+ config = Configuration(self.run_config)
+ actual_prefix = config.get_ebwt_prefix()
+ expected_prefix = self.default_prefix
+
+ self.assertEqual(actual_prefix, expected_prefix)
+
+ """Does get_ebwt_prefix() throw an error if reference genome files aren't provided?"""
+
+ def test_get_ebwt_prefix_no_genome(self):
+ config = Configuration(self.run_config)
+ config['reference_genome_files'] = None
+
+ with self.assertRaises(ValueError):
+ config.get_ebwt_prefix()
+
+ """Does get_bt_index_files() output the paths of indexes that have already been built?"""
+
+ def test_get_bt_index_files_prebuilt_indexes(self):
+ config = self.config_with({'run_bowtie_build': False})
+ prefix = config.paths['ebwt'] = os.path.abspath("./testdata/counter/validation/ebwt/ram1")
+ expected = self.bt_idx_files_from_prefix(prefix)
+ self.assertListEqual(config.get_bt_index_files(), expected)
+
+ """Does get_bt_index_files() output the paths of the index files that are expected
+ to be built from the reference genome?"""
+
+ def test_get_bt_index_files_unbuilt_indexes_with_genome(self):
+ config = self.config_with({'run_bowtie_build': True})
+ prefix = config.paths['ebwt'] = "mock_prefix"
+ expected = self.bt_idx_files_from_prefix(prefix)
+ self.assertListEqual(config.get_bt_index_files(), expected)
+
+ """Does get_bt_index_files() produce an error and quit when index files are
+ missing and a reference genome has not been provided?"""
+
+ def test_get_bt_index_files_missing_indexes_without_genome(self):
+ config = self.config_with({'run_bowtie_build': False, 'reference_genome_files': None})
+ prefix = config.paths['ebwt'] = "missing"
+ errmsg = '\n'.join([
+ "The following Bowtie index file couldn't be found:",
+ "\t" + f"{prefix}.1.ebwt",
+ "\nPlease either correct your ebwt prefix or add reference genomes in the Paths File."
+ ])
+
+ with self.assertRaisesRegex(SystemExit, errmsg):
+ config.get_bt_index_files()
+
+ """Does get_bt_index_files() produce an error without quitting when index files
+ are missing but a reference genome was provided, and does it return the list of
+ index files that will be built from the genome?"""
+
+ def test_get_bt_index_files_missing_indexes_with_genome(self):
+ config = self.config_with({'run_bowtie_build': False})
+ bad_prefix = config.paths['ebwt'] = "missing"
+ genome_prefix = self.default_prefix
+
+ expected_files = self.bt_idx_files_from_prefix(genome_prefix)
+ expected_error = '\n'.join([
+ "The following Bowtie index file couldn't be found:",
+ "\t" + f"{bad_prefix}.1.ebwt",
+ "\nIndexes will be built from your reference genome files during this run.",
+ ""
+ ])
+
+ stderr = io.StringIO()
+ with contextlib.redirect_stderr(stderr):
+ actual = config.get_bt_index_files()
+
+ self.assertEqual(stderr.getvalue(), expected_error)
+ self.assertListEqual(actual, expected_files)
+
+ """Does verify_bowtie_build_outputs() update the paths in ["bt_index_files"] and rewrite
+ these changes to the processed Run Config if long indexes were produced? Does it also
+ write to the Paths File to update the new ebwt prefix?"""
+
+ def test_verify_bowtie_build_outputs(self):
+ ebwt_short = ["1.ebwt", "2.ebwt", "3.ebwt"]
+ ebwt_long = ["1.ebwtl", "2.ebwtl", "3.ebwtl"]
+ run_conf_ebwt = [Configuration.cwl_file(f, verify=False) for f in ebwt_short]
+ expected_ebwt = [Configuration.cwl_file(f, verify=False) for f in ebwt_long]
+
+ config = self.config_with({'bt_index_files': run_conf_ebwt})
+
+ with patch('tiny.rna.configuration.open', mock_open()) as mo, \
+ patch('tiny.rna.configuration.glob', return_value=ebwt_long) as g:
+ config.verify_bowtie_build_outputs()
+
+ expected_writes = [
+ call(self.paths, 'w'),
+ call(os.path.join(self.root_cfg_dir, config['run_directory'], os.path.basename(self.run_config)), 'w')
+ ]
+
+ self.assertListEqual(config['bt_index_files'], expected_ebwt)
+ self.assertListEqual(mo.call_args_list, expected_writes)
+
+if __name__ == '__main__':
+ unittest.main()
From f751747d1ccfec5098215b4bddf8c7d13efec178 Mon Sep 17 00:00:00 2001
From: Alex Tate <0xalextate@gmail.com>
Date: Thu, 13 Oct 2022 19:01:00 -0700
Subject: [PATCH 8/9] Documentation updates removing previous bowtie index
requirements, and descriptions of the new activation procedure for the
bowtie-build step.
---
README.md | 3 +--
START_HERE/TUTORIAL.md | 4 +---
doc/Configuration.md | 9 ++++-----
3 files changed, 6 insertions(+), 10 deletions(-)
diff --git a/README.md b/README.md
index a418c74c..43336870 100644
--- a/README.md
+++ b/README.md
@@ -95,9 +95,8 @@ tiny get-template
| Reference annotations
[(example)](START_HERE/reference_data/ram1.gff3) | GFF3 / GFF2 / GTF | Column 9 attributes (defined as "tag=value" or "tag "):