Skip to content

Commit c59a691

Browse files
authored
Merge pull request #142 from CEGRcode/DNAshape-tools-cdt
Add matrix output for DNAShape tools (#126)
2 parents a3d037b + dc8670c commit c59a691

File tree

9 files changed

+1096
-538
lines changed

9 files changed

+1096
-538
lines changed

src/main/java/scriptmanager/cli/Sequence_Analysis/DNAShapefromBEDCLI.java

Lines changed: 75 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import java.io.PrintStream;
1414

1515
import scriptmanager.objects.ToolDescriptions;
16+
import scriptmanager.objects.Exceptions.OptionException;
1617
import scriptmanager.util.ExtensionFileFilter;
1718
import scriptmanager.scripts.Sequence_Analysis.DNAShapefromBED;
1819

@@ -22,8 +23,12 @@
2223
*
2324
* @author Olivia Lang
2425
*/
25-
@Command(name = "dna-shape-bed", mixinStandardHelpOptions = true, description = ToolDescriptions.dna_shape_from_bed_description, version = "ScriptManager "
26-
+ ToolDescriptions.VERSION, sortOptions = false, exitCodeOnInvalidInput = 1, exitCodeOnExecutionException = 1)
26+
@Command(name = "dna-shape-bed", mixinStandardHelpOptions = true,
27+
description = ToolDescriptions.dna_shape_from_bed_description,
28+
version = "ScriptManager " + ToolDescriptions.VERSION,
29+
sortOptions = false,
30+
exitCodeOnInvalidInput = 1,
31+
exitCodeOnExecutionException = 1)
2732
public class DNAShapefromBEDCLI implements Callable<Integer> {
2833

2934
/**
@@ -36,13 +41,16 @@ public DNAShapefromBEDCLI(){}
3641
@Parameters(index = "1", description = "the BED file of sequences to extract")
3742
private File bedFile;
3843

39-
@Option(names = { "-o",
40-
"--output" }, description = "Specify basename for output files, files for each shape indicated will share this name with a different suffix")
41-
private String outputBasename = null;
44+
@Option(names = { "-o", "--output" }, description = "Specify basename for output files, files for each shape indicated will share this name with a different suffix")
45+
private File outputBasename = null;
4246
@Option(names = {"-z", "--gzip"}, description = "gzip output (default=false)")
4347
private boolean gzOutput = false;
44-
@Option(names = { "--avg-composite" }, description = "Save average composite")
45-
private boolean avgComposite = false;
48+
@Option(names = { "--composite" }, description = "Save average composite (column-wise avg of matrix)")
49+
private boolean composite = false;
50+
@Option(names = { "--matrix" }, description = "Save tab-delimited matrix of shape scores")
51+
private boolean matrix = false;
52+
@Option(names = { "--cdt" }, description = "Save CDT-formatted matrix")
53+
private boolean cdt = true;
4654
@Option(names = { "-n", "--no-force" }, description = "don't force-strandedness (default is to force strandedness)")
4755
private boolean forceStrand = true;
4856

@@ -63,6 +71,7 @@ static class ShapeType {
6371
}
6472

6573
private boolean[] OUTPUT_TYPE = new boolean[] { false, false, false, false };
74+
private short outputMatrix = DNAShapefromBED.NO_MATRIX;
6675

6776
/**
6877
* Runs when this subcommand is called, running script in respective script package with user defined arguments
@@ -78,36 +87,30 @@ public Integer call() throws Exception {
7887
System.exit(1);
7988
}
8089

90+
// Generate Composite Plot
91+
DNAShapefromBED script_obj = new DNAShapefromBED(genomeFASTA, bedFile, outputBasename, OUTPUT_TYPE,
92+
forceStrand, composite, outputMatrix, gzOutput);
93+
script_obj.run();
8194
// Print Composite Scores
82-
try {
83-
// Generate Composite Plot
84-
DNAShapefromBED script_obj = new DNAShapefromBED(genomeFASTA, bedFile, outputBasename, OUTPUT_TYPE,
85-
forceStrand, new PrintStream[] { null, null, null, null }, gzOutput);
86-
script_obj.run();
87-
88-
if (avgComposite) {
89-
String[] headers = new String[] { "AVG_MGW", "AVG_PropT", "AVG_HelT", "AVG_Roll" };
90-
for (int t = 0; t < OUTPUT_TYPE.length; t++) {
91-
if (OUTPUT_TYPE[t]) {
92-
PrintStream COMPOSITE = new PrintStream(new File(outputBasename + "_" + headers[t] + ".out"));
93-
double[] AVG = script_obj.getAvg(t);
94-
// position vals
95-
for (int z = 0; z < AVG.length; z++) {
96-
COMPOSITE.print("\t" + z);
97-
}
98-
COMPOSITE.print("\n" + ExtensionFileFilter.stripExtension(bedFile) + "_" + headers[t]);
99-
// score vals
100-
for (int z = 0; z < AVG.length; z++) {
101-
COMPOSITE.print("\t" + AVG[z]);
102-
}
103-
COMPOSITE.println();
95+
if (composite) {
96+
String[] headers = new String[] { "AVG_MGW", "AVG_PropT", "AVG_HelT", "AVG_Roll" };
97+
for (int t = 0; t < OUTPUT_TYPE.length; t++) {
98+
if (OUTPUT_TYPE[t]) {
99+
PrintStream COMPOSITE = new PrintStream(new File(outputBasename + "_" + headers[t] + ".out"));
100+
double[] AVG = script_obj.getAvg(t);
101+
// position vals
102+
for (int z = 0; z < AVG.length; z++) {
103+
COMPOSITE.print("\t" + z);
104104
}
105+
COMPOSITE.print("\n" + ExtensionFileFilter.stripExtension(bedFile) + "_" + headers[t]);
106+
// score vals
107+
for (int z = 0; z < AVG.length; z++) {
108+
COMPOSITE.print("\t" + AVG[z]);
109+
}
110+
COMPOSITE.println();
105111
}
106112
}
107-
} catch (FileNotFoundException e) {
108-
e.printStackTrace();
109113
}
110-
111114
System.err.println("Shapes Calculated.");
112115
return (0);
113116
}
@@ -134,10 +137,10 @@ private String validateInput() throws IOException {
134137
}
135138
// set default output filename
136139
if (outputBasename == null) {
137-
outputBasename = ExtensionFileFilter.stripExtension(bedFile);
140+
outputBasename = new File(ExtensionFileFilter.stripExtension(bedFile));
138141
// check output filename is valid
139142
} else {
140-
String outParent = new File(outputBasename).getParent();
143+
String outParent = outputBasename.getParent();
141144
// no extension check
142145
// check directory
143146
if (outParent == null) {
@@ -171,34 +174,59 @@ private String validateInput() throws IOException {
171174
OUTPUT_TYPE = new boolean[] { true, true, true, true };
172175
}
173176

177+
if (matrix && cdt) {
178+
r += "(!)Please select either the matrix or the cdt flag.\n";
179+
} else if (matrix) {
180+
outputMatrix = DNAShapefromBED.TAB;
181+
} else if (cdt) {
182+
outputMatrix = DNAShapefromBED.CDT;
183+
}
184+
174185
return (r);
175186
}
176187

177188
/**
178189
* Reconstruct CLI command
179190
*
180-
* @param gen the reference genome sequence in FASTA-format (FAI will be
181-
* automatically generated)
182-
* @param input the BED-formatted coordinate intervals to extract sequence from
183-
* @param out the output file name base (to add _&lt;shapetype&gt;.cdt suffix
184-
* to)
185-
* @param type a four-element boolean list for specifying shape type to output
186-
* (no enforcement on size)
187-
* @param str force strandedness (true=forced, false=not forced)
188-
* @param gzOutput whether or not to gzip output
191+
* @param gen the reference genome sequence in FASTA-format (FAI
192+
* will be automatically generated)
193+
* @param input the BED-formatted coordinate intervals to extract
194+
* sequence from
195+
* @param out the output file name base (to add
196+
* _&lt;shapetype&gt;.cdt suffix to)
197+
* @param type a four-element boolean list for specifying shape type
198+
* to output (no enforcement on size)
199+
* @param str force strandedness (true=forced, false=not forced)
200+
* @param outputComposite whether to output a composite average output
201+
* @param outputMatrix value encoding whether to skip the output matrix, write
202+
* the matrix in CDT format, or write the matrix in tab format
203+
* @param gzOutput whether or not to gzip output
189204
* @return command line to execute with formatted inputs
190205
*/
191-
public static String getCLIcommand(File gen, File input, String out, boolean[] type, boolean str, boolean gzOutput) {
206+
public static String getCLIcommand(File gen, File input, File out, boolean[] type, boolean str, boolean outputComposite, short outputMatrix, boolean gzOutput) throws OptionException {
192207
String command = "java -jar $SCRIPTMANAGER sequence-analysis dna-shape-bed";
193-
command += " -o " + out;
208+
command += " -o " + out.getAbsolutePath();
194209
command += gzOutput ? " -z " : "";
195210
command += type[0] ? " --groove" : "";
196211
command += type[1] ? " --propeller" : "";
197212
command += type[2] ? " --helical" : "";
198213
command += type[3] ? " --roll" : "";
199214
command += str ? "" : "--no-force";
200-
command += " " + gen;
201-
command += " " + input;
215+
command += outputComposite ? "--composite" : "";
216+
switch (outputMatrix) {
217+
case DNAShapefromBED.NO_MATRIX:
218+
break;
219+
case DNAShapefromBED.TAB:
220+
command += " --matrix";
221+
break;
222+
case DNAShapefromBED.CDT:
223+
command += " --cdt";
224+
break;
225+
default:
226+
throw new OptionException("outputMatrix type value " + outputMatrix + " not supported");
227+
}
228+
command += " " + gen.getAbsolutePath();
229+
command += " " + input.getAbsolutePath();
202230
return (command);
203231
}
204232
}

src/main/java/scriptmanager/cli/Sequence_Analysis/DNAShapefromFASTACLI.java

Lines changed: 66 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
import java.io.PrintStream;
1414

1515
import scriptmanager.objects.ToolDescriptions;
16+
import scriptmanager.objects.Exceptions.OptionException;
1617
import scriptmanager.util.ExtensionFileFilter;
18+
import scriptmanager.scripts.Sequence_Analysis.DNAShapefromBED;
1719
import scriptmanager.scripts.Sequence_Analysis.DNAShapefromFASTA;
1820

1921
/**
@@ -39,11 +41,15 @@ public DNAShapefromFASTACLI(){}
3941
private File fastaFile;
4042

4143
@Option(names = { "-o", "--output" }, description = "Specify basename for output files, files for each shape indicated will share this name with a different suffix")
42-
private String outputBasename = null;
43-
@Option(names = { "--avg-composite" }, description = "Save average composite")
44-
private boolean avgComposite = false;
44+
private File outputBasename = null;
4545
@Option(names = {"-z", "--gzip"}, description = "gzip output (default=false)")
4646
private boolean gzOutput = false;
47+
@Option(names = { "--composite" }, description = "Save average composite (column-wise avg of matrix)")
48+
private boolean composite = false;
49+
@Option(names = { "--matrix" }, description = "Save tab-delimited matrix of shape scores")
50+
private boolean matrix = false;
51+
@Option(names = { "--cdt" }, description = "Save CDT-formatted matrix")
52+
private boolean cdt = true;
4753

4854
@ArgGroup(validate = false, heading = "Shape Options%n")
4955
ShapeType shape = new ShapeType();
@@ -57,12 +63,12 @@ static class ShapeType {
5763
private boolean propeller = false;
5864
@Option(names = { "-l", "--helical" }, description = "output helical twist")
5965
private boolean helical = false;
60-
@Option(names = { "-a",
61-
"--all" }, description = "output groove, roll, propeller twist, and helical twist (equivalent to -grpl).")
66+
@Option(names = { "-a", "--all" }, description = "output groove, roll, propeller twist, and helical twist (equivalent to -grpl).")
6267
private boolean all = false;
6368
}
6469

6570
private boolean[] OUTPUT_TYPE = new boolean[] { false, false, false, false };
71+
private short outputMatrix = DNAShapefromBED.NO_MATRIX;
6672

6773
/**
6874
* Runs when this subcommand is called, running script in respective script package with user defined arguments
@@ -80,32 +86,27 @@ public Integer call() throws Exception {
8086

8187
// Generate Composite Plot
8288
DNAShapefromFASTA script_obj = new DNAShapefromFASTA(fastaFile, outputBasename, OUTPUT_TYPE,
83-
new PrintStream[] { null, null, null, null }, gzOutput);
89+
composite, outputMatrix, gzOutput);
8490
script_obj.run();
85-
86-
// Print Composite Scores
87-
try {
88-
if (avgComposite) {
89-
String[] headers = new String[] { "AVG_MGW", "AVG_PropT", "AVG_HelT", "AVG_Roll" };
90-
for (int t = 0; t < OUTPUT_TYPE.length; t++) {
91-
if (OUTPUT_TYPE[t]) {
92-
PrintStream COMPOSITE = new PrintStream(new File(outputBasename + "_" + headers[t] + ".out"));
93-
double[] AVG = script_obj.getAvg(t);
94-
// position vals
95-
for (int z = 0; z < AVG.length; z++) {
96-
COMPOSITE.print("\t" + z);
97-
}
98-
COMPOSITE.print("\n" + ExtensionFileFilter.stripExtension(fastaFile) + "_" + headers[t]);
99-
// score vals
100-
for (int z = 0; z < AVG.length; z++) {
101-
COMPOSITE.print("\t" + AVG[z]);
102-
}
103-
COMPOSITE.println();
91+
// Print Composite Scores
92+
if (composite) {
93+
String[] headers = new String[] { "AVG_MGW", "AVG_PropT", "AVG_HelT", "AVG_Roll" };
94+
for (int t = 0; t < OUTPUT_TYPE.length; t++) {
95+
if (OUTPUT_TYPE[t]) {
96+
PrintStream COMPOSITE = new PrintStream(new File(outputBasename + "_" + headers[t] + ".out"));
97+
double[] AVG = script_obj.getAvg(t);
98+
// position vals
99+
for (int z = 0; z < AVG.length; z++) {
100+
COMPOSITE.print("\t" + z);
101+
}
102+
COMPOSITE.print("\n" + ExtensionFileFilter.stripExtension(fastaFile) + "_" + headers[t]);
103+
// score vals
104+
for (int z = 0; z < AVG.length; z++) {
105+
COMPOSITE.print("\t" + AVG[z]);
104106
}
107+
COMPOSITE.println();
105108
}
106109
}
107-
} catch (FileNotFoundException e) {
108-
e.printStackTrace();
109110
}
110111

111112
System.err.println("Shapes Calculated.");
@@ -124,14 +125,17 @@ private String validateInput() throws IOException {
124125
// check inputs exist
125126
if (!fastaFile.exists()) {
126127
r += "(!)FASTA file does not exist: " + fastaFile.getName() + "\n";
128+
}
129+
if (!r.equals("")) {
127130
return (r);
128131
}
129132
// set default output filename
130133
if (outputBasename == null) {
131-
outputBasename = ExtensionFileFilter.stripExtension(fastaFile);
134+
outputBasename = new File(ExtensionFileFilter.stripExtension(fastaFile));
132135
// check output filename is valid
133136
} else {
134-
String outParent = new File(outputBasename).getParent();
137+
String outParent = outputBasename.getParent();
138+
// no extension check
135139
// check directory
136140
if (outParent == null) {
137141
// System.err.println("default to current directory");
@@ -164,29 +168,53 @@ private String validateInput() throws IOException {
164168
OUTPUT_TYPE = new boolean[] { true, true, true, true };
165169
}
166170

171+
if (matrix && cdt) {
172+
r += "(!)Please select either the matrix or the cdt flag.\n";
173+
} else if (matrix) {
174+
outputMatrix = DNAShapefromBED.TAB;
175+
} else if (cdt) {
176+
outputMatrix = DNAShapefromBED.CDT;
177+
}
178+
167179
return (r);
168180
}
169181

170182
/**
171183
* Reconstruct CLI command
172184
*
173-
* @param fa the FASTA-formatted file with a fixed sequence length
174-
* @param out the output file name base (to add _&lt;shapetype&gt;.cdt suffix
175-
* to)
176-
* @param type a four-element boolean list for specifying shape type to output
177-
* (no enforcement on size)
178-
* @param gzOutput whether or not to gzip output
185+
* @param input the FASTA-formatted file with a fixed sequence length
186+
* @param out the output file name base (to add
187+
* _&lt;shapetype&gt;.cdt suffix to)
188+
* @param type a four-element boolean list for specifying shape type
189+
* to output (no enforcement on size)
190+
* @param outputComposite whether to output a composite average output
191+
* @param outputMatrix value encoding whether to skip the output matrix, write
192+
* the matrix in CDT format, or write the matrix in tab format
193+
* @param gzOutput whether or not to gzip output
179194
* @return command line to execute with formatted inputs
180195
*/
181-
public static String getCLIcommand(File fa, String out, boolean[] type, boolean gzOutput) {
196+
public static String getCLIcommand(File input, File out, boolean[] type, boolean outputComposite, short outputMatrix, boolean gzOutput) throws OptionException {
182197
String command = "java -jar $SCRIPTMANAGER sequence-analysis dna-shape-fasta";
183-
command += " -o " + out;
198+
command += " -o " + out.getAbsolutePath();
184199
command += gzOutput ? " -z " : "";
185200
command += type[0] ? " --groove" : "";
186201
command += type[1] ? " --propeller" : "";
187202
command += type[2] ? " --helical" : "";
188203
command += type[3] ? " --roll" : "";
189-
command += " " + fa;
204+
command += outputComposite ? "--composite" : "";
205+
switch (outputMatrix) {
206+
case DNAShapefromBED.NO_MATRIX:
207+
break;
208+
case DNAShapefromBED.TAB:
209+
command += " --matrix";
210+
break;
211+
case DNAShapefromBED.CDT:
212+
command += " --cdt";
213+
break;
214+
default:
215+
throw new OptionException("outputMatrix type value " + outputMatrix + " not supported");
216+
}
217+
command += " " + input.getAbsolutePath();
190218
return (command);
191219
}
192220
}

0 commit comments

Comments
 (0)