Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
72 commits
Select commit Hold shift + click to select a range
07d9f14
Add a bash script for making and jaring Clans from the command line
MartinGuehmann Sep 30, 2020
08694fe
Prepare MinimalHsp so that it can be used as key in a HashMap
MartinGuehmann Oct 1, 2020
be2e352
Clean up places for better HashMap usage, before replace
MartinGuehmann Oct 1, 2020
3f3485c
Make ClusterDataLoadHelper.parse_hsp_block use the same MinimalHsp as…
MartinGuehmann Oct 1, 2020
ce0fbb4
Reduce memory by using the same MinimalHsp object for key and value i…
MartinGuehmann Oct 1, 2020
9bac3cb
Reduce memory by using the same MinimalHsp object for key and value i…
MartinGuehmann Oct 1, 2020
707d861
Use MinimalAttractionValue in HashMaps as keys to itself instead of S…
MartinGuehmann Oct 2, 2020
3892993
Clean up code: initialize a local HashMap as late as possible, removed a…
MartinGuehmann Oct 2, 2020
fc22539
Use of MinimalAttractionValue itself as key in the HashTable instead …
MartinGuehmann Oct 4, 2020
92b77f1
Use HashSets instead of HashMaps in SelectedSubsetHandling to save me…
MartinGuehmann Oct 4, 2020
9942c68
Remove tmp members from IterationsComputerThread
MartinGuehmann Oct 4, 2020
b808005
Simplify code: Use the two-argument constructor of AminoAcidSequence t…
MartinGuehmann Oct 5, 2020
881d405
In ClusterMethods.removeGapsFromSequences only replace the sequence i…
MartinGuehmann Oct 5, 2020
28cdf87
Use Integer instead of String as type for the HashMaps in ClusterDete…
MartinGuehmann Oct 6, 2020
d646435
Remove hashkeys from ClusterDetection.java, since hashkeys[i] = i
MartinGuehmann Oct 6, 2020
d6ad91d
White space cleanup in ClusterDetection.java
MartinGuehmann Oct 6, 2020
71f6358
Remove unused parameter of ClusterDetection.getconnecteds
MartinGuehmann Oct 6, 2020
a394ea5
Turn clusterhash in ClusterDetection.multilinkage into a 2D-array, as…
MartinGuehmann Oct 6, 2020
70f12f9
Replace HashMap by HashSet in ClusterDetection.java to reduce code co…
MartinGuehmann Oct 6, 2020
5d82e11
Use existing Integer objects for HashMaps and HashSets in ClusterDete…
MartinGuehmann Oct 6, 2020
bf613bc
Accelerate loading with many HSPs, by presetting the capacity of a Ha…
MartinGuehmann Oct 7, 2020
f313999
Rename basevec to remainingSeqIDs in ClusterDetection.java
MartinGuehmann Oct 7, 2020
fa2059c
Rename the methods in ClusterDetection.java for convex clustering
MartinGuehmann Oct 7, 2020
84140a6
Rename currvec to newClusterSeqIDs in ClusterDetection.java
MartinGuehmann Oct 7, 2020
b336f7d
Having the "Find clusters" menu item in the "Windows" menu starting w…
MartinGuehmann Oct 7, 2020
bcd7387
Clean up sort code in ClusterDetection.java
MartinGuehmann Oct 8, 2020
9743c8f
Rename retvec to returnClusters in ClusterDetection.getConvex
MartinGuehmann Oct 8, 2020
c5265ec
Clean up white space and add camelCasing for variables in SequenceCl…
MartinGuehmann Oct 8, 2020
1d295c5
Add also camelCasing and fix naming for member variables in SequenceC…
MartinGuehmann Oct 8, 2020
9faab95
Simplify code by moving into the SequenceCluster constructors
MartinGuehmann Oct 8, 2020
5b8e815
Turn the code about convex clustering in ClusterDetection.java into a…
MartinGuehmann Oct 8, 2020
cd671f4
Rename getAverageAttraction to getAverageLocalAttraction in ConvexClu…
MartinGuehmann Oct 8, 2020
2752c7f
Turn attvals, sigmafac, minseqnum, and seqnum into members of ConvexC…
MartinGuehmann Oct 8, 2020
c0e5574
Make remainingSeqIDs and newClusterSeqIDs members of ConvexClustering
MartinGuehmann Oct 8, 2020
4fa6e6e
Cleanup comments in ConvexClustering
MartinGuehmann Oct 8, 2020
0c648d2
Turn avgatt into ConvexClustering.avgAttraction
MartinGuehmann Oct 8, 2020
ffa262d
Turn varatt into ConvexClustering.attractionVar
MartinGuehmann Oct 8, 2020
d921188
Move initialization code of ConvexClustering.getconvex into its own initia…
MartinGuehmann Oct 8, 2020
4497d38
Save time by using the same HashSet for computeAverageAttraction and …
MartinGuehmann Oct 8, 2020
11f9cbc
CamelCase the members of ConvexClustering
MartinGuehmann Oct 8, 2020
346c04b
Clean up variable names in ConvexClustering.getMaxAttraction
MartinGuehmann Oct 8, 2020
cc796f5
Cleanup local variable names in ConvexClustering
MartinGuehmann Oct 8, 2020
06ae700
Clean up ConvexClustering.getCluster further
MartinGuehmann Oct 8, 2020
ade2e40
Minor cleanup of ClusterDetection.java
MartinGuehmann Oct 8, 2020
64b7cf9
Rename getCluster to getOneCluster in ConvexClustering to reflect its…
MartinGuehmann Oct 8, 2020
fe03af5
Use ArrayList instead of Vector for members of ConvexClustering for s…
MartinGuehmann Oct 8, 2020
0ba71a8
Loop only through the attraction values that belong to the nodes of i…
MartinGuehmann Oct 9, 2020
7c086b3
Implement minimum number of sequences for ConvexClustering
MartinGuehmann Oct 9, 2020
8f718d9
Add the number of found clusters to the cluster output window title bar
MartinGuehmann Oct 10, 2020
47ab00e
Report to the command line how long clustering took
MartinGuehmann Oct 10, 2020
de6e8f1
Accelerate ConvexClustering by multithreading
MartinGuehmann Oct 10, 2020
225b65c
Give clusters in the cluster detection window proper names
MartinGuehmann Oct 10, 2020
de93573
Simplify vector access in WindowClusterDetectionResults.java
MartinGuehmann Oct 11, 2020
d76963e
Improve variable names in WindowClusterDetectionResults.java, use cam…
MartinGuehmann Oct 11, 2020
f1c00f0
Cleanup white space in WindowClusterDetectionResults.java
MartinGuehmann Oct 11, 2020
150477c
Cleanup variable names in WindowClusterDetectionResults further: Name…
MartinGuehmann Oct 11, 2020
0666831
Do not add sequences if the cancel button was pressed in WindowCluste…
MartinGuehmann Oct 11, 2020
e096511
Better label the buttons with what they do in WindowClusterDetectionR…
MartinGuehmann Oct 11, 2020
3c079c4
Add the sequences to the sequence groups as they show up in WindowClu…
MartinGuehmann Oct 11, 2020
b3a489a
Flexibilize the naming of the new sequence groups and conserve the in…
MartinGuehmann Oct 11, 2020
e9e4b0b
Use Integer as key in HashMap for only moving selected sequences to s…
MartinGuehmann Oct 11, 2020
b1dd26b
Remove unused parameter from ClusterMethods.computeSimpleAttractionVa…
MartinGuehmann Oct 13, 2020
44dc625
Cleanup white space in ClusterMethods.java, no code changes
MartinGuehmann Oct 13, 2020
bfa9844
Further code cleanup in ClusterMethods.java
MartinGuehmann Oct 13, 2020
eed57af
Remove minpal from the argument lists of computeSimpleAttractionValue…
MartinGuehmann Oct 13, 2020
c7dcedb
Prepare to merge duplicated code in compute_attraction_values() by ma…
MartinGuehmann Oct 13, 2020
08e6da3
Cleanup white space in ClusterData.java
MartinGuehmann Oct 13, 2020
15189d5
Merge duplicated code in ClusterData.compute_attraction_values
MartinGuehmann Oct 13, 2020
bec46f2
Merge more duplicated code in ClusterData.compute_attraction_values
MartinGuehmann Oct 13, 2020
5dbbb2e
Fix averaging the attraction values in ClusterData.compute_attraction…
MartinGuehmann Oct 13, 2020
a801c31
Provide -rounds synonym for the -dorounds command line parameter, sin…
MartinGuehmann Oct 13, 2020
7fe6d05
Add convex clustering to the no graphical user interface options
MartinGuehmann Oct 13, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions MakeClans.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/usr/bin/env bash
#
# Compile the CLANS sources and package them into clans.jar.
#
# All paths are relative, so run this script from the directory that
# contains src/. javac writes the class files below ./ (-d ./), and the
# resulting clans/ directory is then packed into clans.jar with clans.Main
# as the entry point.

# Stop at the first failing command so that a failed compilation never
# falls through to the jar step and packages a partial build.
set -euo pipefail

# Remove class files left over from a previous build.
rm -rf clans/

javac -d ./ src/clans/*.java \
src/clans/model/*.java \
src/clans/model/proteins/*.java \
src/clans/model/microarray/*.java \
src/clans/misc/*.java \
src/clans/io/*.java \
src/clans/headless/*.java \
src/clans/gui/*.java \
src/clans/algorithms/*.java \
src/clans/algorithms/fruchtermanreingold/*.java

# c = create, f = archive file name, e = entry point (main class).
jar cfe clans.jar clans.Main clans/
87 changes: 81 additions & 6 deletions src/clans/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import java.util.*;

import clans.algorithms.EnrichWithSimilarSequences;
import clans.algorithms.ClusterDetection;
import clans.gui.ProgramWindow;
import clans.headless.ClusteringWithoutGui;
import clans.io.AlignmentHandling;
Expand All @@ -14,6 +15,8 @@
import clans.model.ClusterData;
import clans.model.proteins.AminoAcidSequence;
import clans.model.proteins.MinimalHsp;
import clans.model.SequenceCluster;
import clans.model.SequenceGroup;

public class Main {

Expand Down Expand Up @@ -95,6 +98,9 @@ public static void main(String[] args) {
// and -load)
static boolean initialize = false; // if true will initialize the clustermap upon loading
static int dorounds = -1; // how many rounds to cluster by (only if used in conjunction with -load)
static boolean clusterConvex = false; // Do convex clustering and add the clusters to the output file (only if used in conjunction with -load, -savetoname, and dorounds >= 0)
static int minClusterSeqNum = 2; // Minimum number of sequences in a cluster (only with -load, -savetoname, dorounds >= 0, and clusterConvex)
static float stdevCutoff = 0.5f; // Standard deviation cutoff for convex clustering (only with -load, -savetoname, dorounds >= 0, and clusterConvex)

// variables used for adding new sequences to an already present dataset
static String olddata = "";
Expand All @@ -116,9 +122,13 @@ static void print_usage_help() {
System.out.println("-infile name of input file");
System.out.println("-load name-of-savefile");
System.out.println("-rounds (int) (def:" + dorounds
+ ") how many rounds to cluster for (only used in conjunction with -load)");
+ ") how many rounds to cluster for (only with -load and -saveto)");
System.out
.println("-saveto String where to save the results to (only used in conjunction with -load and -rounds)");
.println("-saveto String where to save the results to (only with -load and -rounds)");
System.out.println("-clusterConvex boolean if true saves groups from convex clustering (def: " + clusterConvex + ", only with -load, -rounds, and -saveto)");
System.out.println("-minClusterSeqNum minimum number of sequences in clustered computed with -clusterConvex (def: " + minClusterSeqNum + ", only with -load, -rounds, -saveto, and -clusterConvex)");
System.out.println("-stdevCutoff standard deviation cutoff with -clusterConvex (def: " + stdevCutoff + ", only with -load, -rounds, -saveto, and -clusterConvex)");

System.out.println("-initialize boolean (t/F) if true randomly initializes the graph in runs without GUI");
System.out.println("-loadalt name-of-alternate-format-savefile");
System.out.println("-lowmem t/F (doesn't do much at the moment)");
Expand Down Expand Up @@ -167,6 +177,12 @@ static void print_settings() {
if (input_filename != null && dorounds >= 0) {
System.out.println("rounds=" + dorounds);
System.out.println("saveto=" + savetoname);
System.out.println("clusterConvex=" + clusterConvex);

if(clusterConvex) {
System.out.println("minClusterSeqNum=" + minClusterSeqNum);
System.out.println("stdevCutoff=" + stdevCutoff);
}
}

System.out.println("cmd=" + cmd);
Expand Down Expand Up @@ -357,7 +373,7 @@ static boolean start_computations() {

// add only the new matches
ArrayList<MinimalHsp> addblasthits = new ArrayList<MinimalHsp>();
for (int i = newblasthits.length; --i >= 0;) {
for (int i = newblasthits.length; --i >= 0;) {
if (newblasthits[i].query >= readelements || newblasthits[i].hit >= readelements) {
addblasthits.add(newblasthits[i]);
}
Expand Down Expand Up @@ -538,6 +554,19 @@ private static boolean run_clans_without_gui() {
}
}

if(clusterConvex) {
Vector<SequenceCluster> clusters = ClusterDetection.getConvex(myclusterer.data.attractionValues,
stdevCutoff,
minClusterSeqNum,
myclusterer.data.elements,
myclusterer.data.cpu);
for(int i = 0; i < clusters.size(); i++) {

SequenceCluster seqCluster = clusters.get(i);
myclusterer.data.add_group(seqCluster.name, seqCluster.members);
}
}

File savefile = new File(savetoname);

try{
Expand Down Expand Up @@ -629,6 +658,52 @@ static boolean parse_arguments(String[] args) {
continue;
}

if ((args[i].equalsIgnoreCase("-clusterConvex"))) {
i++;
if (i < args.length) {
clusterConvex = (args[i].equalsIgnoreCase("TRUE") || args[i].equalsIgnoreCase("T"));
} else {
System.err.println("Error reading -clusterConvex, missing argument.");
return false;
}
i++;
continue;
}

if ((args[i].equalsIgnoreCase("-minClusterSeqNum")) || (args[i].equalsIgnoreCase("-min"))) {
i++;
if ((i) < args.length) {
try {
minClusterSeqNum = Integer.parseInt(args[i]);
} catch (NumberFormatException e) {
System.err.println("unable to parse int from " + args[i] + " in -minClusterSeqNum.");
return false;
}
} else {
System.err.println("Error reading -minClusterSeqNum, missing argument.");
return false;
}
i++;
continue;
}

if ((args[i].equalsIgnoreCase("-stdevCutoff")) || (args[i].equalsIgnoreCase("-std"))) {
i++;
if ((i) < args.length) {
try {
stdevCutoff = Float.parseFloat(args[i]);
} catch (NumberFormatException e) {
System.err.println("unable to parse float from '" + args[i] + "' in -stdevCutoff");
return false;
}
} else {
System.err.println("Error reading -stdevCutoff, missing argument.");
return false;
}
i++;
continue;
}

if ((args[i].equalsIgnoreCase("-referencedb")) || (args[i].equalsIgnoreCase("-refdb"))) {
i++;
if (i < args.length) {
Expand Down Expand Up @@ -896,17 +971,17 @@ static boolean parse_arguments(String[] args) {
continue;
}

if ((args[i].equalsIgnoreCase("-dorounds"))) {
if ((args[i].equalsIgnoreCase("-dorounds") || args[i].equalsIgnoreCase("-rounds"))) {
i++;
if ((i) < args.length) {
try {
dorounds = Integer.parseInt(args[i]);
} catch (NumberFormatException e) {
System.err.println("unable to parse int from " + args[i] + " in -dorounds.");
System.err.println("unable to parse int from " + args[i] + " in -rounds.");
return false;
}
} else {
System.err.println("Error reading -dorounds, missing argument.");
System.err.println("Error reading -rounds, missing argument.");
return false;
}
i++;
Expand Down
Loading