diff --git a/README.md b/README.md index 5d648aa..fbedd21 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,7 @@ usage: gSpan -h,--help Help -i,--min-node Minimum number of nodes for each sub-graph -r,--result File path of result + -t,--graph-type Type of graph: directed / undirected (default: undirected) -s,--sup (Required) Minimum support ``` diff --git a/pom.xml b/pom.xml index 84f8c75..e9f41a0 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ gSpan gSpan.Java - 1.0 + 1.2 @@ -19,6 +19,15 @@ maven-assembly-plugin + + + make-jar-with-dependencies + package + + single + + + @@ -41,5 +50,5 @@ 1.4 - + \ No newline at end of file diff --git a/src/main/java/io/github/tonyzzx/gspan/Main.java b/src/main/java/io/github/tonyzzx/gspan/Main.java index 04ba9e9..f5b7a5b 100644 --- a/src/main/java/io/github/tonyzzx/gspan/Main.java +++ b/src/main/java/io/github/tonyzzx/gspan/Main.java @@ -18,13 +18,16 @@ public static void main(String[] args) throws IOException { try (FileWriter writer = new FileWriter(outFile)) { gSpan gSpan = new gSpan(); System.out.println("gSpan is mining..."); - gSpan.run(reader, writer, arguments.minSup, arguments.maxNodeNum, arguments.minNodeNum); + gSpan.run(reader, writer, arguments.minSup, arguments.maxNodeNum, arguments.minNodeNum, arguments.directed, arguments.singleNodes); System.out.println("It's done! The result is in the " + arguments.outFilePath + "."); } } } private static class Arguments { + public static final String GRAPH_TYPE_UNDIRECTED = "undirected"; + public static final String GRAPH_TYPE_DIRECTED = "directed"; + public static final String GRAPH_TYPE_DEFAULT = GRAPH_TYPE_UNDIRECTED; private static Arguments arguments; private String[] args; @@ -34,6 +37,8 @@ private static class Arguments { long minNodeNum = 0; long maxNodeNum = Long.MAX_VALUE; String outFilePath; + boolean directed = false; + boolean singleNodes = false; private Arguments(String[] args) { this.args = args; @@ -59,6 +64,8 @@ private void initFromCmd() { options.addOption("i", "min-node", true, "Minimum number of nodes for each sub-graph"); options.addOption("a", "max-node", true, "Maximum number of nodes for each sub-graph"); options.addOption("r", "result", true, "File path of result"); + options.addOption("t", "graph-type", true, "Type of graph: " + GRAPH_TYPE_DIRECTED + " / " + GRAPH_TYPE_UNDIRECTED + " (default: " + GRAPH_TYPE_DEFAULT + ")"); + options.addOption("n", "single-nodes", false, "Single nodes (nodes with same label are merged)"); options.addOption("h", "help", false, "Help"); CommandLineParser parser = new DefaultParser(); @@ -80,6 +87,13 @@ private void initFromCmd() { minNodeNum = Long.parseLong(cmd.getOptionValue("i", "0")); maxNodeNum = Long.parseLong(cmd.getOptionValue("a", String.valueOf(Long.MAX_VALUE))); outFilePath = cmd.getOptionValue("r", inFilePath + "_result"); + String graphType = cmd.getOptionValue("t", GRAPH_TYPE_DEFAULT); + if (!(GRAPH_TYPE_DIRECTED.equals(graphType) || GRAPH_TYPE_UNDIRECTED.equals(graphType))) { + System.out.println("Graph type not valid, was: " + graphType + ", valid: " + GRAPH_TYPE_DIRECTED + " / " + GRAPH_TYPE_UNDIRECTED); + System.exit(1); + } + directed = GRAPH_TYPE_DIRECTED.equals(graphType); + singleNodes = cmd.hasOption('n'); } /*** diff --git a/src/main/java/io/github/tonyzzx/gspan/gSpan.java b/src/main/java/io/github/tonyzzx/gspan/gSpan.java index 93dfffb..167fb05 100644 --- a/src/main/java/io/github/tonyzzx/gspan/gSpan.java +++ b/src/main/java/io/github/tonyzzx/gspan/gSpan.java @@ -4,7 +4,7 @@ import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; -import java.util.ArrayList; +import java.util.*; import java.util.Map.Entry; import io.github.tonyzzx.gspan.model.DFSCode; @@ -15,10 +15,6 @@ import io.github.tonyzzx.gspan.model.Projected; import io.github.tonyzzx.gspan.model.Vertex; -import java.util.NavigableMap; -import java.util.TreeMap; -import java.util.Vector; - public class gSpan { private ArrayList TRANS; private DFSCode DFS_CODE; @@ -30,6 +26,7 @@ public class gSpan { private long maxPat_min; private long maxPat_max; private boolean directed; + private boolean singleNodes; private FileWriter os; // Singular vertex handling stuff [graph][vertexLabel] = count. @@ -56,13 +53,14 @@ public gSpan() { * @param minNodeNum Minimum number of nodes * @throws IOException */ - void run(FileReader reader, FileWriter writers, long minSup, long maxNodeNum, long minNodeNum) throws IOException { + void run(FileReader reader, FileWriter writers, long minSup, long maxNodeNum, long minNodeNum, boolean directed, boolean singleNodes) throws IOException { os = writers; ID = 0; this.minSup = minSup; maxPat_min = minNodeNum; maxPat_max = maxNodeNum; - directed = false; + this.directed = directed; + this.singleNodes = singleNodes; read(reader); runIntern(); @@ -70,11 +68,45 @@ void run(FileReader reader, FileWriter writers, long minSup, long maxNodeNum, lo private void read(FileReader is) throws IOException { BufferedReader read = new BufferedReader(is); - while (true) { - Graph g = new Graph(directed); - read = g.read(read); - if (g.isEmpty()) - break; + List result = new ArrayList<>(); + String line; + Graph g = new Graph(directed); + while ((line = read.readLine()) != null) { + result.clear(); + String[] splitRead = line.split(" "); + Collections.addAll(result, splitRead); + + if (!result.isEmpty()) { + if (result.get(0).equals("t")) { + if (!g.isEmpty()) { + g.buildEdge(); + TRANS.add(g); + } + g = new Graph(directed); + } else if (result.get(0).equals("v") && result.size() >= 3) { + // int id = Integer.parseInt(result.get(1)); + Vertex vex = new Vertex(); + vex.label = Integer.parseInt(result.get(2)); + g.add(vex); + } else if (result.get(0).equals("e") && result.size() >= 4) { + int from = Integer.parseInt(result.get(1)); + int to = Integer.parseInt(result.get(2)); + int eLabel = Integer.parseInt(result.get(3)); + + if (g.size() <= from || g.size() <= to) { + throw new IllegalStateException("Format Error: define vertex lists before edges!"); + } + + g.get(from).push(from, to, eLabel); + + if (!directed) { + g.get(to).push(to, from, eLabel); + } + } + } + } + if (!g.isEmpty()) { + g.buildEdge(); TRANS.add(g); } read.close(); @@ -180,7 +212,7 @@ private void reportSingle(Graph g, NavigableMap nCount) throws sup += Common.getValue(it.getValue()); } - if (maxPat_max > maxPat_min && g.size() > maxPat_max) + if (maxPat_max >= maxPat_min && g.size() > maxPat_max) return; if (maxPat_min > 0 && g.size() < maxPat_min) return; @@ -192,13 +224,13 @@ private void reportSingle(Graph g, NavigableMap nCount) throws private void report(int sup) throws IOException { // Filter to small/too large graphs. - if (maxPat_max > maxPat_min && DFS_CODE.countNode() > maxPat_max) + if (maxPat_max >= maxPat_min && DFS_CODE.countNode() > maxPat_max) return; if (maxPat_min > 0 && DFS_CODE.countNode() < maxPat_min) return; Graph g = new Graph(directed); - DFS_CODE.toGraph(g); + DFS_CODE.toGraph(g, singleNodes); os.write("t # " + ID + " * " + sup + System.getProperty("line.separator")); g.write(os); ++ID; @@ -233,7 +265,7 @@ private void project(Projected projected) throws IOException { * still add edges within an existing sub-graph, without increasing the * number of nodes. */ - if (maxPat_max > maxPat_min && DFS_CODE.countNode() > maxPat_max) + if (maxPat_max >= maxPat_min && DFS_CODE.countNode() > maxPat_max) return; /* @@ -351,7 +383,7 @@ private boolean isMin() { if (DFS_CODE.size() == 1) return (true); - DFS_CODE.toGraph(GRAPH_IS_MIN); + DFS_CODE.toGraph(GRAPH_IS_MIN, singleNodes); DFS_CODE_IS_MIN.clear(); NavigableMap>> root = new TreeMap<>(); diff --git a/src/main/java/io/github/tonyzzx/gspan/model/DFSCode.java b/src/main/java/io/github/tonyzzx/gspan/model/DFSCode.java index 22fa233..6418389 100644 --- a/src/main/java/io/github/tonyzzx/gspan/model/DFSCode.java +++ b/src/main/java/io/github/tonyzzx/gspan/model/DFSCode.java @@ -1,6 +1,8 @@ package io.github.tonyzzx.gspan.model; import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; public class DFSCode extends ArrayList { private static final long serialVersionUID = 1L; @@ -24,22 +26,60 @@ public void pop() { this.remove(this.size() - 1); } - public void toGraph(Graph g) { + public void toGraph(Graph g, boolean singleNodes) { g.clear(); - for (DFS it : this) { - g.resize(Math.max(it.from, it.to) + 1); + // Version 1: Multiple nodes for nodes with same label + if (!singleNodes) { + for (DFS it : this) { + g.resize(Math.max(it.from, it.to) + 1); - if (it.fromLabel != -1) - g.get(it.from).label = it.fromLabel; - if (it.toLabel != -1) - g.get(it.to).label = it.toLabel; + if (it.fromLabel != -1) + g.get(it.from).label = it.fromLabel; + if (it.toLabel != -1) + g.get(it.to).label = it.toLabel; - g.get(it.from).push(it.from, it.to, it.eLabel); - if (!g.directed) - g.get(it.to).push(it.to, it.from, it.eLabel); + g.get(it.from).push(it.from, it.to, it.eLabel); + if (!g.directed) + g.get(it.to).push(it.to, it.from, it.eLabel); + } + } else { + // Version 2: One node for nodes with the same label + // Create label mapping + Map labelMapping = new HashMap<>(); + Map idMapping = new HashMap<>(); + int counter = 0; + for (DFS it : this) { + if (it.fromLabel != -1 && !labelMapping.containsKey(it.fromLabel)) { + labelMapping.put(it.fromLabel, counter); + counter++; + } + if (it.toLabel != -1 && !labelMapping.containsKey(it.toLabel)) { + labelMapping.put(it.toLabel, counter); + counter++; + } + if (it.fromLabel != -1) { + idMapping.put(it.from, labelMapping.get(it.fromLabel)); + } + if (it.toLabel != -1) { + idMapping.put(it.to, labelMapping.get(it.toLabel)); + } + } + // Create nodes + g.resize(counter); + // Set labels + for (Map.Entry entry : labelMapping.entrySet()) { + g.get(entry.getValue()).label = entry.getKey(); + } + // Create edges + for (DFS it : this) { + int fromId = idMapping.get(it.from); + int toId = idMapping.get(it.to); + g.get(fromId).push(fromId, toId, it.eLabel); + if (!g.directed) + g.get(toId).push(toId, fromId, it.eLabel); + } } - g.buildEdge(); } @@ -61,6 +101,7 @@ public ArrayList buildRMPath() { /** * Return number of nodes in the graph. + * * @return number of nodes in the graph */ public int countNode() { diff --git a/src/main/java/io/github/tonyzzx/gspan/model/Graph.java b/src/main/java/io/github/tonyzzx/gspan/model/Graph.java index 94918f4..f426670 100644 --- a/src/main/java/io/github/tonyzzx/gspan/model/Graph.java +++ b/src/main/java/io/github/tonyzzx/gspan/model/Graph.java @@ -17,7 +17,7 @@ public Graph(boolean directed) { this.directed = directed; } - void buildEdge() { + public void buildEdge() { String buf; NavigableMap tmp = new TreeMap<>(); @@ -43,51 +43,6 @@ void buildEdge() { edge_size = id; } - public BufferedReader read(BufferedReader is) throws IOException { - ArrayList result = new ArrayList<>(); - String line; - - clear(); - - while ((line = is.readLine()) != null) { - result.clear(); - String[] splitRead = line.split(" "); - Collections.addAll(result, splitRead); - - if (!result.isEmpty()) { - if (result.get(0).equals("t")) { - if (!this.isEmpty()) { // use as delimiter - break; - } - } else if (result.get(0).equals("v") && result.size() >= 3) { - // int id = Integer.parseInt(result.get(1)); - Vertex vex = new Vertex(); - vex.label = Integer.parseInt(result.get(2)); - this.add(vex); - } else if (result.get(0).equals("e") && result.size() >= 4) { - int from = Integer.parseInt(result.get(1)); - int to = Integer.parseInt(result.get(2)); - int eLabel = Integer.parseInt(result.get(3)); - - if (this.size() <= from || this.size() <= to) { - System.out.println("Format Error: define vertex lists before edges"); - return null; - } - - this.get(from).push(from, to, eLabel); - - if (!directed) { - this.get(to).push(to, from, eLabel); - } - } - } - } - - buildEdge(); - - return is; - } - public void write(FileWriter os) throws IOException { String buf; // Sort the result of edges. diff --git a/src/main/java/io/github/tonyzzx/gspan/model/Vertex.java b/src/main/java/io/github/tonyzzx/gspan/model/Vertex.java index ea4c870..acc6ab0 100644 --- a/src/main/java/io/github/tonyzzx/gspan/model/Vertex.java +++ b/src/main/java/io/github/tonyzzx/gspan/model/Vertex.java @@ -10,7 +10,7 @@ public Vertex() { edge = new ArrayList<>(); } - void push(int from, int to, int eLabel) { + public void push(int from, int to, int eLabel) { Edge e = new Edge(); e.from = from; e.to = to; diff --git a/target/gSpan.Java-1.1-jar-with-dependencies.jar b/target/gSpan.Java-1.1-jar-with-dependencies.jar new file mode 100644 index 0000000..c0d7a17 Binary files /dev/null and b/target/gSpan.Java-1.1-jar-with-dependencies.jar differ diff --git a/target/gSpan.Java-1.2-jar-with-dependencies.jar b/target/gSpan.Java-1.2-jar-with-dependencies.jar new file mode 100644 index 0000000..6722656 Binary files /dev/null and b/target/gSpan.Java-1.2-jar-with-dependencies.jar differ