From 0406142b5da1a3bd9a7a841c6cc86ae8740705bd Mon Sep 17 00:00:00 2001 From: kubaszar Date: Tue, 27 Jun 2017 18:02:53 +0200 Subject: [PATCH 1/8] Added Turbo Matching --- README.md | 4 +- .../algorithms/graph/TurboMatching.java | 112 +++++++++++++++ .../graph/test/TurboMatchingTest.java | 134 ++++++++++++++++++ 3 files changed, 249 insertions(+), 1 deletion(-) create mode 100644 src/com/jwetherell/algorithms/graph/TurboMatching.java create mode 100644 test/com/jwetherell/algorithms/graph/test/TurboMatchingTest.java diff --git a/README.md b/README.md index eeb6fb6c..c4a4f9b9 100644 --- a/README.md +++ b/README.md @@ -150,7 +150,9 @@ This is a collection of algorithms and data structures which I've implement over * Graph Traversal - [Depth First Traversal](src/com/jwetherell/algorithms/graph/DepthFirstTraversal.java) - [Breadth First Traversal](src/com/jwetherell/algorithms/graph/BreadthFirstTraversal.java) -* [Edmonds Karp](src/com/jwetherell/algorithms/graph/EdmondsKarp.java) +* [Edmonds Karp](src/com/jwetherell/algorithms/graph/EdmondsKarp.java) +* Matching + - [Turbo Matching](src/com/jwetherell/algorithms/graph/TurboMatching.java) ## Search * Get index of value in array diff --git a/src/com/jwetherell/algorithms/graph/TurboMatching.java b/src/com/jwetherell/algorithms/graph/TurboMatching.java new file mode 100644 index 00000000..b783c02b --- /dev/null +++ b/src/com/jwetherell/algorithms/graph/TurboMatching.java @@ -0,0 +1,112 @@ +package com.jwetherell.algorithms.graph; + +import com.jwetherell.algorithms.data_structures.Graph; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * In the mathematical discipline of graph theory, a matching or independent edge set + * in a graph is a set of edges without common vertices. In some matchings, all the vertices + * may incident with some edge of the matching, but this is not required and can only occur + * if the number of vertices is even. + *

+ * https://en.wikipedia.org/wiki/Matching_(graph_theory) + *
+ * @author Jakub Szarawarski + */ +public class TurboMatching { + + /** + * Computes maximum matching, using turbomatching algorithm based on augmenting paths with O(EV) complexity. + * + * @param graph bipartite graph + * @param parameter of graph on which network is based + * @return a MatchingResult class instance containg a map of mates for each paired vertex and number of pairs + */ + public static > MatchingResult getMaximumMatching(Graph graph){ + Map, Graph.Vertex> mate = new HashMap<>(); + + while(pathset(graph, mate)); + + return new MatchingResult<>(mate); + } + + /** + * Searches for an augmenting path for each unmatched vertex. + * + * @param graph bipartite graph + * @param mate map containing a mate for each matched vertex + * @return information if any augmenting path was found + */ + private static > boolean pathset(Graph graph, Map, Graph.Vertex> mate){ + Set> visited = new HashSet<>(); + + boolean result = false; + + for(Graph.Vertex vertex : graph.getVertices()){ + if (mate.containsKey(vertex) == false){ + if (path(graph, mate, visited, vertex)) + result = true; + } + } + + return result; + } + + /** + * Searches for an augmenting path for a vertex. + * Refreshes mates map appropriately. + * + * @param graph bipartite graph + * @param mate map containing a mate for each matched vertex + * @param visited set containing vertices visited in current pathset + * @param vertex regarded vertex + * @param parameter of graph on which network is based + * @return information if an augmenting path was found + */ + private static > boolean path(Graph graph, Map, Graph.Vertex> mate, Set> visited, Graph.Vertex vertex){ + if (visited.contains(vertex)) return false; + visited.add(vertex); + + for(Graph.Edge edge : vertex.getEdges()){ + + Graph.Vertex neighbour = edge.getFromVertex().equals(vertex) ? edge.getToVertex() : edge.getFromVertex(); + + if (mate.containsKey(neighbour) == false || path(graph, mate, visited, mate.get(neighbour))){ + mate.put(vertex, neighbour); + mate.put(neighbour, vertex); + return true; + } + } + + return false; + } + + + public static class MatchingResult>{ + private final Map, Graph.Vertex> mate; + private final int size; + + private MatchingResult(Map, Graph.Vertex> mate){ + this.mate = mate; + this.size = mate.size()/2; + } + + /** + * @return the number of edges in independent edge set + */ + public int getSize(){ + return this.size; + } + + /** + * @return a symetric map that contains a mate for each matched vertex + */ + public Map, Graph.Vertex> getMate(){ + return this.mate; + } + } +} diff --git a/test/com/jwetherell/algorithms/graph/test/TurboMatchingTest.java b/test/com/jwetherell/algorithms/graph/test/TurboMatchingTest.java new file mode 100644 index 00000000..cdb51acf --- /dev/null +++ b/test/com/jwetherell/algorithms/graph/test/TurboMatchingTest.java @@ -0,0 +1,134 @@ +package com.jwetherell.algorithms.graph.test; + +import com.jwetherell.algorithms.data_structures.Graph; +import com.jwetherell.algorithms.graph.TurboMatching; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; + +import static org.junit.Assert.*; + + +public class TurboMatchingTest { + + private final Graph.Vertex v_a1 = new Graph.Vertex<>(1); + private final Graph.Vertex v_a2 = new Graph.Vertex<>(2); + private final Graph.Vertex v_a3 = new Graph.Vertex<>(3); + private final Graph.Vertex v_b1 = new Graph.Vertex<>(4); + private final Graph.Vertex v_b2 = new Graph.Vertex<>(5); + private final Graph.Vertex v_b3 = new Graph.Vertex<>(6); + + private List> vertices = new ArrayList<>(); + + { + vertices.add(v_a1); + vertices.add(v_a2); + vertices.add(v_a3); + vertices.add(v_b1); + vertices.add(v_b2); + vertices.add(v_b3); + } + @Test + public void testFullBipartiteGraph(){ + List> edges = new ArrayList<>(); + { + edges.add(new Graph.Edge<>(1, v_a1, v_b1)); + edges.add(new Graph.Edge<>(1, v_a1, v_b2)); + edges.add(new Graph.Edge<>(1, v_a1, v_b3)); + edges.add(new Graph.Edge<>(1, v_a2, v_b1)); + edges.add(new Graph.Edge<>(1, v_a2, v_b2)); + edges.add(new Graph.Edge<>(1, v_a2, v_b3)); + edges.add(new Graph.Edge<>(1, v_a3, v_b1)); + edges.add(new Graph.Edge<>(1, v_a3, v_b2)); + edges.add(new Graph.Edge<>(1, v_a3, v_b3)); + } + + final Graph graph = new Graph<>(vertices, edges); + + TurboMatching.MatchingResult matchingResult = TurboMatching.getMaximumMatching(graph); + assertTrue(matchingResult.getSize() == 3); + for(Graph.Vertex vertex : vertices){ + assertTrue(matchingResult.getMate().get(matchingResult.getMate().get(vertex)).equals(vertex)); + } + } + + @Test + public void testSingleEdgeForVertex(){ + List> edges = new ArrayList<>(); + { + edges.add(new Graph.Edge<>(1, v_a1, v_b1)); + edges.add(new Graph.Edge<>(1, v_a2, v_b2)); + edges.add(new Graph.Edge<>(1, v_a3, v_b3)); + } + + final Graph graph = new Graph<>(vertices, edges); + + TurboMatching.MatchingResult matchingResult = TurboMatching.getMaximumMatching(graph); + + assertTrue(matchingResult.getSize() == 3); + assertTrue(matchingResult.getMate().get(v_a1).equals(v_b1)); + assertTrue(matchingResult.getMate().get(v_a2).equals(v_b2)); + assertTrue(matchingResult.getMate().get(v_a3).equals(v_b3)); + assertTrue(matchingResult.getMate().get(v_b1).equals(v_a1)); + assertTrue(matchingResult.getMate().get(v_b2).equals(v_a2)); + assertTrue(matchingResult.getMate().get(v_b3).equals(v_a3)); + } + + @Test + public void testEmptyGraph(){ + List> edges = new ArrayList<>(); + { + } + + final Graph graph = new Graph<>(vertices, edges); + + TurboMatching.MatchingResult matchingResult = TurboMatching.getMaximumMatching(graph); + + assertTrue(matchingResult.getSize() == 0); + assertTrue(matchingResult.getMate().isEmpty()); + } + + @Test + public void testTwoMatched(){ + List> edges = new ArrayList<>(); + { + edges.add(new Graph.Edge<>(1, v_a1, v_b1)); + edges.add(new Graph.Edge<>(1, v_a1, v_b3)); + edges.add(new Graph.Edge<>(1, v_a2, v_b2)); + edges.add(new Graph.Edge<>(1, v_a3, v_b2)); + } + + final Graph graph = new Graph<>(vertices, edges); + TurboMatching.MatchingResult matchingResult = TurboMatching.getMaximumMatching(graph); + + assertTrue(matchingResult.getSize() == 2); + assertTrue(matchingResult.getMate().containsKey(v_a1)); + assertTrue(matchingResult.getMate().containsKey(v_b2)); + assertTrue(matchingResult.getMate().containsValue(v_a1)); + assertTrue(matchingResult.getMate().containsValue(v_b2)); + } + + @Test + public void testOneMatched(){ + List> edges = new ArrayList<>(); + { + edges.add(new Graph.Edge<>(1, v_a1, v_b1)); + edges.add(new Graph.Edge<>(1, v_a1, v_b2)); + edges.add(new Graph.Edge<>(1, v_a1, v_b3)); + } + + final Graph graph = new Graph<>(vertices, edges); + TurboMatching.MatchingResult matchingResult = TurboMatching.getMaximumMatching(graph); + + assertTrue(matchingResult.getSize() == 1); + assertTrue(matchingResult.getMate().containsKey(v_a1)); + assertTrue(matchingResult.getMate().containsValue(v_a1)); + assertFalse(matchingResult.getMate().containsKey(v_a2)); + assertFalse(matchingResult.getMate().containsValue(v_a2)); + assertFalse(matchingResult.getMate().containsKey(v_a3)); + assertFalse(matchingResult.getMate().containsValue(v_a3)); + } + + +} \ No newline at end of file From a182542104506740631187154924a58d93e095ff Mon Sep 17 00:00:00 2001 From: kubaszar Date: Tue, 27 Jun 2017 20:53:59 +0200 Subject: [PATCH 2/8] Diamond operator fixed --- .../algorithms/graph/TurboMatching.java | 6 +- .../graph/test/TurboMatchingTest.java | 72 +++++++++---------- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/src/com/jwetherell/algorithms/graph/TurboMatching.java b/src/com/jwetherell/algorithms/graph/TurboMatching.java index b783c02b..88120c82 100644 --- a/src/com/jwetherell/algorithms/graph/TurboMatching.java +++ b/src/com/jwetherell/algorithms/graph/TurboMatching.java @@ -27,11 +27,11 @@ public class TurboMatching { * @return a MatchingResult class instance containg a map of mates for each paired vertex and number of pairs */ public static > MatchingResult getMaximumMatching(Graph graph){ - Map, Graph.Vertex> mate = new HashMap<>(); + Map, Graph.Vertex> mate = new HashMap, Graph.Vertex>(); while(pathset(graph, mate)); - return new MatchingResult<>(mate); + return new MatchingResult(mate); } /** @@ -42,7 +42,7 @@ public static > MatchingResult getMaximumMatching(Gra * @return information if any augmenting path was found */ private static > boolean pathset(Graph graph, Map, Graph.Vertex> mate){ - Set> visited = new HashSet<>(); + Set> visited = new HashSet>(); boolean result = false; diff --git a/test/com/jwetherell/algorithms/graph/test/TurboMatchingTest.java b/test/com/jwetherell/algorithms/graph/test/TurboMatchingTest.java index cdb51acf..5e9be687 100644 --- a/test/com/jwetherell/algorithms/graph/test/TurboMatchingTest.java +++ b/test/com/jwetherell/algorithms/graph/test/TurboMatchingTest.java @@ -12,14 +12,14 @@ public class TurboMatchingTest { - private final Graph.Vertex v_a1 = new Graph.Vertex<>(1); - private final Graph.Vertex v_a2 = new Graph.Vertex<>(2); - private final Graph.Vertex v_a3 = new Graph.Vertex<>(3); - private final Graph.Vertex v_b1 = new Graph.Vertex<>(4); - private final Graph.Vertex v_b2 = new Graph.Vertex<>(5); - private final Graph.Vertex v_b3 = new Graph.Vertex<>(6); + private final Graph.Vertex v_a1 = new Graph.Vertex(1); + private final Graph.Vertex v_a2 = new Graph.Vertex(2); + private final Graph.Vertex v_a3 = new Graph.Vertex(3); + private final Graph.Vertex v_b1 = new Graph.Vertex(4); + private final Graph.Vertex v_b2 = new Graph.Vertex(5); + private final Graph.Vertex v_b3 = new Graph.Vertex(6); - private List> vertices = new ArrayList<>(); + private List> vertices = new ArrayList>(); { vertices.add(v_a1); @@ -31,20 +31,20 @@ public class TurboMatchingTest { } @Test public void testFullBipartiteGraph(){ - List> edges = new ArrayList<>(); + List> edges = new ArrayList>(); { - edges.add(new Graph.Edge<>(1, v_a1, v_b1)); - edges.add(new Graph.Edge<>(1, v_a1, v_b2)); - edges.add(new Graph.Edge<>(1, v_a1, v_b3)); - edges.add(new Graph.Edge<>(1, v_a2, v_b1)); - edges.add(new Graph.Edge<>(1, v_a2, v_b2)); - edges.add(new Graph.Edge<>(1, v_a2, v_b3)); - edges.add(new Graph.Edge<>(1, v_a3, v_b1)); - edges.add(new Graph.Edge<>(1, v_a3, v_b2)); - edges.add(new Graph.Edge<>(1, v_a3, v_b3)); + edges.add(new Graph.Edge(1, v_a1, v_b1)); + edges.add(new Graph.Edge(1, v_a1, v_b2)); + edges.add(new Graph.Edge(1, v_a1, v_b3)); + edges.add(new Graph.Edge(1, v_a2, v_b1)); + edges.add(new Graph.Edge(1, v_a2, v_b2)); + edges.add(new Graph.Edge(1, v_a2, v_b3)); + edges.add(new Graph.Edge(1, v_a3, v_b1)); + edges.add(new Graph.Edge(1, v_a3, v_b2)); + edges.add(new Graph.Edge(1, v_a3, v_b3)); } - final Graph graph = new Graph<>(vertices, edges); + final Graph graph = new Graph(vertices, edges); TurboMatching.MatchingResult matchingResult = TurboMatching.getMaximumMatching(graph); assertTrue(matchingResult.getSize() == 3); @@ -55,14 +55,14 @@ public void testFullBipartiteGraph(){ @Test public void testSingleEdgeForVertex(){ - List> edges = new ArrayList<>(); + List> edges = new ArrayList>(); { - edges.add(new Graph.Edge<>(1, v_a1, v_b1)); - edges.add(new Graph.Edge<>(1, v_a2, v_b2)); - edges.add(new Graph.Edge<>(1, v_a3, v_b3)); + edges.add(new Graph.Edge(1, v_a1, v_b1)); + edges.add(new Graph.Edge(1, v_a2, v_b2)); + edges.add(new Graph.Edge(1, v_a3, v_b3)); } - final Graph graph = new Graph<>(vertices, edges); + final Graph graph = new Graph(vertices, edges); TurboMatching.MatchingResult matchingResult = TurboMatching.getMaximumMatching(graph); @@ -77,11 +77,11 @@ public void testSingleEdgeForVertex(){ @Test public void testEmptyGraph(){ - List> edges = new ArrayList<>(); + List> edges = new ArrayList>(); { } - final Graph graph = new Graph<>(vertices, edges); + final Graph graph = new Graph(vertices, edges); TurboMatching.MatchingResult matchingResult = TurboMatching.getMaximumMatching(graph); @@ -91,15 +91,15 @@ public void testEmptyGraph(){ @Test public void testTwoMatched(){ - List> edges = new ArrayList<>(); + List> edges = new ArrayList>(); { - edges.add(new Graph.Edge<>(1, v_a1, v_b1)); - edges.add(new Graph.Edge<>(1, v_a1, v_b3)); - edges.add(new Graph.Edge<>(1, v_a2, v_b2)); - edges.add(new Graph.Edge<>(1, v_a3, v_b2)); + edges.add(new Graph.Edge(1, v_a1, v_b1)); + edges.add(new Graph.Edge(1, v_a1, v_b3)); + edges.add(new Graph.Edge(1, v_a2, v_b2)); + edges.add(new Graph.Edge(1, v_a3, v_b2)); } - final Graph graph = new Graph<>(vertices, edges); + final Graph graph = new Graph(vertices, edges); TurboMatching.MatchingResult matchingResult = TurboMatching.getMaximumMatching(graph); assertTrue(matchingResult.getSize() == 2); @@ -111,14 +111,14 @@ public void testTwoMatched(){ @Test public void testOneMatched(){ - List> edges = new ArrayList<>(); + List> edges = new ArrayList>(); { - edges.add(new Graph.Edge<>(1, v_a1, v_b1)); - edges.add(new Graph.Edge<>(1, v_a1, v_b2)); - edges.add(new Graph.Edge<>(1, v_a1, v_b3)); + edges.add(new Graph.Edge(1, v_a1, v_b1)); + edges.add(new Graph.Edge(1, v_a1, v_b2)); + edges.add(new Graph.Edge(1, v_a1, v_b3)); } - final Graph graph = new Graph<>(vertices, edges); + final Graph graph = new Graph(vertices, edges); TurboMatching.MatchingResult matchingResult = TurboMatching.getMaximumMatching(graph); assertTrue(matchingResult.getSize() == 1); From c7f24c3055b7dad1a23da3bc5c2b9b62a9277e71 Mon Sep 17 00:00:00 2001 From: kubaszar Date: Tue, 27 Jun 2017 22:12:29 +0200 Subject: [PATCH 3/8] Added Suffix Array --- README.md | 1 + .../data_structures/SuffixArray.java | 170 ++++++++++++++++++ .../data_structures/test/SuffixArrayTest.java | 45 +++++ 3 files changed, 216 insertions(+) create mode 100644 src/com/jwetherell/algorithms/data_structures/SuffixArray.java create mode 100644 test/com/jwetherell/algorithms/data_structures/test/SuffixArrayTest.java diff --git a/README.md b/README.md index c4a4f9b9..c8a64916 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,7 @@ This is a collection of algorithms and data structures which I've implement over * [Skip List](src/com/jwetherell/algorithms/data_structures/SkipList.java) * [Splay Tree](src/com/jwetherell/algorithms/data_structures/SplayTree.java) * [Stack [backed by an array or a linked list]](src/com/jwetherell/algorithms/data_structures/Stack.java) +* [Suffix Array](src/com/jwetherell/algorithms/data_structures/SuffixArray.java) * [Suffix Tree (Ukkonen's algorithm)](src/com/jwetherell/algorithms/data_structures/SuffixTree.java) * [Suffix Trie [backed by a Trie]](src/com/jwetherell/algorithms/data_structures/SuffixTrie.java) * [Treap](src/com/jwetherell/algorithms/data_structures/Treap.java) diff --git a/src/com/jwetherell/algorithms/data_structures/SuffixArray.java b/src/com/jwetherell/algorithms/data_structures/SuffixArray.java new file mode 100644 index 00000000..10507afd --- /dev/null +++ b/src/com/jwetherell/algorithms/data_structures/SuffixArray.java @@ -0,0 +1,170 @@ +package com.jwetherell.algorithms.data_structures; + +import java.util.ArrayList; +import java.util.Collections; + +/** + * In computer science, a suffix array is a sorted array of all suffixes of a string. + * It is a data structure used, among others, in full text indices, data compression + * algorithms and within the field of bibliometrics. + * + * https://en.wikipedia.org/wiki/Suffix_array + * + * This implementation returns starting indexes instead of full suffixes + * + * @author Jakub Szarawarski + */ +public class SuffixArray { + private static final char DEFAULT_END_SEQ_CHAR = '$'; + private char END_SEQ_CHAR; + private String string; + private ArrayList suffixArray = null; + private ArrayList KMRarray = null; + + public SuffixArray(CharSequence sequence) { + this(sequence, DEFAULT_END_SEQ_CHAR); + } + + public SuffixArray(CharSequence sequence, char endChar) { + END_SEQ_CHAR = endChar; + string = buildStringWithEndChar(sequence); + } + + public ArrayList getSuffixArray() { + if(suffixArray == null){ + KMRalgorithm(); + } + return suffixArray; + } + + /** + * @return inverted suffix array + */ + public ArrayList getKMRarray() { + if (KMRarray == null) { + KMRalgorithm(); + } + return KMRarray; + } + + /** + * Creates suffix array using KMR algorithm with O(n log^2 n) complexity. + * + * For radius r: + * KMR[i] == k, + * when string[i..i+r-1] is kth r-letter substring of string sorted lexicographically + * KMR is counted for radius = 1,2,4,8 ... + * KMR for radius bigger than string length is the inverted suffix array + */ + private void KMRalgorithm() { + int length = string.length(); + + ArrayList KMR = getBasicKMR(length); + ArrayList KMRinvertedList = new ArrayList(); + + int radius = 1; + + while(radius < length){ + KMRinvertedList = getKMRinvertedList(KMR, radius, length); + KMR = getKMR(KMRinvertedList, radius, length); + radius *= 2; + } + + KMRarray = new ArrayList(KMR.subList(0, length)); + suffixArray = new ArrayList(); + KMRinvertedList.forEach(k -> suffixArray.add(k.index)); + } + + /** + * Creates KMR array for new radius from nearly inverted array. + * Elements from inverted array need to be grouped by substring tey represent. + * + * @param KMRinvertedList indexes are nearly inverted KMR array + * @param length string length + * @return KMR array for new radius + */ + private ArrayList getKMR(ArrayList KMRinvertedList, int radius, int length) { + ArrayList KMR = new ArrayList(length*2); + for(int i=0; i<2*length; i++) KMR.add(-1); + + int counter = 0; + for(int i=0; i0 && substringsAreEqual(KMRinvertedList, i)) + counter++; + KMR.set(KMRinvertedList.get(i).index, counter); + } + + return KMR; + } + + private boolean substringsAreEqual(ArrayList KMRinvertedList, int i) { + return KMRinvertedList.get(i-1).beginKMR.equals(KMRinvertedList.get(i).beginKMR) == false || + KMRinvertedList.get(i-1).endKMR.equals(KMRinvertedList.get(i).endKMR) == false; + } + + /** + * helper method to create KMR array for radius = radius from KMR array for radius = radius/2 + * + * @param KMR KMR array for radius = radius/2 + * @param radius new radius + * @param length string length + * @return list of KMRsWithIndex which indexes are nearly inverted KMR array + */ + private ArrayList getKMRinvertedList(ArrayList KMR, int radius, int length) { + ArrayList KMRinvertedList = new ArrayList(); + + for(int i=0; i { + if(A.beginKMR.equals(B.beginKMR) == false){ + return A.beginKMR.compareTo(B.beginKMR); + } + if(A.endKMR.equals(B.endKMR) == false){ + return A.endKMR.compareTo(B.endKMR); + } + return A.index.compareTo(B.index); + }); + + return KMRinvertedList; + } + + /** + * KMR array for radius=1, instead of initial natural numbers ascii codes are used + * + * @param length length of string + * @return pseudo KMR array for radius=1 + */ + private ArrayList getBasicKMR(int length) { + ArrayList result = new ArrayList(length*2); + char[] characters = string.toCharArray(); + for(int i=0; i suffixTree = new SuffixTree(string); + + Set suffixSet = suffixTree.getSuffixes(); + ArrayList suffixArray = suffixArrayBuilder.getSuffixArray(); + + int length = string.length(); + int i=0; + for(String suffix : suffixSet){ + String substring = string.substring(suffixArray.get(i++)); + assertTrue(suffix.equals(substring)); + } + } + + @Test + public void testKMRarray(){ + String string = "aasfaasdsadasdfasdasdasdasfdasfassdfas"; + + SuffixArray suffixArrayBuilder = new SuffixArray(string); + ArrayList suffixArray = suffixArrayBuilder.getSuffixArray(); + ArrayList KMRarray = suffixArrayBuilder.getKMRarray(); + + int length = string.length(); + for(int i=0; i Date: Tue, 27 Jun 2017 23:54:27 +0200 Subject: [PATCH 4/8] Lambdas removed --- .../data_structures/SuffixArray.java | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/com/jwetherell/algorithms/data_structures/SuffixArray.java b/src/com/jwetherell/algorithms/data_structures/SuffixArray.java index 10507afd..2e93faeb 100644 --- a/src/com/jwetherell/algorithms/data_structures/SuffixArray.java +++ b/src/com/jwetherell/algorithms/data_structures/SuffixArray.java @@ -2,6 +2,7 @@ import java.util.ArrayList; import java.util.Collections; +import java.util.Comparator; /** * In computer science, a suffix array is a sorted array of all suffixes of a string. @@ -72,7 +73,9 @@ private void KMRalgorithm() { KMRarray = new ArrayList(KMR.subList(0, length)); suffixArray = new ArrayList(); - KMRinvertedList.forEach(k -> suffixArray.add(k.index)); + for(KMRsWithIndex kmr : KMRinvertedList){ + suffixArray.add(kmr.index); + } } /** @@ -117,14 +120,17 @@ private ArrayList getKMRinvertedList(ArrayList KMR, int KMRinvertedList.add(new KMRsWithIndex(KMR.get(i), KMR.get(i+radius), i)); } - Collections.sort(KMRinvertedList, (A, B) -> { - if(A.beginKMR.equals(B.beginKMR) == false){ - return A.beginKMR.compareTo(B.beginKMR); - } - if(A.endKMR.equals(B.endKMR) == false){ - return A.endKMR.compareTo(B.endKMR); + Collections.sort(KMRinvertedList, new Comparator() { + @Override + public int compare(KMRsWithIndex A, KMRsWithIndex B) { + if (A.beginKMR.equals(B.beginKMR) == false) { + return A.beginKMR.compareTo(B.beginKMR); + } + if (A.endKMR.equals(B.endKMR) == false) { + return A.endKMR.compareTo(B.endKMR); + } + return A.index.compareTo(B.index); } - return A.index.compareTo(B.index); }); return KMRinvertedList; From 7f5ccd914380d279312c966bf251bad33296943f Mon Sep 17 00:00:00 2001 From: kubaszar Date: Wed, 28 Jun 2017 02:31:55 +0200 Subject: [PATCH 5/8] Added LCP Array Tests --- .../data_structures/test/LCPArrayTest.java | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 test/com/jwetherell/algorithms/data_structures/test/LCPArrayTest.java diff --git a/test/com/jwetherell/algorithms/data_structures/test/LCPArrayTest.java b/test/com/jwetherell/algorithms/data_structures/test/LCPArrayTest.java new file mode 100644 index 00000000..3bfbe3fa --- /dev/null +++ b/test/com/jwetherell/algorithms/data_structures/test/LCPArrayTest.java @@ -0,0 +1,52 @@ +package com.jwetherell.algorithms.data_structures.test; + +import com.jwetherell.algorithms.data_structures.LCPArray; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Arrays; + +import static org.junit.Assert.*; + +public class LCPArrayTest { + + @Test + public void smallTest(){ + String string = "asdasdd"; + LCPArray LCPArrayBuilder = new LCPArray(string); + ArrayList LCPArray = LCPArrayBuilder.getLCPArray(); + ArrayList result = new ArrayList(); + + result.addAll(Arrays.asList(null, 0, 3, 0, 1, 1, 0, 2)); + + assertEquals(LCPArray, result); + } + + @Test + public void longTest(){ + + String string = "aasfaasdsadasdfasdasdasdasfdasfassdfas"; + LCPArray LCPArrayBuilder = new LCPArray(string); + ArrayList LCPArray = LCPArrayBuilder.getLCPArray(); + ArrayList result = new ArrayList(); + + result.addAll(Arrays.asList(null, 0, 3, 1, 1, 2, 8, 5, 3, 3, 2, 4, 3, 2, 0, + 6, 4, 3, 4, 1, 4, 1, 0, 2, 3, 3, 1, 0, 1, 1, 7, 4, 2, 5, 2, 1, 3, 2, 1)); + + assertEquals(LCPArray, result); + } + + @Test + public void singleLetterTest(){ + + String string = "aaaaaaaaaaaa"; + LCPArray LCPArrayBuilder = new LCPArray(string); + ArrayList LCPArray = LCPArrayBuilder.getLCPArray(); + ArrayList result = new ArrayList(); + + result.addAll(Arrays.asList(null , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)); + + assertEquals(LCPArray, result); + } + +} \ No newline at end of file From 8a951a7ba500a23b2e47371008bc0a4a463a187a Mon Sep 17 00:00:00 2001 From: kubaszar Date: Wed, 28 Jun 2017 02:32:27 +0200 Subject: [PATCH 6/8] Added LCP Array --- .../algorithms/data_structures/LCPArray.java | 84 +++++++++++++++++++ .../data_structures/SuffixArray.java | 4 + 2 files changed, 88 insertions(+) create mode 100644 src/com/jwetherell/algorithms/data_structures/LCPArray.java diff --git a/src/com/jwetherell/algorithms/data_structures/LCPArray.java b/src/com/jwetherell/algorithms/data_structures/LCPArray.java new file mode 100644 index 00000000..9726fc41 --- /dev/null +++ b/src/com/jwetherell/algorithms/data_structures/LCPArray.java @@ -0,0 +1,84 @@ +package com.jwetherell.algorithms.data_structures; + +import java.util.ArrayList; + +/** + * In computer science, the longest common prefix array (LCP array) is an auxiliary + * data structure to the suffix array. It stores the lengths of the longest common + * prefixes (LCPs) between all pairs of consecutive suffixes in a sorted suffix array. + * + * https://en.wikipedia.org/wiki/LCP_array + * + * @author Jakub Szarawarski + */ +public class LCPArray { + private static final char DEFAULT_END_SEQ_CHAR = '$'; + private char END_SEQ_CHAR; + + private SuffixArray suffixArrayBuilder; + + private ArrayList LCP; + + public LCPArray(CharSequence sequence){ + this(sequence, DEFAULT_END_SEQ_CHAR); + } + + public LCPArray(CharSequence sequence, char endChar) { + END_SEQ_CHAR = endChar; + suffixArrayBuilder = new SuffixArray(sequence, endChar); + } + + public ArrayList getLCPArray() { + if(LCP == null){ + LCPAlgorithm(); + } + return LCP; + } + + private void LCPAlgorithm() { + ArrayList LCPR = getLCPR(); + getLCPfromLCPR(LCPR); + } + + private ArrayList getLCPR() { + ArrayList KMRArray = suffixArrayBuilder.getKMRarray(); + ArrayList suffixArray = suffixArrayBuilder.getSuffixArray(); + String string = suffixArrayBuilder.getString(); + int length = KMRArray.size(); + + ArrayList LCPR = new ArrayList(); // helper array, LCP[i] = LCPR[suffixArray[i]] + + int startingValue = 0; + + for(int i=0; i 0 ? LCPRValue-1 : 0; + } + } + + return LCPR; + } + + private void getLCPfromLCPR(ArrayList LCPR) { + ArrayList suffixArray = suffixArrayBuilder.getSuffixArray(); + int length = suffixArray.size(); + + LCP = new ArrayList(); + LCP.add(null); //no value for LCP[0] + + for(int i=1; i getKMRarray() { return KMRarray; } + public String getString(){ + return string; + } + /** * Creates suffix array using KMR algorithm with O(n log^2 n) complexity. * From 9ccb6d478a53fc9015f94d1826cd4c5eeb6b4cc3 Mon Sep 17 00:00:00 2001 From: kubaszar Date: Wed, 28 Jun 2017 02:35:44 +0200 Subject: [PATCH 7/8] Added LCP Array - README --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index c8a64916..930ae6a0 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,7 @@ This is a collection of algorithms and data structures which I've implement over * [Implicit Key Treap](src/com/jwetherell/algorithms/data_structures/ImplicitKeyTreap.java) * [KD Tree (k-dimensional tree or k-d tree)](src/com/jwetherell/algorithms/data_structures/KDTree.java) * [List [backed by an array or a linked list]](src/com/jwetherell/algorithms/data_structures/List.java) +* [LCP Array (Longest Common Prefix) [backed by a Suffix Array]](src/com/jwetherell/algorithms/data_structures/LCPArray.java) * [Matrix](src/com/jwetherell/algorithms/data_structures/Matrix.java) * [Patricia Trie](src/com/jwetherell/algorithms/data_structures/PatriciaTrie.java) * [Quad-Tree (Point-Region or MX-CIF)](src/com/jwetherell/algorithms/data_structures/QuadTree.java) From 2f826ce467022bfeedca194221ff0ef11e9d5775 Mon Sep 17 00:00:00 2001 From: Justin Wetherell Date: Mon, 3 Jul 2017 11:40:17 -0400 Subject: [PATCH 8/8] Code cleanup --- .../algorithms/data_structures/KdTree.java | 2 +- .../algorithms/data_structures/LCPArray.java | 49 +++++----- .../data_structures/SuffixArray.java | 92 +++++++++---------- .../data_structures/test/SuffixArrayTest.java | 1 - 4 files changed, 66 insertions(+), 78 deletions(-) diff --git a/src/com/jwetherell/algorithms/data_structures/KdTree.java b/src/com/jwetherell/algorithms/data_structures/KdTree.java index 0ed727e7..9fecb819 100644 --- a/src/com/jwetherell/algorithms/data_structures/KdTree.java +++ b/src/com/jwetherell/algorithms/data_structures/KdTree.java @@ -21,7 +21,7 @@ * useful data structure for several applications, such as searches involving a * multidimensional search key (e.g. range searches and nearest neighbor * searches). k-d trees are a special case of binary space partitioning trees. - * + *
* @author Justin Wetherell * @see K-d_tree (Wikipedia) */ diff --git a/src/com/jwetherell/algorithms/data_structures/LCPArray.java b/src/com/jwetherell/algorithms/data_structures/LCPArray.java index 9726fc41..88d299c8 100644 --- a/src/com/jwetherell/algorithms/data_structures/LCPArray.java +++ b/src/com/jwetherell/algorithms/data_structures/LCPArray.java @@ -6,17 +6,18 @@ * In computer science, the longest common prefix array (LCP array) is an auxiliary * data structure to the suffix array. It stores the lengths of the longest common * prefixes (LCPs) between all pairs of consecutive suffixes in a sorted suffix array. - * + *

* https://en.wikipedia.org/wiki/LCP_array - * + *
* @author Jakub Szarawarski + * @author Justin Wetherell */ public class LCPArray { + private static final char DEFAULT_END_SEQ_CHAR = '$'; - private char END_SEQ_CHAR; + private char END_SEQ_CHAR; private SuffixArray suffixArrayBuilder; - private ArrayList LCP; public LCPArray(CharSequence sequence){ @@ -25,43 +26,37 @@ public LCPArray(CharSequence sequence){ public LCPArray(CharSequence sequence, char endChar) { END_SEQ_CHAR = endChar; - suffixArrayBuilder = new SuffixArray(sequence, endChar); + suffixArrayBuilder = new SuffixArray(sequence, END_SEQ_CHAR); } public ArrayList getLCPArray() { - if(LCP == null){ + if (LCP == null) LCPAlgorithm(); - } return LCP; } private void LCPAlgorithm() { - ArrayList LCPR = getLCPR(); + final ArrayList LCPR = getLCPR(); getLCPfromLCPR(LCPR); } private ArrayList getLCPR() { - ArrayList KMRArray = suffixArrayBuilder.getKMRarray(); - ArrayList suffixArray = suffixArrayBuilder.getSuffixArray(); - String string = suffixArrayBuilder.getString(); - int length = KMRArray.size(); - - ArrayList LCPR = new ArrayList(); // helper array, LCP[i] = LCPR[suffixArray[i]] + final ArrayList KMRArray = suffixArrayBuilder.getKMRarray(); + final ArrayList suffixArray = suffixArrayBuilder.getSuffixArray(); + final String string = suffixArrayBuilder.getString(); + final int length = KMRArray.size(); + final ArrayList LCPR = new ArrayList(); // helper array, LCP[i] = LCPR[suffixArray[i]] int startingValue = 0; - - for(int i=0; i 0 ? LCPRValue-1 : 0; } @@ -71,14 +66,12 @@ private ArrayList getLCPR() { } private void getLCPfromLCPR(ArrayList LCPR) { - ArrayList suffixArray = suffixArrayBuilder.getSuffixArray(); - int length = suffixArray.size(); + final ArrayList suffixArray = suffixArrayBuilder.getSuffixArray(); + final int length = suffixArray.size(); LCP = new ArrayList(); LCP.add(null); //no value for LCP[0] - - for(int i=1; i * https://en.wikipedia.org/wiki/Suffix_array - * - * This implementation returns starting indexes instead of full suffixes - * + *

+ * NOTE: This implementation returns starting indexes instead of full suffixes + *
* @author Jakub Szarawarski + * @author Justin Wetherell */ public class SuffixArray { + + private static final StringBuilder STRING_BUILDER = new StringBuilder(); private static final char DEFAULT_END_SEQ_CHAR = '$'; + private char END_SEQ_CHAR; private String string; private ArrayList suffixArray = null; @@ -32,9 +36,8 @@ public SuffixArray(CharSequence sequence, char endChar) { } public ArrayList getSuffixArray() { - if(suffixArray == null){ + if (suffixArray == null) KMRalgorithm(); - } return suffixArray; } @@ -42,9 +45,8 @@ public ArrayList getSuffixArray() { * @return inverted suffix array */ public ArrayList getKMRarray() { - if (KMRarray == null) { + if (KMRarray == null) KMRalgorithm(); - } return KMRarray; } @@ -62,24 +64,22 @@ public String getString(){ * KMR for radius bigger than string length is the inverted suffix array */ private void KMRalgorithm() { - int length = string.length(); + final int length = string.length(); - ArrayList KMR = getBasicKMR(length); ArrayList KMRinvertedList = new ArrayList(); + ArrayList KMR = getBasicKMR(length); int radius = 1; - - while(radius < length){ + while (radius < length) { KMRinvertedList = getKMRinvertedList(KMR, radius, length); - KMR = getKMR(KMRinvertedList, radius, length); + KMR = getKMR(KMRinvertedList, length); radius *= 2; } KMRarray = new ArrayList(KMR.subList(0, length)); suffixArray = new ArrayList(); - for(KMRsWithIndex kmr : KMRinvertedList){ + for (KMRsWithIndex kmr : KMRinvertedList) suffixArray.add(kmr.index); - } } /** @@ -90,12 +90,13 @@ private void KMRalgorithm() { * @param length string length * @return KMR array for new radius */ - private ArrayList getKMR(ArrayList KMRinvertedList, int radius, int length) { - ArrayList KMR = new ArrayList(length*2); - for(int i=0; i<2*length; i++) KMR.add(-1); + private ArrayList getKMR(ArrayList KMRinvertedList, int length) { + final ArrayList KMR = new ArrayList(length*2); + for (int i=0; i<2*length; i++) + KMR.add(-1); int counter = 0; - for(int i=0; i0 && substringsAreEqual(KMRinvertedList, i)) counter++; KMR.set(KMRinvertedList.get(i).index, counter); @@ -105,8 +106,8 @@ private ArrayList getKMR(ArrayList KMRinvertedList, int } private boolean substringsAreEqual(ArrayList KMRinvertedList, int i) { - return KMRinvertedList.get(i-1).beginKMR.equals(KMRinvertedList.get(i).beginKMR) == false || - KMRinvertedList.get(i-1).endKMR.equals(KMRinvertedList.get(i).endKMR) == false; + return (KMRinvertedList.get(i-1).beginKMR.equals(KMRinvertedList.get(i).beginKMR) == false) || + (KMRinvertedList.get(i-1).endKMR.equals(KMRinvertedList.get(i).endKMR) == false); } /** @@ -118,25 +119,22 @@ private boolean substringsAreEqual(ArrayList KMRinvertedList, int * @return list of KMRsWithIndex which indexes are nearly inverted KMR array */ private ArrayList getKMRinvertedList(ArrayList KMR, int radius, int length) { - ArrayList KMRinvertedList = new ArrayList(); - - for(int i=0; i KMRinvertedList = new ArrayList(); + for (int i=0; i() { - @Override - public int compare(KMRsWithIndex A, KMRsWithIndex B) { - if (A.beginKMR.equals(B.beginKMR) == false) { - return A.beginKMR.compareTo(B.beginKMR); - } - if (A.endKMR.equals(B.endKMR) == false) { - return A.endKMR.compareTo(B.endKMR); + Collections.sort(KMRinvertedList, + new Comparator() { + @Override + public int compare(KMRsWithIndex A, KMRsWithIndex B) { + if (A.beginKMR.equals(B.beginKMR) == false) + return A.beginKMR.compareTo(B.beginKMR); + if (A.endKMR.equals(B.endKMR) == false) + return A.endKMR.compareTo(B.endKMR); + return A.index.compareTo(B.index); } - return A.index.compareTo(B.index); } - }); - + ); return KMRinvertedList; } @@ -147,23 +145,21 @@ public int compare(KMRsWithIndex A, KMRsWithIndex B) { * @return pseudo KMR array for radius=1 */ private ArrayList getBasicKMR(int length) { - ArrayList result = new ArrayList(length*2); - char[] characters = string.toCharArray(); - for(int i=0; i result = new ArrayList(length*2); + final char[] characters = string.toCharArray(); + for (int i=0; i suffixSet = suffixTree.getSuffixes(); ArrayList suffixArray = suffixArrayBuilder.getSuffixArray(); - int length = string.length(); int i=0; for(String suffix : suffixSet){ String substring = string.substring(suffixArray.get(i++));