diff --git a/README.md b/README.md index eeb6fb6c..930ae6a0 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,7 @@ This is a collection of algorithms and data structures which I've implement over * [Implicit Key Treap](src/com/jwetherell/algorithms/data_structures/ImplicitKeyTreap.java) * [KD Tree (k-dimensional tree or k-d tree)](src/com/jwetherell/algorithms/data_structures/KDTree.java) * [List [backed by an array or a linked list]](src/com/jwetherell/algorithms/data_structures/List.java) +* [LCP Array (Longest Common Prefix) [backed by a Suffix Array]](src/com/jwetherell/algorithms/data_structures/LCPArray.java) * [Matrix](src/com/jwetherell/algorithms/data_structures/Matrix.java) * [Patricia Trie](src/com/jwetherell/algorithms/data_structures/PatriciaTrie.java) * [Quad-Tree (Point-Region or MX-CIF)](src/com/jwetherell/algorithms/data_structures/QuadTree.java) @@ -57,6 +58,7 @@ This is a collection of algorithms and data structures which I've implement over * [Skip List](src/com/jwetherell/algorithms/data_structures/SkipList.java) * [Splay Tree](src/com/jwetherell/algorithms/data_structures/SplayTree.java) * [Stack [backed by an array or a linked list]](src/com/jwetherell/algorithms/data_structures/Stack.java) +* [Suffix Array](src/com/jwetherell/algorithms/data_structures/SuffixArray.java) * [Suffix Tree (Ukkonen's algorithm)](src/com/jwetherell/algorithms/data_structures/SuffixTree.java) * [Suffix Trie [backed by a Trie]](src/com/jwetherell/algorithms/data_structures/SuffixTrie.java) * [Treap](src/com/jwetherell/algorithms/data_structures/Treap.java) @@ -150,7 +152,9 @@ This is a collection of algorithms and data structures which I've implement over * Graph Traversal - [Depth First Traversal](src/com/jwetherell/algorithms/graph/DepthFirstTraversal.java) - [Breadth First Traversal](src/com/jwetherell/algorithms/graph/BreadthFirstTraversal.java) -* [Edmonds Karp](src/com/jwetherell/algorithms/graph/EdmondsKarp.java) +* [Edmonds 
Karp](src/com/jwetherell/algorithms/graph/EdmondsKarp.java) +* Matching + - [Turbo Matching](src/com/jwetherell/algorithms/graph/TurboMatching.java) ## Search * Get index of value in array diff --git a/src/com/jwetherell/algorithms/data_structures/KdTree.java b/src/com/jwetherell/algorithms/data_structures/KdTree.java index 0ed727e7..9fecb819 100644 --- a/src/com/jwetherell/algorithms/data_structures/KdTree.java +++ b/src/com/jwetherell/algorithms/data_structures/KdTree.java @@ -21,7 +21,7 @@ * useful data structure for several applications, such as searches involving a * multidimensional search key (e.g. range searches and nearest neighbor * searches). k-d trees are a special case of binary space partitioning trees. - * + *
* @author Justin Wetherell * @see K-d_tree (Wikipedia) */ diff --git a/src/com/jwetherell/algorithms/data_structures/LCPArray.java b/src/com/jwetherell/algorithms/data_structures/LCPArray.java new file mode 100644 index 00000000..88d299c8 --- /dev/null +++ b/src/com/jwetherell/algorithms/data_structures/LCPArray.java @@ -0,0 +1,77 @@ +package com.jwetherell.algorithms.data_structures; + +import java.util.ArrayList; + +/** + * In computer science, the longest common prefix array (LCP array) is an auxiliary + * data structure to the suffix array. It stores the lengths of the longest common + * prefixes (LCPs) between all pairs of consecutive suffixes in a sorted suffix array. + *

+ * https://en.wikipedia.org/wiki/LCP_array + *
+ * @author Jakub Szarawarski + * @author Justin Wetherell + */ +public class LCPArray { + + private static final char DEFAULT_END_SEQ_CHAR = '$'; + + private char END_SEQ_CHAR; + private SuffixArray suffixArrayBuilder; + private ArrayList LCP; + + public LCPArray(CharSequence sequence){ + this(sequence, DEFAULT_END_SEQ_CHAR); + } + + public LCPArray(CharSequence sequence, char endChar) { + END_SEQ_CHAR = endChar; + suffixArrayBuilder = new SuffixArray(sequence, END_SEQ_CHAR); + } + + public ArrayList getLCPArray() { + if (LCP == null) + LCPAlgorithm(); + return LCP; + } + + private void LCPAlgorithm() { + final ArrayList LCPR = getLCPR(); + getLCPfromLCPR(LCPR); + } + + private ArrayList getLCPR() { + final ArrayList KMRArray = suffixArrayBuilder.getKMRarray(); + final ArrayList suffixArray = suffixArrayBuilder.getSuffixArray(); + final String string = suffixArrayBuilder.getString(); + final int length = KMRArray.size(); + final ArrayList LCPR = new ArrayList(); // helper array, LCP[i] = LCPR[suffixArray[i]] + + int startingValue = 0; + for (int i=0; i 0 ? LCPRValue-1 : 0; + } + } + + return LCPR; + } + + private void getLCPfromLCPR(ArrayList LCPR) { + final ArrayList suffixArray = suffixArrayBuilder.getSuffixArray(); + final int length = suffixArray.size(); + + LCP = new ArrayList(); + LCP.add(null); //no value for LCP[0] + for (int i=1; i + * https://en.wikipedia.org/wiki/Suffix_array + *

+ * NOTE: This implementation returns starting indexes instead of full suffixes + *
+ * @author Jakub Szarawarski + * @author Justin Wetherell + */ +public class SuffixArray { + + private static final StringBuilder STRING_BUILDER = new StringBuilder(); + private static final char DEFAULT_END_SEQ_CHAR = '$'; + + private char END_SEQ_CHAR; + private String string; + private ArrayList suffixArray = null; + private ArrayList KMRarray = null; + + public SuffixArray(CharSequence sequence) { + this(sequence, DEFAULT_END_SEQ_CHAR); + } + + public SuffixArray(CharSequence sequence, char endChar) { + END_SEQ_CHAR = endChar; + string = buildStringWithEndChar(sequence); + } + + public ArrayList getSuffixArray() { + if (suffixArray == null) + KMRalgorithm(); + return suffixArray; + } + + /** + * @return inverted suffix array + */ + public ArrayList getKMRarray() { + if (KMRarray == null) + KMRalgorithm(); + return KMRarray; + } + + public String getString(){ + return string; + } + + /** + * Creates suffix array using KMR algorithm with O(n log^2 n) complexity. + * + * For radius r: + * KMR[i] == k, + * when string[i..i+r-1] is kth r-letter substring of string sorted lexicographically + * KMR is counted for radius = 1,2,4,8 ... + * KMR for radius bigger than string length is the inverted suffix array + */ + private void KMRalgorithm() { + final int length = string.length(); + + ArrayList KMRinvertedList = new ArrayList(); + ArrayList KMR = getBasicKMR(length); + + int radius = 1; + while (radius < length) { + KMRinvertedList = getKMRinvertedList(KMR, radius, length); + KMR = getKMR(KMRinvertedList, length); + radius *= 2; + } + + KMRarray = new ArrayList(KMR.subList(0, length)); + suffixArray = new ArrayList(); + for (KMRsWithIndex kmr : KMRinvertedList) + suffixArray.add(kmr.index); + } + + /** + * Creates KMR array for new radius from nearly inverted array. + * Elements from inverted array need to be grouped by substring they represent. 
+ * + * @param KMRinvertedList indexes are nearly inverted KMR array + * @param length string length + * @return KMR array for new radius + */ + private ArrayList getKMR(ArrayList KMRinvertedList, int length) { + final ArrayList KMR = new ArrayList(length*2); + for (int i=0; i<2*length; i++) + KMR.add(-1); + + int counter = 0; + for (int i=0; i0 && substringsAreEqual(KMRinvertedList, i)) + counter++; + KMR.set(KMRinvertedList.get(i).index, counter); + } + + return KMR; + } + + private boolean substringsAreEqual(ArrayList KMRinvertedList, int i) { + return (KMRinvertedList.get(i-1).beginKMR.equals(KMRinvertedList.get(i).beginKMR) == false) || + (KMRinvertedList.get(i-1).endKMR.equals(KMRinvertedList.get(i).endKMR) == false); + } + + /** + * helper method to create KMR array for radius = radius from KMR array for radius = radius/2 + * + * @param KMR KMR array for radius = radius/2 + * @param radius new radius + * @param length string length + * @return list of KMRsWithIndex which indexes are nearly inverted KMR array + */ + private ArrayList getKMRinvertedList(ArrayList KMR, int radius, int length) { + final ArrayList KMRinvertedList = new ArrayList(); + for (int i=0; i() { + @Override + public int compare(KMRsWithIndex A, KMRsWithIndex B) { + if (A.beginKMR.equals(B.beginKMR) == false) + return A.beginKMR.compareTo(B.beginKMR); + if (A.endKMR.equals(B.endKMR) == false) + return A.endKMR.compareTo(B.endKMR); + return A.index.compareTo(B.index); + } + } + ); + return KMRinvertedList; + } + + /** + * KMR array for radius=1, instead of initial natural numbers ascii codes are used + * + * @param length length of string + * @return pseudo KMR array for radius=1 + */ + private ArrayList getBasicKMR(int length) { + final ArrayList result = new ArrayList(length*2); + final char[] characters = string.toCharArray(); + for (int i=0; i + * https://en.wikipedia.org/wiki/Matching_(graph_theory) + *
+ * @author Jakub Szarawarski + */ +public class TurboMatching { + + /** + * Computes maximum matching, using turbomatching algorithm based on augmenting paths with O(EV) complexity. + * + * @param graph bipartite graph + * @param parameter of graph on which network is based + * @return a MatchingResult class instance containing a map of mates for each paired vertex and number of pairs + */ + public static > MatchingResult getMaximumMatching(Graph graph){ + Map, Graph.Vertex> mate = new HashMap, Graph.Vertex>(); + + while(pathset(graph, mate)); + + return new MatchingResult(mate); + } + + /** + * Searches for an augmenting path for each unmatched vertex. + * + * @param graph bipartite graph + * @param mate map containing a mate for each matched vertex + * @return information if any augmenting path was found + */ + private static > boolean pathset(Graph graph, Map, Graph.Vertex> mate){ + Set> visited = new HashSet>(); + + boolean result = false; + + for(Graph.Vertex vertex : graph.getVertices()){ + if (mate.containsKey(vertex) == false){ + if (path(graph, mate, visited, vertex)) + result = true; + } + } + + return result; + } + + /** + * Searches for an augmenting path for a vertex. + * Refreshes mates map appropriately. + * + * @param graph bipartite graph + * @param mate map containing a mate for each matched vertex + * @param visited set containing vertices visited in current pathset + * @param vertex regarded vertex + * @param parameter of graph on which network is based + * @return information if an augmenting path was found + */ + private static > boolean path(Graph graph, Map, Graph.Vertex> mate, Set> visited, Graph.Vertex vertex){ + if (visited.contains(vertex)) return false; + visited.add(vertex); + + for(Graph.Edge edge : vertex.getEdges()){ + + Graph.Vertex neighbour = edge.getFromVertex().equals(vertex) ? 
edge.getToVertex() : edge.getFromVertex(); + + if (mate.containsKey(neighbour) == false || path(graph, mate, visited, mate.get(neighbour))){ + mate.put(vertex, neighbour); + mate.put(neighbour, vertex); + return true; + } + } + + return false; + } + + + public static class MatchingResult>{ + private final Map, Graph.Vertex> mate; + private final int size; + + private MatchingResult(Map, Graph.Vertex> mate){ + this.mate = mate; + this.size = mate.size()/2; + } + + /** + * @return the number of edges in independent edge set + */ + public int getSize(){ + return this.size; + } + + /** + * @return a symmetric map that contains a mate for each matched vertex + */ + public Map, Graph.Vertex> getMate(){ + return this.mate; + } + } +} diff --git a/test/com/jwetherell/algorithms/data_structures/test/LCPArrayTest.java b/test/com/jwetherell/algorithms/data_structures/test/LCPArrayTest.java new file mode 100644 index 00000000..3bfbe3fa --- /dev/null +++ b/test/com/jwetherell/algorithms/data_structures/test/LCPArrayTest.java @@ -0,0 +1,52 @@ +package com.jwetherell.algorithms.data_structures.test; + +import com.jwetherell.algorithms.data_structures.LCPArray; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Arrays; + +import static org.junit.Assert.*; + +public class LCPArrayTest { + + @Test + public void smallTest(){ + String string = "asdasdd"; + LCPArray LCPArrayBuilder = new LCPArray(string); + ArrayList LCPArray = LCPArrayBuilder.getLCPArray(); + ArrayList result = new ArrayList(); + + result.addAll(Arrays.asList(null, 0, 3, 0, 1, 1, 0, 2)); + + assertEquals(LCPArray, result); + } + + @Test + public void longTest(){ + + String string = "aasfaasdsadasdfasdasdasdasfdasfassdfas"; + LCPArray LCPArrayBuilder = new LCPArray(string); + ArrayList LCPArray = LCPArrayBuilder.getLCPArray(); + ArrayList result = new ArrayList(); + + result.addAll(Arrays.asList(null, 0, 3, 1, 1, 2, 8, 5, 3, 3, 2, 4, 3, 2, 0, + 6, 4, 3, 4, 1, 4, 1, 0, 2, 3, 3, 1, 0, 1, 1, 7, 4, 2, 
5, 2, 1, 3, 2, 1)); + + assertEquals(LCPArray, result); + } + + @Test + public void singleLetterTest(){ + + String string = "aaaaaaaaaaaa"; + LCPArray LCPArrayBuilder = new LCPArray(string); + ArrayList LCPArray = LCPArrayBuilder.getLCPArray(); + ArrayList result = new ArrayList(); + + result.addAll(Arrays.asList(null , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)); + + assertEquals(LCPArray, result); + } + +} \ No newline at end of file diff --git a/test/com/jwetherell/algorithms/data_structures/test/SuffixArrayTest.java b/test/com/jwetherell/algorithms/data_structures/test/SuffixArrayTest.java new file mode 100644 index 00000000..18002fbc --- /dev/null +++ b/test/com/jwetherell/algorithms/data_structures/test/SuffixArrayTest.java @@ -0,0 +1,44 @@ +package com.jwetherell.algorithms.data_structures.test; + +import com.jwetherell.algorithms.data_structures.SuffixArray; +import com.jwetherell.algorithms.data_structures.SuffixTree; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Set; + +import static org.junit.Assert.*; + +public class SuffixArrayTest { + + @Test + public void testSuffixArray(){ + String string = "aasfaasdsadasdfasdasdasdasfdasfassdfas"; + + SuffixArray suffixArrayBuilder = new SuffixArray(string); + SuffixTree suffixTree = new SuffixTree(string); + + Set suffixSet = suffixTree.getSuffixes(); + ArrayList suffixArray = suffixArrayBuilder.getSuffixArray(); + + int i=0; + for(String suffix : suffixSet){ + String substring = string.substring(suffixArray.get(i++)); + assertTrue(suffix.equals(substring)); + } + } + + @Test + public void testKMRarray(){ + String string = "aasfaasdsadasdfasdasdasdasfdasfassdfas"; + + SuffixArray suffixArrayBuilder = new SuffixArray(string); + ArrayList suffixArray = suffixArrayBuilder.getSuffixArray(); + ArrayList KMRarray = suffixArrayBuilder.getKMRarray(); + + int length = string.length(); + for(int i=0; i v_a1 = new Graph.Vertex(1); + private final Graph.Vertex v_a2 = new Graph.Vertex(2); + private final 
Graph.Vertex v_a3 = new Graph.Vertex(3); + private final Graph.Vertex v_b1 = new Graph.Vertex(4); + private final Graph.Vertex v_b2 = new Graph.Vertex(5); + private final Graph.Vertex v_b3 = new Graph.Vertex(6); + + private List> vertices = new ArrayList>(); + + { + vertices.add(v_a1); + vertices.add(v_a2); + vertices.add(v_a3); + vertices.add(v_b1); + vertices.add(v_b2); + vertices.add(v_b3); + } + @Test + public void testFullBipartiteGraph(){ + List> edges = new ArrayList>(); + { + edges.add(new Graph.Edge(1, v_a1, v_b1)); + edges.add(new Graph.Edge(1, v_a1, v_b2)); + edges.add(new Graph.Edge(1, v_a1, v_b3)); + edges.add(new Graph.Edge(1, v_a2, v_b1)); + edges.add(new Graph.Edge(1, v_a2, v_b2)); + edges.add(new Graph.Edge(1, v_a2, v_b3)); + edges.add(new Graph.Edge(1, v_a3, v_b1)); + edges.add(new Graph.Edge(1, v_a3, v_b2)); + edges.add(new Graph.Edge(1, v_a3, v_b3)); + } + + final Graph graph = new Graph(vertices, edges); + + TurboMatching.MatchingResult matchingResult = TurboMatching.getMaximumMatching(graph); + assertTrue(matchingResult.getSize() == 3); + for(Graph.Vertex vertex : vertices){ + assertTrue(matchingResult.getMate().get(matchingResult.getMate().get(vertex)).equals(vertex)); + } + } + + @Test + public void testSingleEdgeForVertex(){ + List> edges = new ArrayList>(); + { + edges.add(new Graph.Edge(1, v_a1, v_b1)); + edges.add(new Graph.Edge(1, v_a2, v_b2)); + edges.add(new Graph.Edge(1, v_a3, v_b3)); + } + + final Graph graph = new Graph(vertices, edges); + + TurboMatching.MatchingResult matchingResult = TurboMatching.getMaximumMatching(graph); + + assertTrue(matchingResult.getSize() == 3); + assertTrue(matchingResult.getMate().get(v_a1).equals(v_b1)); + assertTrue(matchingResult.getMate().get(v_a2).equals(v_b2)); + assertTrue(matchingResult.getMate().get(v_a3).equals(v_b3)); + assertTrue(matchingResult.getMate().get(v_b1).equals(v_a1)); + assertTrue(matchingResult.getMate().get(v_b2).equals(v_a2)); + 
assertTrue(matchingResult.getMate().get(v_b3).equals(v_a3)); + } + + @Test + public void testEmptyGraph(){ + List> edges = new ArrayList>(); + { + } + + final Graph graph = new Graph(vertices, edges); + + TurboMatching.MatchingResult matchingResult = TurboMatching.getMaximumMatching(graph); + + assertTrue(matchingResult.getSize() == 0); + assertTrue(matchingResult.getMate().isEmpty()); + } + + @Test + public void testTwoMatched(){ + List> edges = new ArrayList>(); + { + edges.add(new Graph.Edge(1, v_a1, v_b1)); + edges.add(new Graph.Edge(1, v_a1, v_b3)); + edges.add(new Graph.Edge(1, v_a2, v_b2)); + edges.add(new Graph.Edge(1, v_a3, v_b2)); + } + + final Graph graph = new Graph(vertices, edges); + TurboMatching.MatchingResult matchingResult = TurboMatching.getMaximumMatching(graph); + + assertTrue(matchingResult.getSize() == 2); + assertTrue(matchingResult.getMate().containsKey(v_a1)); + assertTrue(matchingResult.getMate().containsKey(v_b2)); + assertTrue(matchingResult.getMate().containsValue(v_a1)); + assertTrue(matchingResult.getMate().containsValue(v_b2)); + } + + @Test + public void testOneMatched(){ + List> edges = new ArrayList>(); + { + edges.add(new Graph.Edge(1, v_a1, v_b1)); + edges.add(new Graph.Edge(1, v_a1, v_b2)); + edges.add(new Graph.Edge(1, v_a1, v_b3)); + } + + final Graph graph = new Graph(vertices, edges); + TurboMatching.MatchingResult matchingResult = TurboMatching.getMaximumMatching(graph); + + assertTrue(matchingResult.getSize() == 1); + assertTrue(matchingResult.getMate().containsKey(v_a1)); + assertTrue(matchingResult.getMate().containsValue(v_a1)); + assertFalse(matchingResult.getMate().containsKey(v_a2)); + assertFalse(matchingResult.getMate().containsValue(v_a2)); + assertFalse(matchingResult.getMate().containsKey(v_a3)); + assertFalse(matchingResult.getMate().containsValue(v_a3)); + } + + +} \ No newline at end of file