forked from phishman3579/java-algorithms-implementation
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
601 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,7 +21,7 @@ | |
* useful data structure for several applications, such as searches involving a | ||
* multidimensional search key (e.g. range searches and nearest neighbor | ||
* searches). k-d trees are a special case of binary space partitioning trees. | ||
* | ||
* <br> | ||
* @author Justin Wetherell <[email protected]> | ||
* @see <a href="http://en.wikipedia.org/wiki/K-d_tree">K-d_tree (Wikipedia)</a> | ||
*/ | ||
|
77 changes: 77 additions & 0 deletions
77
src/com/jwetherell/algorithms/data_structures/LCPArray.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
package com.jwetherell.algorithms.data_structures; | ||
|
||
import java.util.ArrayList; | ||
|
||
/** | ||
* In computer science, the longest common prefix array (LCP array) is an auxiliary | ||
* data structure to the suffix array. It stores the lengths of the longest common | ||
* prefixes (LCPs) between all pairs of consecutive suffixes in a sorted suffix array. | ||
* <p> | ||
* https://en.wikipedia.org/wiki/LCP_array | ||
* <br> | ||
* @author Jakub Szarawarski <[email protected]> | ||
* @author Justin Wetherell <[email protected]> | ||
*/ | ||
public class LCPArray { | ||
|
||
private static final char DEFAULT_END_SEQ_CHAR = '$'; | ||
|
||
private char END_SEQ_CHAR; | ||
private SuffixArray suffixArrayBuilder; | ||
private ArrayList<Integer> LCP; | ||
|
||
public LCPArray(CharSequence sequence){ | ||
this(sequence, DEFAULT_END_SEQ_CHAR); | ||
} | ||
|
||
public LCPArray(CharSequence sequence, char endChar) { | ||
END_SEQ_CHAR = endChar; | ||
suffixArrayBuilder = new SuffixArray(sequence, END_SEQ_CHAR); | ||
} | ||
|
||
public ArrayList<Integer> getLCPArray() { | ||
if (LCP == null) | ||
LCPAlgorithm(); | ||
return LCP; | ||
} | ||
|
||
private void LCPAlgorithm() { | ||
final ArrayList<Integer> LCPR = getLCPR(); | ||
getLCPfromLCPR(LCPR); | ||
} | ||
|
||
private ArrayList<Integer> getLCPR() { | ||
final ArrayList<Integer> KMRArray = suffixArrayBuilder.getKMRarray(); | ||
final ArrayList<Integer> suffixArray = suffixArrayBuilder.getSuffixArray(); | ||
final String string = suffixArrayBuilder.getString(); | ||
final int length = KMRArray.size(); | ||
final ArrayList<Integer> LCPR = new ArrayList<Integer>(); // helper array, LCP[i] = LCPR[suffixArray[i]] | ||
|
||
int startingValue = 0; | ||
for (int i=0; i<length; i++) { | ||
if(KMRArray.get(i).equals(0)) { | ||
LCPR.add(0); | ||
startingValue = 0; | ||
} else { | ||
int LCPRValue = startingValue; | ||
final int predecessor = suffixArray.get(KMRArray.get(i)-1); | ||
while (string.charAt(i+LCPRValue) == string.charAt(predecessor+LCPRValue)) | ||
LCPRValue++; | ||
LCPR.add(LCPRValue); | ||
startingValue = LCPRValue-1 > 0 ? LCPRValue-1 : 0; | ||
} | ||
} | ||
|
||
return LCPR; | ||
} | ||
|
||
private void getLCPfromLCPR(ArrayList<Integer> LCPR) { | ||
final ArrayList<Integer> suffixArray = suffixArrayBuilder.getSuffixArray(); | ||
final int length = suffixArray.size(); | ||
|
||
LCP = new ArrayList<Integer>(); | ||
LCP.add(null); //no value for LCP[0] | ||
for (int i=1; i<length; i++) | ||
LCP.add(LCPR.get(suffixArray.get(i))); | ||
} | ||
} |
176 changes: 176 additions & 0 deletions
176
src/com/jwetherell/algorithms/data_structures/SuffixArray.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,176 @@ | ||
package com.jwetherell.algorithms.data_structures; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Collections; | ||
import java.util.Comparator; | ||
|
||
/** | ||
* In computer science, a suffix array is a sorted array of all suffixes of a string. | ||
* It is a data structure used, among others, in full text indices, data compression | ||
* algorithms and within the field of bibliometrics. | ||
* <p> | ||
* https://en.wikipedia.org/wiki/Suffix_array | ||
* <p> | ||
* NOTE: This implementation returns starting indexes instead of full suffixes | ||
* <br> | ||
* @author Jakub Szarawarski <[email protected]> | ||
* @author Justin Wetherell <[email protected]> | ||
*/ | ||
public class SuffixArray {

    private static final char DEFAULT_END_SEQ_CHAR = '$';

    /** Orders entries by first-half rank, then second-half rank, then starting index. */
    private static final Comparator<KMRsWithIndex> KMR_ORDER = new Comparator<KMRsWithIndex>() {
        @Override
        public int compare(KMRsWithIndex a, KMRsWithIndex b) {
            if (!a.beginKMR.equals(b.beginKMR))
                return a.beginKMR.compareTo(b.beginKMR);
            if (!a.endKMR.equals(b.endKMR))
                return a.endKMR.compareTo(b.endKMR);
            return a.index.compareTo(b.index);
        }
    };

    private final char endSeqChar;
    private final String string;
    private ArrayList<Integer> suffixArray = null;
    private ArrayList<Integer> KMRarray = null;

    /**
     * Creates a suffix array helper for the sequence using '$' as the end character.
     *
     * @param sequence text to index
     */
    public SuffixArray(CharSequence sequence) {
        this(sequence, DEFAULT_END_SEQ_CHAR);
    }

    /**
     * Creates a suffix array helper for the sequence.
     *
     * @param sequence text to index
     * @param endChar  end-of-sequence character; appended to the text unless the text already
     *                 contains it
     */
    public SuffixArray(CharSequence sequence, char endChar) {
        endSeqChar = endChar;
        string = buildStringWithEndChar(sequence, endSeqChar);
    }

    /**
     * Returns the suffix array, computing it on the first call.
     *
     * @return starting indexes of the suffixes of {@link #getString()} in sorted order
     */
    public ArrayList<Integer> getSuffixArray() {
        if (suffixArray == null)
            KMRalgorithm();
        return suffixArray;
    }

    /**
     * @return inverted suffix array
     */
    public ArrayList<Integer> getKMRarray() {
        if (KMRarray == null)
            KMRalgorithm();
        return KMRarray;
    }

    /**
     * @return the indexed text, including the end character if one was appended
     */
    public String getString() {
        return string;
    }

    /**
     * Creates suffix array using KMR algorithm with O(n log^2 n) complexity.
     *
     * For radius r:
     * KMR[i] == k,
     * when string[i..i+r-1] is kth r-letter substring of string sorted lexicographically
     * KMR is counted for radius = 1,2,4,8 ...
     * KMR for radius bigger than string length is the inverted suffix array
     */
    private void KMRalgorithm() {
        final int length = string.length();

        ArrayList<KMRsWithIndex> KMRinvertedList;
        ArrayList<Integer> KMR = getBasicKMR(length);

        // At least one pass is required: the basic KMR holds character codes, not ranks, and
        // the sorted list is what the suffix array is read from. Without it a one-character
        // string (empty input plus end character) would yield an empty suffix array.
        int radius = 1;
        do {
            KMRinvertedList = getKMRinvertedList(KMR, radius, length);
            KMR = getKMR(KMRinvertedList, length);
            radius *= 2;
        } while (radius < length);

        KMRarray = new ArrayList<Integer>(KMR.subList(0, length));
        suffixArray = new ArrayList<Integer>(length);
        for (KMRsWithIndex kmr : KMRinvertedList)
            suffixArray.add(kmr.index);
    }

    /**
     * Creates KMR array for new radius from nearly inverted array.
     * Elements from inverted array need to be grouped by substring they represent.
     *
     * @param KMRinvertedList indexes are nearly inverted KMR array
     * @param length string length
     * @return KMR array for new radius
     */
    private ArrayList<Integer> getKMR(ArrayList<KMRsWithIndex> KMRinvertedList, int length) {
        // the second half stays at -1 so that reads past the end of the text sort first
        final ArrayList<Integer> KMR = new ArrayList<Integer>(length * 2);
        for (int i = 0; i < 2 * length; i++)
            KMR.add(-1);

        int counter = 0;
        for (int i = 0; i < length; i++) {
            // a new rank starts whenever the entry differs from the one sorted just before it
            if (i > 0 && substringsDiffer(KMRinvertedList, i))
                counter++;
            KMR.set(KMRinvertedList.get(i).index, counter);
        }

        return KMR;
    }

    /**
     * Tells whether the sorted entries at positions i-1 and i represent different substrings.
     *
     * @param KMRinvertedList sorted entries
     * @param i position of the later entry, must be at least 1
     * @return true if either rank half differs between the two entries
     */
    private boolean substringsDiffer(ArrayList<KMRsWithIndex> KMRinvertedList, int i) {
        final KMRsWithIndex previous = KMRinvertedList.get(i - 1);
        final KMRsWithIndex current = KMRinvertedList.get(i);
        return !previous.beginKMR.equals(current.beginKMR)
                || !previous.endKMR.equals(current.endKMR);
    }

    /**
     * helper method to create KMR array for radius = radius from KMR array for radius = radius/2
     *
     * @param KMR KMR array for radius = radius/2
     * @param radius new radius
     * @param length string length
     * @return list of KMRsWithIndex which indexes are nearly inverted KMR array
     */
    private ArrayList<KMRsWithIndex> getKMRinvertedList(ArrayList<Integer> KMR, int radius, int length) {
        final ArrayList<KMRsWithIndex> KMRinvertedList = new ArrayList<KMRsWithIndex>(length);
        for (int i = 0; i < length; i++)
            KMRinvertedList.add(new KMRsWithIndex(KMR.get(i), KMR.get(i + radius), i));

        Collections.sort(KMRinvertedList, KMR_ORDER);
        return KMRinvertedList;
    }

    /**
     * KMR array for radius=1, instead of initial natural numbers ascii codes are used
     *
     * @param length length of string
     * @return pseudo KMR array for radius=1
     */
    private ArrayList<Integer> getBasicKMR(int length) {
        final ArrayList<Integer> result = new ArrayList<Integer>(length * 2);
        for (int i = 0; i < length; i++)
            result.add(Integer.valueOf(string.charAt(i)));
        for (int i = 0; i < length; i++)
            result.add(-1);
        return result;
    }

    /**
     * Copies the sequence into a String and appends the end character unless the sequence
     * already contains it. Uses a local builder so that instances constructed on different
     * threads do not share mutable state.
     *
     * @param sequence text to copy
     * @param endChar  end-of-sequence character
     * @return the text, terminated by or already containing {@code endChar}
     */
    private static String buildStringWithEndChar(CharSequence sequence, char endChar) {
        final StringBuilder builder = new StringBuilder();
        builder.append(sequence);
        if (builder.indexOf(String.valueOf(endChar)) < 0)
            builder.append(endChar);
        return builder.toString();
    }

    /** Ranks of the two halves of a substring together with the substring's starting index. */
    private static class KMRsWithIndex {
        final Integer beginKMR;
        final Integer endKMR;
        final Integer index;

        KMRsWithIndex(Integer begin, Integer end, Integer index) {
            this.beginKMR = begin;
            this.endKMR = end;
            this.index = index;
        }
    }
}
Oops, something went wrong.