forked from phishman3579/java-algorithms-implementation
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
9ccb6d4
commit 2f826ce
Showing
4 changed files
with
66 additions
and
78 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,7 +21,7 @@ | |
* useful data structure for several applications, such as searches involving a | ||
* multidimensional search key (e.g. range searches and nearest neighbor | ||
* searches). k-d trees are a special case of binary space partitioning trees. | ||
* | ||
* <br> | ||
* @author Justin Wetherell <[email protected]> | ||
* @see <a href="http://en.wikipedia.org/wiki/K-d_tree">K-d_tree (Wikipedia)</a> | ||
*/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,17 +6,18 @@ | |
* In computer science, the longest common prefix array (LCP array) is an auxiliary | ||
* data structure to the suffix array. It stores the lengths of the longest common | ||
* prefixes (LCPs) between all pairs of consecutive suffixes in a sorted suffix array. | ||
* | ||
* <p> | ||
* https://en.wikipedia.org/wiki/LCP_array | ||
* | ||
* <br> | ||
* @author Jakub Szarawarski <[email protected]> | ||
* @author Justin Wetherell <[email protected]> | ||
*/ | ||
public class LCPArray { | ||
|
||
private static final char DEFAULT_END_SEQ_CHAR = '$'; | ||
private char END_SEQ_CHAR; | ||
|
||
private char END_SEQ_CHAR; | ||
private SuffixArray suffixArrayBuilder; | ||
|
||
private ArrayList<Integer> LCP; | ||
|
||
public LCPArray(CharSequence sequence){ | ||
|
@@ -25,43 +26,37 @@ public LCPArray(CharSequence sequence){ | |
|
||
public LCPArray(CharSequence sequence, char endChar) { | ||
END_SEQ_CHAR = endChar; | ||
suffixArrayBuilder = new SuffixArray(sequence, endChar); | ||
suffixArrayBuilder = new SuffixArray(sequence, END_SEQ_CHAR); | ||
} | ||
|
||
public ArrayList<Integer> getLCPArray() { | ||
if(LCP == null){ | ||
if (LCP == null) | ||
LCPAlgorithm(); | ||
} | ||
return LCP; | ||
} | ||
|
||
private void LCPAlgorithm() { | ||
ArrayList<Integer> LCPR = getLCPR(); | ||
final ArrayList<Integer> LCPR = getLCPR(); | ||
getLCPfromLCPR(LCPR); | ||
} | ||
|
||
private ArrayList<Integer> getLCPR() { | ||
ArrayList<Integer> KMRArray = suffixArrayBuilder.getKMRarray(); | ||
ArrayList<Integer> suffixArray = suffixArrayBuilder.getSuffixArray(); | ||
String string = suffixArrayBuilder.getString(); | ||
int length = KMRArray.size(); | ||
|
||
ArrayList<Integer> LCPR = new ArrayList<Integer>(); // helper array, LCP[i] = LCPR[suffixArray[i]] | ||
final ArrayList<Integer> KMRArray = suffixArrayBuilder.getKMRarray(); | ||
final ArrayList<Integer> suffixArray = suffixArrayBuilder.getSuffixArray(); | ||
final String string = suffixArrayBuilder.getString(); | ||
final int length = KMRArray.size(); | ||
final ArrayList<Integer> LCPR = new ArrayList<Integer>(); // helper array, LCP[i] = LCPR[suffixArray[i]] | ||
|
||
int startingValue = 0; | ||
|
||
for(int i=0; i<length; i++){ | ||
if(KMRArray.get(i).equals(0)){ | ||
for (int i=0; i<length; i++) { | ||
if(KMRArray.get(i).equals(0)) { | ||
LCPR.add(0); | ||
startingValue = 0; | ||
} | ||
else{ | ||
} else { | ||
int LCPRValue = startingValue; | ||
int predecessor = suffixArray.get(KMRArray.get(i)-1); | ||
|
||
while(string.charAt(i+LCPRValue) == string.charAt(predecessor+LCPRValue)) | ||
final int predecessor = suffixArray.get(KMRArray.get(i)-1); | ||
while (string.charAt(i+LCPRValue) == string.charAt(predecessor+LCPRValue)) | ||
LCPRValue++; | ||
|
||
LCPR.add(LCPRValue); | ||
startingValue = LCPRValue-1 > 0 ? LCPRValue-1 : 0; | ||
} | ||
|
@@ -71,14 +66,12 @@ private ArrayList<Integer> getLCPR() { | |
} | ||
|
||
private void getLCPfromLCPR(ArrayList<Integer> LCPR) { | ||
ArrayList<Integer> suffixArray = suffixArrayBuilder.getSuffixArray(); | ||
int length = suffixArray.size(); | ||
final ArrayList<Integer> suffixArray = suffixArrayBuilder.getSuffixArray(); | ||
final int length = suffixArray.size(); | ||
|
||
LCP = new ArrayList<Integer>(); | ||
LCP.add(null); //no value for LCP[0] | ||
|
||
for(int i=1; i<length; i++){ | ||
for (int i=1; i<length; i++) | ||
LCP.add(LCPR.get(suffixArray.get(i))); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,15 +8,19 @@ | |
* In computer science, a suffix array is a sorted array of all suffixes of a string. | ||
* It is a data structure used, among other things, in full-text indices, data compression | ||
* algorithms and within the field of bibliometrics. | ||
* | ||
* <p> | ||
* https://en.wikipedia.org/wiki/Suffix_array | ||
* | ||
* This implementation returns starting indexes instead of full suffixes | ||
* | ||
* <p> | ||
* NOTE: This implementation returns starting indexes instead of full suffixes | ||
* <br> | ||
* @author Jakub Szarawarski <[email protected]> | ||
* @author Justin Wetherell <[email protected]> | ||
*/ | ||
public class SuffixArray { | ||
|
||
private static final StringBuilder STRING_BUILDER = new StringBuilder(); | ||
private static final char DEFAULT_END_SEQ_CHAR = '$'; | ||
|
||
private char END_SEQ_CHAR; | ||
private String string; | ||
private ArrayList<Integer> suffixArray = null; | ||
|
@@ -32,19 +36,17 @@ public SuffixArray(CharSequence sequence, char endChar) { | |
} | ||
|
||
public ArrayList<Integer> getSuffixArray() { | ||
if(suffixArray == null){ | ||
if (suffixArray == null) | ||
KMRalgorithm(); | ||
} | ||
return suffixArray; | ||
} | ||
|
||
/** | ||
* @return inverted suffix array | ||
*/ | ||
public ArrayList<Integer> getKMRarray() { | ||
if (KMRarray == null) { | ||
if (KMRarray == null) | ||
KMRalgorithm(); | ||
} | ||
return KMRarray; | ||
} | ||
|
||
|
@@ -62,24 +64,22 @@ public String getString(){ | |
* KMR for radius bigger than string length is the inverted suffix array | ||
*/ | ||
private void KMRalgorithm() { | ||
int length = string.length(); | ||
final int length = string.length(); | ||
|
||
ArrayList<Integer> KMR = getBasicKMR(length); | ||
ArrayList<KMRsWithIndex> KMRinvertedList = new ArrayList<KMRsWithIndex>(); | ||
ArrayList<Integer> KMR = getBasicKMR(length); | ||
|
||
int radius = 1; | ||
|
||
while(radius < length){ | ||
while (radius < length) { | ||
KMRinvertedList = getKMRinvertedList(KMR, radius, length); | ||
KMR = getKMR(KMRinvertedList, radius, length); | ||
KMR = getKMR(KMRinvertedList, length); | ||
radius *= 2; | ||
} | ||
|
||
KMRarray = new ArrayList<Integer>(KMR.subList(0, length)); | ||
suffixArray = new ArrayList<Integer>(); | ||
for(KMRsWithIndex kmr : KMRinvertedList){ | ||
for (KMRsWithIndex kmr : KMRinvertedList) | ||
suffixArray.add(kmr.index); | ||
} | ||
} | ||
|
||
/** | ||
|
@@ -90,12 +90,13 @@ private void KMRalgorithm() { | |
* @param length string length | ||
* @return KMR array for new radius | ||
*/ | ||
private ArrayList<Integer> getKMR(ArrayList<KMRsWithIndex> KMRinvertedList, int radius, int length) { | ||
ArrayList<Integer> KMR = new ArrayList<Integer>(length*2); | ||
for(int i=0; i<2*length; i++) KMR.add(-1); | ||
private ArrayList<Integer> getKMR(ArrayList<KMRsWithIndex> KMRinvertedList, int length) { | ||
final ArrayList<Integer> KMR = new ArrayList<Integer>(length*2); | ||
for (int i=0; i<2*length; i++) | ||
KMR.add(-1); | ||
|
||
int counter = 0; | ||
for(int i=0; i<length; i++){ | ||
for (int i=0; i<length; i++){ | ||
if(i>0 && substringsAreEqual(KMRinvertedList, i)) | ||
counter++; | ||
KMR.set(KMRinvertedList.get(i).index, counter); | ||
|
@@ -105,8 +106,8 @@ private ArrayList<Integer> getKMR(ArrayList<KMRsWithIndex> KMRinvertedList, int | |
} | ||
|
||
private boolean substringsAreEqual(ArrayList<KMRsWithIndex> KMRinvertedList, int i) { | ||
return KMRinvertedList.get(i-1).beginKMR.equals(KMRinvertedList.get(i).beginKMR) == false || | ||
KMRinvertedList.get(i-1).endKMR.equals(KMRinvertedList.get(i).endKMR) == false; | ||
return (KMRinvertedList.get(i-1).beginKMR.equals(KMRinvertedList.get(i).beginKMR) == false) || | ||
(KMRinvertedList.get(i-1).endKMR.equals(KMRinvertedList.get(i).endKMR) == false); | ||
} | ||
|
||
/** | ||
|
@@ -118,25 +119,22 @@ private boolean substringsAreEqual(ArrayList<KMRsWithIndex> KMRinvertedList, int | |
* @return list of KMRsWithIndex whose indexes form a nearly inverted KMR array | ||
*/ | ||
private ArrayList<KMRsWithIndex> getKMRinvertedList(ArrayList<Integer> KMR, int radius, int length) { | ||
ArrayList<KMRsWithIndex> KMRinvertedList = new ArrayList<KMRsWithIndex>(); | ||
|
||
for(int i=0; i<length; i++){ | ||
final ArrayList<KMRsWithIndex> KMRinvertedList = new ArrayList<KMRsWithIndex>(); | ||
for (int i=0; i<length; i++) | ||
KMRinvertedList.add(new KMRsWithIndex(KMR.get(i), KMR.get(i+radius), i)); | ||
} | ||
|
||
Collections.sort(KMRinvertedList, new Comparator<KMRsWithIndex>() { | ||
@Override | ||
public int compare(KMRsWithIndex A, KMRsWithIndex B) { | ||
if (A.beginKMR.equals(B.beginKMR) == false) { | ||
return A.beginKMR.compareTo(B.beginKMR); | ||
} | ||
if (A.endKMR.equals(B.endKMR) == false) { | ||
return A.endKMR.compareTo(B.endKMR); | ||
Collections.sort(KMRinvertedList, | ||
new Comparator<KMRsWithIndex>() { | ||
@Override | ||
public int compare(KMRsWithIndex A, KMRsWithIndex B) { | ||
if (A.beginKMR.equals(B.beginKMR) == false) | ||
return A.beginKMR.compareTo(B.beginKMR); | ||
if (A.endKMR.equals(B.endKMR) == false) | ||
return A.endKMR.compareTo(B.endKMR); | ||
return A.index.compareTo(B.index); | ||
} | ||
return A.index.compareTo(B.index); | ||
} | ||
}); | ||
|
||
); | ||
return KMRinvertedList; | ||
} | ||
|
||
|
@@ -147,23 +145,21 @@ public int compare(KMRsWithIndex A, KMRsWithIndex B) { | |
* @return pseudo KMR array for radius=1 | ||
*/ | ||
private ArrayList<Integer> getBasicKMR(int length) { | ||
ArrayList<Integer> result = new ArrayList<Integer>(length*2); | ||
char[] characters = string.toCharArray(); | ||
for(int i=0; i<length; i++){ | ||
final ArrayList<Integer> result = new ArrayList<Integer>(length*2); | ||
final char[] characters = string.toCharArray(); | ||
for (int i=0; i<length; i++) | ||
result.add(new Integer(characters[i])); | ||
} | ||
for(int i=0; i<length; i++){ | ||
for (int i=0; i<length; i++) | ||
result.add(-1); | ||
} | ||
|
||
return result; | ||
} | ||
|
||
private String buildStringWithEndChar(CharSequence sequence) { | ||
StringBuilder builder = new StringBuilder(sequence); | ||
if (builder.indexOf(String.valueOf(END_SEQ_CHAR)) < 0) | ||
builder.append(END_SEQ_CHAR); | ||
return builder.toString(); | ||
STRING_BUILDER.setLength(0); | ||
STRING_BUILDER.append(sequence); | ||
if (STRING_BUILDER.indexOf(String.valueOf(END_SEQ_CHAR)) < 0) | ||
STRING_BUILDER.append(END_SEQ_CHAR); | ||
return STRING_BUILDER.toString(); | ||
} | ||
|
||
private class KMRsWithIndex{ | ||
|
@@ -177,4 +173,4 @@ private class KMRsWithIndex{ | |
this.index = index; | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters