Skip to content

Commit

Permalink
Code cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
phishman3579 committed Jul 3, 2017
1 parent 9ccb6d4 commit 2f826ce
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 78 deletions.
2 changes: 1 addition & 1 deletion src/com/jwetherell/algorithms/data_structures/KdTree.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
* useful data structure for several applications, such as searches involving a
* multidimensional search key (e.g. range searches and nearest neighbor
* searches). k-d trees are a special case of binary space partitioning trees.
*
* <br>
* @author Justin Wetherell <[email protected]>
* @see <a href="http://en.wikipedia.org/wiki/K-d_tree">K-d_tree (Wikipedia)</a>
*/
Expand Down
49 changes: 21 additions & 28 deletions src/com/jwetherell/algorithms/data_structures/LCPArray.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,18 @@
* In computer science, the longest common prefix array (LCP array) is an auxiliary
* data structure to the suffix array. It stores the lengths of the longest common
* prefixes (LCPs) between all pairs of consecutive suffixes in a sorted suffix array.
*
* <p>
* https://en.wikipedia.org/wiki/LCP_array
*
* <br>
* @author Jakub Szarawarski <[email protected]>
* @author Justin Wetherell <[email protected]>
*/
public class LCPArray {

private static final char DEFAULT_END_SEQ_CHAR = '$';
private char END_SEQ_CHAR;

private char END_SEQ_CHAR;
private SuffixArray suffixArrayBuilder;

private ArrayList<Integer> LCP;

public LCPArray(CharSequence sequence){
Expand All @@ -25,43 +26,37 @@ public LCPArray(CharSequence sequence){

/**
 * Creates an LCP array builder for the given sequence.
 *
 * @param sequence text whose suffixes are compared
 * @param endChar  character stored as the end-of-sequence marker and handed
 *                 to the suffix array builder
 */
public LCPArray(CharSequence sequence, char endChar) {
    END_SEQ_CHAR = endChar;
    // Construct the suffix array helper exactly once, using the stored marker.
    suffixArrayBuilder = new SuffixArray(sequence, END_SEQ_CHAR);
}

/**
 * Returns the longest common prefix array, building it on the first call.
 * <p>
 * The element at index 0 is {@code null}; there is no LCP value for the
 * first suffix.
 *
 * @return the cached LCP list (the internal instance, not a copy)
 */
public ArrayList<Integer> getLCPArray() {
    // Lazily computed: LCPAlgorithm() ends up assigning the LCP field.
    if (LCP == null)
        LCPAlgorithm();
    return LCP;
}

/**
 * Computes the LCP array in two steps: first the helper array LCPR is built,
 * then it is reordered into the LCP field.
 */
private void LCPAlgorithm() {
    final ArrayList<Integer> LCPR = getLCPR();
    getLCPfromLCPR(LCPR);
}

private ArrayList<Integer> getLCPR() {
ArrayList<Integer> KMRArray = suffixArrayBuilder.getKMRarray();
ArrayList<Integer> suffixArray = suffixArrayBuilder.getSuffixArray();
String string = suffixArrayBuilder.getString();
int length = KMRArray.size();

ArrayList<Integer> LCPR = new ArrayList<Integer>(); // helper array, LCP[i] = LCPR[suffixArray[i]]
final ArrayList<Integer> KMRArray = suffixArrayBuilder.getKMRarray();
final ArrayList<Integer> suffixArray = suffixArrayBuilder.getSuffixArray();
final String string = suffixArrayBuilder.getString();
final int length = KMRArray.size();
final ArrayList<Integer> LCPR = new ArrayList<Integer>(); // helper array, LCP[i] = LCPR[suffixArray[i]]

int startingValue = 0;

for(int i=0; i<length; i++){
if(KMRArray.get(i).equals(0)){
for (int i=0; i<length; i++) {
if(KMRArray.get(i).equals(0)) {
LCPR.add(0);
startingValue = 0;
}
else{
} else {
int LCPRValue = startingValue;
int predecessor = suffixArray.get(KMRArray.get(i)-1);

while(string.charAt(i+LCPRValue) == string.charAt(predecessor+LCPRValue))
final int predecessor = suffixArray.get(KMRArray.get(i)-1);
while (string.charAt(i+LCPRValue) == string.charAt(predecessor+LCPRValue))
LCPRValue++;

LCPR.add(LCPRValue);
startingValue = LCPRValue-1 > 0 ? LCPRValue-1 : 0;
}
Expand All @@ -71,14 +66,12 @@ private ArrayList<Integer> getLCPR() {
}

/**
 * Fills the LCP field by reordering the helper array into suffix-array order,
 * i.e. LCP[i] = LCPR[suffixArray[i]] for every i >= 1.
 *
 * @param LCPR helper array that is looked up by the values of the suffix array
 */
private void getLCPfromLCPR(ArrayList<Integer> LCPR) {
    final ArrayList<Integer> suffixArray = suffixArrayBuilder.getSuffixArray();
    final int length = suffixArray.size();

    LCP = new ArrayList<Integer>();
    LCP.add(null); //no value for LCP[0]
    for (int i=1; i<length; i++)
        LCP.add(LCPR.get(suffixArray.get(i)));
}
}
92 changes: 44 additions & 48 deletions src/com/jwetherell/algorithms/data_structures/SuffixArray.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,19 @@
* In computer science, a suffix array is a sorted array of all suffixes of a string.
* It is a data structure used, among others, in full text indices, data compression
* algorithms and within the field of bibliometrics.
*
* <p>
* https://en.wikipedia.org/wiki/Suffix_array
*
* This implementation returns starting indexes instead of full suffixes
*
* <p>
* NOTE: This implementation returns starting indexes instead of full suffixes
* <br>
* @author Jakub Szarawarski <[email protected]>
* @author Justin Wetherell <[email protected]>
*/
public class SuffixArray {

private static final StringBuilder STRING_BUILDER = new StringBuilder();
private static final char DEFAULT_END_SEQ_CHAR = '$';

private char END_SEQ_CHAR;
private String string;
private ArrayList<Integer> suffixArray = null;
Expand All @@ -32,19 +36,17 @@ public SuffixArray(CharSequence sequence, char endChar) {
}

/**
 * Returns the suffix array, running the KMR algorithm on the first call.
 *
 * @return the cached list of suffix starting indexes (the internal instance,
 *         not a copy)
 */
public ArrayList<Integer> getSuffixArray() {
    // Lazily computed: KMRalgorithm() assigns both suffixArray and KMRarray.
    if (suffixArray == null)
        KMRalgorithm();
    return suffixArray;
}

/**
 * Returns the KMR array, running the KMR algorithm on the first call.
 *
 * @return inverted suffix array (the internal instance, not a copy)
 */
public ArrayList<Integer> getKMRarray() {
    // Lazily computed: KMRalgorithm() assigns both KMRarray and suffixArray.
    if (KMRarray == null)
        KMRalgorithm();
    return KMRarray;
}

Expand All @@ -62,24 +64,22 @@ public String getString(){
* KMR for radius bigger than string length is the inverted suffix array
*/
private void KMRalgorithm() {
int length = string.length();
final int length = string.length();

ArrayList<Integer> KMR = getBasicKMR(length);
ArrayList<KMRsWithIndex> KMRinvertedList = new ArrayList<KMRsWithIndex>();
ArrayList<Integer> KMR = getBasicKMR(length);

int radius = 1;

while(radius < length){
while (radius < length) {
KMRinvertedList = getKMRinvertedList(KMR, radius, length);
KMR = getKMR(KMRinvertedList, radius, length);
KMR = getKMR(KMRinvertedList, length);
radius *= 2;
}

KMRarray = new ArrayList<Integer>(KMR.subList(0, length));
suffixArray = new ArrayList<Integer>();
for(KMRsWithIndex kmr : KMRinvertedList){
for (KMRsWithIndex kmr : KMRinvertedList)
suffixArray.add(kmr.index);
}
}

/**
Expand All @@ -90,12 +90,13 @@ private void KMRalgorithm() {
* @param length string length
* @return KMR array for new radius
*/
private ArrayList<Integer> getKMR(ArrayList<KMRsWithIndex> KMRinvertedList, int radius, int length) {
ArrayList<Integer> KMR = new ArrayList<Integer>(length*2);
for(int i=0; i<2*length; i++) KMR.add(-1);
private ArrayList<Integer> getKMR(ArrayList<KMRsWithIndex> KMRinvertedList, int length) {
final ArrayList<Integer> KMR = new ArrayList<Integer>(length*2);
for (int i=0; i<2*length; i++)
KMR.add(-1);

int counter = 0;
for(int i=0; i<length; i++){
for (int i=0; i<length; i++){
if(i>0 && substringsAreEqual(KMRinvertedList, i))
counter++;
KMR.set(KMRinvertedList.get(i).index, counter);
Expand All @@ -105,8 +106,8 @@ private ArrayList<Integer> getKMR(ArrayList<KMRsWithIndex> KMRinvertedList, int
}

/**
 * Compares the entry at position {@code i} with its predecessor in the list.
 * <p>
 * NOTE: despite the name, this returns {@code true} when the two entries
 * DIFFER in either their begin or end KMR value, and {@code false} only when
 * both values match. The caller relies on this to advance its counter, so the
 * behavior is kept as is.
 *
 * @param KMRinvertedList sorted list of entries
 * @param i position of the entry to compare with the one at {@code i-1}; must be at least 1
 * @return {@code true} if the begin or end KMR values of the two entries are not equal
 */
private boolean substringsAreEqual(ArrayList<KMRsWithIndex> KMRinvertedList, int i) {
    final KMRsWithIndex previous = KMRinvertedList.get(i-1);
    final KMRsWithIndex current = KMRinvertedList.get(i);
    return !previous.beginKMR.equals(current.beginKMR) ||
           !previous.endKMR.equals(current.endKMR);
}

/**
Expand All @@ -118,25 +119,22 @@ private boolean substringsAreEqual(ArrayList<KMRsWithIndex> KMRinvertedList, int
* @return list of KMRsWithIndex whose indexes form a nearly inverted KMR array
*/
private ArrayList<KMRsWithIndex> getKMRinvertedList(ArrayList<Integer> KMR, int radius, int length) {
ArrayList<KMRsWithIndex> KMRinvertedList = new ArrayList<KMRsWithIndex>();

for(int i=0; i<length; i++){
final ArrayList<KMRsWithIndex> KMRinvertedList = new ArrayList<KMRsWithIndex>();
for (int i=0; i<length; i++)
KMRinvertedList.add(new KMRsWithIndex(KMR.get(i), KMR.get(i+radius), i));
}

Collections.sort(KMRinvertedList, new Comparator<KMRsWithIndex>() {
@Override
public int compare(KMRsWithIndex A, KMRsWithIndex B) {
if (A.beginKMR.equals(B.beginKMR) == false) {
return A.beginKMR.compareTo(B.beginKMR);
}
if (A.endKMR.equals(B.endKMR) == false) {
return A.endKMR.compareTo(B.endKMR);
Collections.sort(KMRinvertedList,
new Comparator<KMRsWithIndex>() {
@Override
public int compare(KMRsWithIndex A, KMRsWithIndex B) {
if (A.beginKMR.equals(B.beginKMR) == false)
return A.beginKMR.compareTo(B.beginKMR);
if (A.endKMR.equals(B.endKMR) == false)
return A.endKMR.compareTo(B.endKMR);
return A.index.compareTo(B.index);
}
return A.index.compareTo(B.index);
}
});

);
return KMRinvertedList;
}

Expand All @@ -147,23 +145,21 @@ public int compare(KMRsWithIndex A, KMRsWithIndex B) {
* @return pseudo KMR array for radius=1
*/
private ArrayList<Integer> getBasicKMR(int length) {
    final ArrayList<Integer> result = new ArrayList<Integer>(length*2);
    final char[] characters = string.toCharArray();

    // First half: the character code of every position.
    // Integer.valueOf replaces the deprecated Integer constructor.
    for (int i=0; i<length; i++)
        result.add(Integer.valueOf(characters[i]));

    // Second half: -1 padding, so lookups past the end of the string are valid.
    for (int i=0; i<length; i++)
        result.add(-1);

    return result;
}

/**
 * Copies the sequence into a string and appends the end-of-sequence character
 * unless the sequence already contains it somewhere.
 *
 * @param sequence input text
 * @return the text, guaranteed to contain END_SEQ_CHAR
 */
private String buildStringWithEndChar(CharSequence sequence) {
    // A fresh local builder per call: no mutable state is shared between
    // instances or threads.
    final StringBuilder builder = new StringBuilder(sequence);
    if (builder.indexOf(String.valueOf(END_SEQ_CHAR)) < 0)
        builder.append(END_SEQ_CHAR);
    return builder.toString();
}

private class KMRsWithIndex{
Expand All @@ -177,4 +173,4 @@ private class KMRsWithIndex{
this.index = index;
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ public void testSuffixArray(){
Set<String> suffixSet = suffixTree.getSuffixes();
ArrayList<Integer> suffixArray = suffixArrayBuilder.getSuffixArray();

int length = string.length();
int i=0;
for(String suffix : suffixSet){
String substring = string.substring(suffixArray.get(i++));
Expand Down

0 comments on commit 2f826ce

Please sign in to comment.