Skip to content

Commit

Permalink
Fixed #120: handle seqs of length 0.
Browse files Browse the repository at this point in the history
  • Loading branch information
Martinsos committed Nov 6, 2019
1 parent 34dea59 commit 4ecfd34
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 22 deletions.
2 changes: 1 addition & 1 deletion bindings/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
name = "edlib",
description = "Lightweight, super fast library for sequence alignment using edit (Levenshtein) distance.",
long_description = long_description,
version = "1.3.4",
version = "1.3.5",
url = "https://github.com/Martinsos/edlib",
author = "Martin Sosic",
author_email = "[email protected]",
Expand Down
36 changes: 24 additions & 12 deletions bindings/python/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,50 +11,62 @@
if not (result and result["editDistance"] == 3):
testFailed = True

# Additional equalities.
result = edlib.align("ACTG", "CACTRT", mode="HW", task="path", additionalEqualities=[("R", "A"), ("R", "G")])
if not (result and result["editDistance"] == 0):
testFailed = True


# Nice alignment.
resultNW = edlib.align(query="TAAGGATGGTCCCATTC", target="AAGGGGTCTCATATC", mode="NW", task="path")
test_getNiceNW = edlib.getNiceAlignment(resultNW, query="TAAGGATGGTCCCATTC", target="AAGGGGTCTCATATC")
if not (len(test_getNiceNW['query_aligned']) == 18 and len(test_getNiceNW['target_aligned'])==18):
testFailed = True
if not (test_getNiceNW['query_aligned'] == 'TAAGGATGGTCCCAT-TC'):
testFailed = True
testFailed = True
if not (test_getNiceNW['matched_aligned'] == '-||||--||||.|||-||'):
testFailed = True
testFailed = True
if not (test_getNiceNW['target_aligned'] == '-AAGG--GGTCTCATATC'):
testFailed = True

testFailed = True

resultHW = edlib.align(query="TAAGGATGGTCCCATTC", target="AAGGGGTCTCATATC", mode="HW", task="path")
test_getNiceHW = edlib.getNiceAlignment(resultHW, query="TAAGGATGGTCCCATTC", target="AAGGGGTCTCATATC")
if not (len(test_getNiceHW['query_aligned']) == 18 and len(test_getNiceHW['target_aligned'])==18):
testFailed = True
if not (test_getNiceHW['query_aligned'] == 'TAAGGATGGTCCCAT-TC'):
testFailed = True
testFailed = True
if not (test_getNiceHW['matched_aligned'] == '-||||--||||.|||-||'):
testFailed = True
testFailed = True
if not (test_getNiceHW['target_aligned'] == '-AAGG--GGTCTCATATC'):
testFailed = True

testFailed = True

resultSHW = edlib.align(query="TAAGGATGGTCCCATTC", target="AAGGGGTCTCATATC", mode="SHW", task="path")
test_getNiceSHW = edlib.getNiceAlignment(resultSHW, query="TAAGGATGGTCCCATTC", target="AAGGGGTCTCATATC")
if not (len(test_getNiceSHW['query_aligned']) == 18 and len(test_getNiceSHW['target_aligned'])==18):
testFailed = True
if not (test_getNiceSHW['query_aligned'] == 'TAAGGATGGTCCCAT-TC'):
testFailed = True
testFailed = True
if not (test_getNiceSHW['matched_aligned'] == '-||||--||||.|||-||'):
testFailed = True
testFailed = True
if not (test_getNiceSHW['target_aligned'] == '-AAGG--GGTCTCATATC'):
testFailed = True
testFailed = True

result_taskDistance = edlib.align(query="TAAGGATGGTCCCATTC", target="AAGGGGTCTCATATC", mode="NW", task="distance")
if not (result_taskDistance["cigar"] == None):
testFailed = True

# Empty sequences (query or target of length 0).
result = edlib.align("", "elephant")
testFailed = testFailed or (not (result and result["editDistance"] == 8))
result = edlib.align("telephone", "")
testFailed = testFailed or (not (result and result["editDistance"] == 9))
result = edlib.align("", "elephant", mode="HW")
testFailed = testFailed or (not (result and result["editDistance"] == 0))
result = edlib.align("telephone", "", mode="HW")
testFailed = testFailed or (not (result and result["editDistance"] == 9))
result = edlib.align("", "elephant", mode="SHW")
testFailed = testFailed or (not (result and result["editDistance"] == 0))
result = edlib.align("telephone", "", mode="SHW")
testFailed = testFailed or (not (result and result["editDistance"] == 9))

if testFailed:
print("Some of the tests failed!")
Expand Down
22 changes: 22 additions & 0 deletions edlib/src/edlib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <vector>
#include <cstring>
#include <string>
#include <stdexcept>

using namespace std;

Expand Down Expand Up @@ -157,6 +158,27 @@ extern "C" EdlibAlignResult edlibAlign(const char* const queryOriginal, const in
result.alphabetLength = static_cast<int>(alphabet.size());
/*-------------------------------------------------------*/

// Handle special situation when at least one of the sequences has length 0.
if (queryLength == 0 || targetLength == 0) {
if (config.mode == EDLIB_MODE_NW) {
result.editDistance = std::max(queryLength, targetLength);
result.endLocations = (int *) malloc(sizeof(int) * 1);
result.endLocations[0] = targetLength - 1;
result.numLocations = 1;
} else if (config.mode == EDLIB_MODE_SHW || config.mode == EDLIB_MODE_HW) {
result.editDistance = queryLength;
result.endLocations = (int *) malloc(sizeof(int) * 1);
result.endLocations[0] = -1;
result.numLocations = 1;
} else {
throw std::invalid_argument("Received invalid edlib mode.");
}

free(query);
free(target);
return result;
}

/*--------------------- INITIALIZATION ------------------*/
int maxNumBlocks = ceilDiv(queryLength, WORD_SIZE); // bmax in Myers
int W = maxNumBlocks * WORD_SIZE - queryLength; // number of redundant cells in last level blocks
Expand Down
28 changes: 21 additions & 7 deletions test/SimpleEditDistance.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include <cstdio>
#include <vector>
#include <stdexcept>
#include "edlib.h"

using namespace std;
Expand All @@ -22,14 +23,29 @@ int min3(int x, int y, int z) {

int calcEditDistanceSimple(const char* query, int queryLength,
const char* target, int targetLength,
EdlibAlignMode mode, int* score,
const EdlibAlignMode mode, int* score,
int** positions_, int* numPositions_) {
int* C = new int[queryLength];
int* newC = new int[queryLength];

int bestScore = -1;
vector<int> positions;
int numPositions = 0;

// Handle the special case where at least one of the sequences has length 0.
if (queryLength == 0 || targetLength == 0) {
if (mode == EDLIB_MODE_NW) {
*score = std::max(queryLength, targetLength);
*positions_ = new int[1] {targetLength - 1};
*numPositions_ = 1;
} else if (mode == EDLIB_MODE_SHW || mode == EDLIB_MODE_HW) {
*score = queryLength;
*positions_ = new int[1] {-1};
*numPositions_ = 1;
} else {
throw std::invalid_argument("Received invalid edlib mode.");
}
return EDLIB_STATUS_OK;
}

int* C = new int[queryLength];
int* newC = new int[queryLength];

// set first column (column zero)
for (int i = 0; i < queryLength; i++) {
Expand Down Expand Up @@ -60,11 +76,9 @@ int calcEditDistanceSimple(const char* query, int queryLength,
if (bestScore == -1 || newScore <= bestScore) {
if (newScore < bestScore) {
positions.clear();
numPositions = 0;
}
bestScore = newScore;
positions.push_back(c);
numPositions++;
}
}

Expand Down
21 changes: 19 additions & 2 deletions test/runTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -552,12 +552,29 @@ bool testCustomEqualityRelation() {
return allPass;
}

// Runs executeTest with an empty sequence on one side and "ACTG" on the other,
// in both orders (empty query first, then empty target), for the NW, SHW and
// HW modes in that order.
//
// Returns true only if every executeTest call returned true. Evaluation stops
// at the first call that returns false, so later checks are skipped.
bool testEmptySequences() {
    printf("Empty query or target:\n");

    const char* nothing = "";
    const char* bases = "ACTG";
    const int numBases = 4;  // Length of `bases`, excluding the terminator.

    // && evaluates left to right and short-circuits, which keeps both the
    // order of the checks and the stop-on-first-failure behaviour.
    return executeTest(nothing, 0, bases, numBases, EDLIB_MODE_NW)
        && executeTest(bases, numBases, nothing, 0, EDLIB_MODE_NW)
        && executeTest(nothing, 0, bases, numBases, EDLIB_MODE_SHW)
        && executeTest(bases, numBases, nothing, 0, EDLIB_MODE_SHW)
        && executeTest(nothing, 0, bases, numBases, EDLIB_MODE_HW)
        && executeTest(bases, numBases, nothing, 0, EDLIB_MODE_HW);
}

bool runTests() {
// TODO: make this global vector where tests have to add themselves.
int numTests = 18;
int numTests = 19;
bool (* tests [])() = {test1, test2, test3, test4, test5, test6,
test7, test8, test9, test10, test11, test12, test13, test14, test15, test16,
testCigar, testCustomEqualityRelation};
testCigar, testCustomEqualityRelation, testEmptySequences};

bool allTestsPassed = true;
for (int i = 0; i < numTests; i++) {
Expand Down

0 comments on commit 4ecfd34

Please sign in to comment.