Skip to content

Commit

Permalink
Fix malformed URL parsing bug
Browse files Browse the repository at this point in the history
  • Loading branch information
jlinn committed Sep 25, 2017
1 parent 4996c25 commit 78a0286
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 4 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ This plugin enables URL tokenization and token filtering by URL part.

| Elasticsearch Version | Plugin Version |
|-----------------------|----------------|
| 2.4.6 | 2.4.6.0 |
| 2.4.5 | 2.4.5.0 |
| 2.4.4 | 2.4.4.0 |
| 2.4.3 | 2.4.3.0 |
Expand Down
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch-analysis-url</artifactId>
<version>2.4.5.0</version>
<version>2.4.6.0</version>
<packaging>jar</packaging>
<description>Elasticsearch URL token filter plugin</description>

Expand All @@ -18,7 +18,7 @@

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<elasticsearch.version>2.4.5</elasticsearch.version>
<elasticsearch.version>2.4.6</elasticsearch.version>
<lucene.version>5.5.4</lucene.version>
<hamcrest.version>1.3</hamcrest.version>
<tests.output>onerror</tests.output>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -504,8 +504,10 @@ private List<Token> tokenizeSpecial(URL url) {
// protocol://host
token = getPart(url, URLPart.PROTOCOL) + "://" + getPart(url, URLPart.HOST);
start = getStartIndex(url, token);
end = getEndIndex(start, token);
tokens.add(new Token(token, URLPart.WHOLE, start, end));
if (start != -1) {
end = getEndIndex(start, token);
tokens.add(new Token(token, URLPart.WHOLE, start, end));
}
return tokens;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ public void testAnalyze() {
}


@Test
public void testAnalyzePartial() throws Exception {
    // A bare scheme such as "http://" must still tokenize without blowing up:
    // expect the protocol tokens and the implied default port 80.
    final String partialUrl = "http://";
    final String analyzerName = "tokenizer_url_all";
    assertTokensContain(partialUrl, analyzerName, ":80", "http:", "http", "80");
}


@Test
public void testAnalyzeWhole() throws Exception {
List<AnalyzeResponse.AnalyzeToken> tokens = analyzeURL("http://foo.bar.com", "tokenizer_url_all_malformed");
Expand Down

0 comments on commit 78a0286

Please sign in to comment.