From 8635503298ad9f7c2722201fc5eb50767ec92669 Mon Sep 17 00:00:00 2001 From: Tetiana Tolmachova Date: Thu, 21 Apr 2022 13:38:24 +0200 Subject: [PATCH 001/119] feat(search history): implementation of DBpedia Spotlight --- .../dbpediaSpotlight/Example.java | 61 ++++++++++++ .../common/AnnotationUnit.java | 59 ++++++++++++ .../dbpediaSpotlight/common/Constants.java | 17 ++++ .../dbpediaSpotlight/common/Prefixes.java | 6 ++ .../dbpediaSpotlight/common/ResourceItem.java | 72 +++++++++++++++ .../dbpediaSpotlight/rest/SpotlightBean.java | 92 +++++++++++++++++++ 6 files changed, 307 insertions(+) create mode 100644 src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java create mode 100644 src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/AnnotationUnit.java create mode 100644 src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/Constants.java create mode 100644 src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/Prefixes.java create mode 100644 src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/ResourceItem.java create mode 100644 src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/rest/SpotlightBean.java diff --git a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java new file mode 100644 index 000000000..406461efe --- /dev/null +++ b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java @@ -0,0 +1,61 @@ +package de.l3s.learnweb.searchhistory.dbpediaSpotlight; + +import java.net.URL; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; + +import de.l3s.learnweb.searchhistory.dbpediaSpotlight.common.AnnotationUnit; +import de.l3s.learnweb.searchhistory.dbpediaSpotlight.common.ResourceItem; +import de.l3s.learnweb.searchhistory.dbpediaSpotlight.rest.SpotlightBean; + +public class Example { + public static void main(String args[]) throws Exception { + SpotlightBean spotlight = new SpotlightBean(); + + URL url = new URL("https://en.wikipedia.org/wiki/Cat"); + Document doc = Jsoup.parse(url, 3*1000); + String text = doc.text(); + + AnnotationUnit annotationUnit = spotlight.get(text); + print(annotationUnit); + } + + private static void print(AnnotationUnit annotationUnit) { + AtomicInteger counter = new AtomicInteger(0); + + long total = annotationUnit.getResources().stream().parallel().count(); + + Comparator compareByUri = Comparator + .comparing(ResourceItem::getUri); + + Comparator compareByUriThenSimScore = Comparator + .comparing(ResourceItem::getUri) + .thenComparing(ResourceItem::getSimilarityScore); + + Set nameSet = new HashSet<>(); + + if (annotationUnit != null) { + System.out.println ("Total: " + total); + + List resourceItemsList = annotationUnit.getResources().stream() + .filter(r -> nameSet.add(r.getSurfaceForm())) + .sorted(compareByUri) + .collect(Collectors.toList()); + + for (ResourceItem item : resourceItemsList) { + System.out.println("URI: " + item.getUri() + ", surface form: " + item.getSurfaceForm() + ", similarity score: " + item.getSimilarityScore()); + counter.incrementAndGet(); + } + + System.out.println ("Total after filtering out: " + counter); + } + + } +} diff --git a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/AnnotationUnit.java b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/AnnotationUnit.java new file mode 100644 index 000000000..744719b63 --- /dev/null +++ b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/AnnotationUnit.java @@ -0,0 +1,59 @@ +package de.l3s.learnweb.searchhistory.dbpediaSpotlight.common; + +import static de.l3s.learnweb.searchhistory.dbpediaSpotlight.common.Constants.*; +import static de.l3s.learnweb.searchhistory.dbpediaSpotlight.common.Prefixes.DBPEDIA_ONTOLOGY; +import static de.l3s.learnweb.searchhistory.dbpediaSpotlight.common.Prefixes.SCHEMA_ONTOLOGY; + +import java.util.List; + +import com.google.gson.annotations.SerializedName; + +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; + +@Getter +@Setter +@NoArgsConstructor +public class AnnotationUnit { + @SerializedName("@text") + private String text; + + @SerializedName("@confidence") + private String confidence; + + @SerializedName("@support") + private String support; + + @SerializedName("@types") + private String types; + + @SerializedName("@sparql") + private String sparql; + + @SerializedName("@policy") + private String policy; + + @SerializedName("Resources") + private List resources; + + public Integer endIndex() { + if (text != null) { + return text.length(); + } + return 0; + } + + public String getTypes() { + if (types != null && !types.isEmpty()) { + return types.replace("Http", HTTP). + replace(DBPEDIA, DBPEDIA_ONTOLOGY). + replace(SCHEMA, SCHEMA_ONTOLOGY); + } + return types; + } + + public Integer beginIndex() { + return 1; + } +} \ No newline at end of file diff --git a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/Constants.java b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/Constants.java new file mode 100644 index 000000000..6a2f4deaf --- /dev/null +++ b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/Constants.java @@ -0,0 +1,17 @@ +package de.l3s.learnweb.searchhistory.dbpediaSpotlight.common; + +public interface Constants { + + String EMPTY = ""; + + String SLASH = "/"; + + String COMMA = ", "; + + String HTTP = "http"; + + String DBPEDIA = "DBpedia:"; + + String SCHEMA = "Schema:"; + +} diff --git a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/Prefixes.java b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/Prefixes.java new file mode 100644 index 000000000..278e67e42 --- /dev/null +++ b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/Prefixes.java @@ -0,0 +1,6 @@ +package de.l3s.learnweb.searchhistory.dbpediaSpotlight.common; + +public interface Prefixes { + String DBPEDIA_ONTOLOGY = "http://dbpedia.org/ontology/"; + String SCHEMA_ONTOLOGY = "http://schema.org/"; +} diff --git a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/ResourceItem.java b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/ResourceItem.java new file mode 100644 index 000000000..eee65ec1f --- /dev/null +++ b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/ResourceItem.java @@ -0,0 +1,72 @@ +package de.l3s.learnweb.searchhistory.dbpediaSpotlight.common; + +import static de.l3s.learnweb.searchhistory.dbpediaSpotlight.common.Constants.COMMA; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import com.google.gson.annotations.SerializedName; + +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; + +@Getter +@Setter +@NoArgsConstructor +public class ResourceItem { + @SerializedName("@URI") + private String uri; + + @SerializedName("@support") + private String support; + + @SerializedName("@types") + private String types; + + @SerializedName("@surfaceForm") + private String surfaceForm; + + @SerializedName("@offset") + private String offSet; + + @SerializedName("@similarityScore") + private String similarityScore; + + @SerializedName("@percentageOfSecondRank") + private String percentageOfSecondRank; + + + public Integer beginIndex() { + try { + return Integer.valueOf(offSet); + } catch (NumberFormatException e) { + return 0; + } + } + + public Integer endIndex() { + if (surfaceForm != null) { + return beginIndex() + surfaceForm.length(); + } + + return 0; + } + + public List typesList() { + if (types != null && !types.isEmpty()) { + return Arrays.asList(types.split(COMMA)); + } + + return new ArrayList<>(); + } + + public Double score() { + try { + return Double.valueOf(similarityScore); + } catch (NumberFormatException e) { + return 0d; + } + } +} \ No newline at end of file diff --git a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/rest/SpotlightBean.java b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/rest/SpotlightBean.java new file mode 100644 index 000000000..8fb44834a --- /dev/null +++ b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/rest/SpotlightBean.java @@ -0,0 +1,92 @@ +package de.l3s.learnweb.searchhistory.dbpediaSpotlight.rest; + +import static de.l3s.learnweb.searchhistory.dbpediaSpotlight.common.Constants.EMPTY; +import static de.l3s.learnweb.searchhistory.dbpediaSpotlight.common.Prefixes.DBPEDIA_ONTOLOGY; +import static de.l3s.learnweb.searchhistory.dbpediaSpotlight.common.Prefixes.SCHEMA_ONTOLOGY; +import static org.apache.http.HttpHeaders.ACCEPT; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.List; + +import org.apache.http.HttpResponse; +import org.apache.http.NameValuePair; +import org.apache.http.client.HttpClient; +import org.apache.http.client.entity.UrlEncodedFormEntity; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.message.BasicNameValuePair; +import org.jsoup.Jsoup; + +import com.google.gson.Gson; + +import de.l3s.learnweb.searchhistory.dbpediaSpotlight.common.AnnotationUnit; +import de.l3s.learnweb.searchhistory.dbpediaSpotlight.common.ResourceItem; + +public class SpotlightBean { + private static final String URL = "https://api.dbpedia-spotlight.org/en/annotate"; + private final HttpClient client; + private final HttpPost request; + + public SpotlightBean() { + client = HttpClientBuilder.create().build(); + request = new HttpPost(URL); + + init(); + } + + private void init() { + request.addHeader(ACCEPT, "application/json"); + } + + private AnnotationUnit get() throws IOException { + Gson gson = new Gson(); + AnnotationUnit annotationUnit = gson.fromJson(getContent(), AnnotationUnit.class); + fixPrefixes(annotationUnit.getResources()); + + return annotationUnit; + } + + private String fixPrefixes(String value) { + if (value != null && !value.isEmpty()) { + return value.replace("Http", "http"). + replace("DBpedia:", DBPEDIA_ONTOLOGY). + replace("Schema:", SCHEMA_ONTOLOGY); + } + + return value; + } + + private void fixPrefixes(ResourceItem resource) { + resource.setTypes(fixPrefixes(resource.getTypes())); + } + + private void fixPrefixes(List resources) { + if (resources != null && !resources.isEmpty()) { + resources.forEach(resourceItem -> fixPrefixes(resourceItem)); + } + } + + private String getContent() throws IOException { + HttpResponse response = client.execute(request); + BufferedReader rd = new BufferedReader(new InputStreamReader(response.getEntity().getContent())); + StringBuffer result = new StringBuffer(); + String line = EMPTY; + + while ((line = rd.readLine()) != null) { + result.append(line); + } + + return result.toString(); + } + + public AnnotationUnit get(String text) throws IOException { + List params = new ArrayList(); + params.add(new BasicNameValuePair("text", text)); + request.setEntity(new UrlEncodedFormEntity(params)); + + return get(); + } +} From 0a8cac24a4c2351d26a935c7524ba30572d6d606 Mon Sep 17 00:00:00 2001 From: Tetiana Tolmachova Date: Mon, 25 Apr 2022 14:05:45 +0200 Subject: [PATCH 002/119] fix(search history): fix DBpedia Spotlight code for lint --- .../dbpediaSpotlight/rest/SpotlightBean.java | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/rest/SpotlightBean.java b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/rest/SpotlightBean.java index 8fb44834a..70af6ce31 100644 --- a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/rest/SpotlightBean.java +++ b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/rest/SpotlightBean.java @@ -49,6 +49,14 @@ private AnnotationUnit get() throws IOException { return annotationUnit; } + public AnnotationUnit get(String text) throws IOException { + List params = new ArrayList(); + params.add(new BasicNameValuePair("text", text)); + request.setEntity(new UrlEncodedFormEntity(params)); + + return get(); + } + private String fixPrefixes(String value) { if (value != null && !value.isEmpty()) { return value.replace("Http", "http"). @@ -82,11 +90,4 @@ private String getContent() throws IOException { return result.toString(); } - public AnnotationUnit get(String text) throws IOException { - List params = new ArrayList(); - params.add(new BasicNameValuePair("text", text)); - request.setEntity(new UrlEncodedFormEntity(params)); - - return get(); - } } From 17bee07c3bbd24d08c254f0c1010c2b5681e8f8d Mon Sep 17 00:00:00 2001 From: Tetiana Tolmachova Date: Wed, 27 Apr 2022 16:37:20 +0200 Subject: [PATCH 003/119] fix(search history): fix DBpedia Spotlight code for lint --- .../learnweb/searchhistory/dbpediaSpotlight/Example.java | 4 ---- .../dbpediaSpotlight/common/AnnotationUnit.java | 8 ++++---- .../searchhistory/dbpediaSpotlight/common/Constants.java | 5 +++++ .../searchhistory/dbpediaSpotlight/common/Prefixes.java | 6 ++++++ .../dbpediaSpotlight/common/ResourceItem.java | 2 +- .../dbpediaSpotlight/rest/SpotlightBean.java | 7 +++---- 6 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java index 406461efe..bf134d992 100644 --- a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java +++ b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java @@ -35,10 +35,6 @@ private static void print(AnnotationUnit annotationUnit) { Comparator compareByUri = Comparator .comparing(ResourceItem::getUri); - Comparator compareByUriThenSimScore = Comparator - .comparing(ResourceItem::getUri) - .thenComparing(ResourceItem::getSimilarityScore); - Set nameSet = new HashSet<>(); if (annotationUnit != null) { diff --git a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/AnnotationUnit.java b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/AnnotationUnit.java index 744719b63..4a7eaaae1 100644 --- a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/AnnotationUnit.java +++ b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/AnnotationUnit.java @@ -46,9 +46,9 @@ public Integer endIndex() { public String getTypes() { if (types != null && !types.isEmpty()) { - return types.replace("Http", HTTP). - replace(DBPEDIA, DBPEDIA_ONTOLOGY). - replace(SCHEMA, SCHEMA_ONTOLOGY); + return types.replace("Http", HTTP) + .replace(DBPEDIA, DBPEDIA_ONTOLOGY) + .replace(SCHEMA, SCHEMA_ONTOLOGY); } return types; } @@ -56,4 +56,4 @@ public String getTypes() { public Integer beginIndex() { return 1; } -} \ No newline at end of file +} diff --git a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/Constants.java b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/Constants.java index 6a2f4deaf..5be0fe8b2 100644 --- a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/Constants.java +++ b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/Constants.java @@ -14,4 +14,9 @@ public interface Constants { String SCHEMA = "Schema:"; + /** + * This method is only here to make lint happy + */ + default void doNothing() { + } } diff --git a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/Prefixes.java b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/Prefixes.java index 278e67e42..c130f3d6b 100644 --- a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/Prefixes.java +++ b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/Prefixes.java @@ -3,4 +3,10 @@ public interface Prefixes { String DBPEDIA_ONTOLOGY = "http://dbpedia.org/ontology/"; String SCHEMA_ONTOLOGY = "http://schema.org/"; + + /** + * This method is only here to make lint happy + */ + default void doNothing() { + } } diff --git a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/ResourceItem.java b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/ResourceItem.java index eee65ec1f..7b0b49bf8 100644 --- a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/ResourceItem.java +++ b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/ResourceItem.java @@ -69,4 +69,4 @@ public Double score() { return 0d; } } -} \ No newline at end of file +} diff --git a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/rest/SpotlightBean.java b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/rest/SpotlightBean.java index 70af6ce31..ee82ba74a 100644 --- a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/rest/SpotlightBean.java +++ b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/rest/SpotlightBean.java @@ -18,7 +18,6 @@ import org.apache.http.client.methods.HttpPost; import org.apache.http.impl.client.HttpClientBuilder; import org.apache.http.message.BasicNameValuePair; -import org.jsoup.Jsoup; import com.google.gson.Gson; @@ -59,9 +58,9 @@ public AnnotationUnit get(String text) throws IOException { private String fixPrefixes(String value) { if (value != null && !value.isEmpty()) { - return value.replace("Http", "http"). - replace("DBpedia:", DBPEDIA_ONTOLOGY). - replace("Schema:", SCHEMA_ONTOLOGY); + return value.replace("Http", "http") + .replace("DBpedia:", DBPEDIA_ONTOLOGY) + .replace("Schema:", SCHEMA_ONTOLOGY); } return value; From 9e76d4725314d76686c23a46229a338c460d6fa0 Mon Sep 17 00:00:00 2001 From: Tetiana Tolmachova Date: Thu, 28 Apr 2022 14:42:06 +0200 Subject: [PATCH 004/119] fix(search history): fix DBpedia Spotlight code for lint --- .../learnweb/searchhistory/dbpediaSpotlight/Example.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java index bf134d992..e896c3b2e 100644 --- a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java +++ b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java @@ -16,11 +16,11 @@ import de.l3s.learnweb.searchhistory.dbpediaSpotlight.rest.SpotlightBean; public class Example { - public static void main(String args[]) throws Exception { + public static void main(String[] args) throws Exception { SpotlightBean spotlight = new SpotlightBean(); URL url = new URL("https://en.wikipedia.org/wiki/Cat"); - Document doc = Jsoup.parse(url, 3*1000); + Document doc = Jsoup.parse(url, 3 * 1000); String text = doc.text(); AnnotationUnit annotationUnit = spotlight.get(text); @@ -38,7 +38,7 @@ private static void print(AnnotationUnit annotationUnit) { Set nameSet = new HashSet<>(); if (annotationUnit != null) { - System.out.println ("Total: " + total); + System.out.println("Total: " + total); List resourceItemsList = annotationUnit.getResources().stream() .filter(r -> nameSet.add(r.getSurfaceForm())) From e412f5f05d992e7169cd96907cfa3d711725d313 Mon Sep 17 00:00:00 2001 From: Tetiana Tolmachova Date: Thu, 28 Apr 2022 14:44:58 +0200 Subject: [PATCH 005/119] fix(search history): additional small fix of DBpedia Spotlight code for lint --- src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java index e896c3b2e..e9cc2a440 100644 --- a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java +++ b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java @@ -50,7 +50,7 @@ private static void print(AnnotationUnit annotationUnit) { counter.incrementAndGet(); } - System.out.println ("Total after filtering out: " + counter); + System.out.println("Total after filtering out: " + counter); } } From 47de2b6d2b00a87cfc144856ae37dfa7a726db8f Mon Sep 17 00:00:00 2001 From: Tetiana Tolmachova Date: Thu, 28 Apr 2022 16:20:12 +0200 Subject: [PATCH 006/119] fix(search history): remove lombok annotations from DBpedia Spotlight code --- .../common/AnnotationUnit.java | 59 +++++++++++++++-- .../dbpediaSpotlight/common/ResourceItem.java | 64 ++++++++++++++++--- 2 files changed, 108 insertions(+), 15 deletions(-) diff --git a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/AnnotationUnit.java b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/AnnotationUnit.java index 4a7eaaae1..2d33f42d4 100644 --- a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/AnnotationUnit.java +++ b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/AnnotationUnit.java @@ -8,13 +8,6 @@ import com.google.gson.annotations.SerializedName; -import lombok.Getter; -import lombok.NoArgsConstructor; -import lombok.Setter; - -@Getter -@Setter -@NoArgsConstructor public class AnnotationUnit { @SerializedName("@text") private String text; @@ -37,6 +30,58 @@ public class AnnotationUnit { @SerializedName("Resources") private List resources; + public String getText() { + return text; + } + + public void setText(final String text) { + this.text = text; + } + + public String getConfidence() { + return confidence; + } + + public void setConfidence(final String confidence) { + this.confidence = confidence; + } + + public String getSupport() { + return support; + } + + public void setSupport(final String support) { + this.support = support; + } + + public void setTypes(final String types) { + this.types = types; + } + + public String getSparql() { + return sparql; + } + + public void setSparql(final String sparql) { + this.sparql = sparql; + } + + public String getPolicy() { + return policy; + } + + public void setPolicy(final String policy) { + this.policy = policy; + } + + public List getResources() { + return resources; + } + + public void setResources(final List resources) { + this.resources = resources; + } + public Integer endIndex() { if (text != null) { return text.length(); diff --git a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/ResourceItem.java b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/ResourceItem.java index 7b0b49bf8..7d2ea605a 100644 --- a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/ResourceItem.java +++ b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/common/ResourceItem.java @@ -8,13 +8,6 @@ import com.google.gson.annotations.SerializedName; -import lombok.Getter; -import lombok.NoArgsConstructor; -import lombok.Setter; - -@Getter -@Setter -@NoArgsConstructor public class ResourceItem { @SerializedName("@URI") private String uri; @@ -31,13 +24,68 @@ public class ResourceItem { @SerializedName("@offset") private String offSet; + public String getUri() { + return uri; + } + + public void setUri(final String uri) { + this.uri = uri; + } + + public String getSupport() { + return support; + } + + public void setSupport(final String support) { + this.support = support; + } + + public String getTypes() { + return types; + } + + public void setTypes(final String types) { + this.types = types; + } + + public String getSurfaceForm() { + return surfaceForm; + } + + public void setSurfaceForm(final String surfaceForm) { + this.surfaceForm = surfaceForm; + } + + public String getOffSet() { + return offSet; + } + + public void setOffSet(final String offSet) { + this.offSet = offSet; + } + + public String getSimilarityScore() { + return similarityScore; + } + + public void setSimilarityScore(final String similarityScore) { + this.similarityScore = similarityScore; + } + + public String getPercentageOfSecondRank() { + return percentageOfSecondRank; + } + + public void setPercentageOfSecondRank(final String percentageOfSecondRank) { + this.percentageOfSecondRank = percentageOfSecondRank; + } + @SerializedName("@similarityScore") private String similarityScore; @SerializedName("@percentageOfSecondRank") private String percentageOfSecondRank; - public Integer beginIndex() { try { return Integer.valueOf(offSet); From 26fa3c565c52f9053b72c1894eac6c347f2e8daa Mon Sep 17 00:00:00 2001 From: Tetiana Tolmachova Date: Fri, 29 Apr 2022 16:48:27 +0200 Subject: [PATCH 007/119] fix(search history): spotbugs errors fix of DBpedia Spotlight code --- .../dbpediaSpotlight/Example.java | 3 --- .../dbpediaSpotlight/rest/SpotlightBean.java | 18 ++++++++++++++++-- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java index e9cc2a440..80e93685b 100644 --- a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java +++ b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java @@ -37,7 +37,6 @@ private static void print(AnnotationUnit annotationUnit) { Set nameSet = new HashSet<>(); - if (annotationUnit != null) { System.out.println("Total: " + total); List resourceItemsList = annotationUnit.getResources().stream() @@ -51,7 +50,5 @@ private static void print(AnnotationUnit annotationUnit) { } System.out.println("Total after filtering out: " + counter); - } - } } diff --git a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/rest/SpotlightBean.java b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/rest/SpotlightBean.java index ee82ba74a..326b530a6 100644 --- a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/rest/SpotlightBean.java +++ b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/rest/SpotlightBean.java @@ -78,13 +78,27 @@ private void fixPrefixes(List resources) { private String getContent() throws IOException { HttpResponse response = client.execute(request); - BufferedReader rd = new BufferedReader(new InputStreamReader(response.getEntity().getContent())); + StringBuffer result = new StringBuffer(); + try (BufferedReader rd = new BufferedReader(new InputStreamReader(response.getEntity().getContent(), "UTF-8"))) { + String line = EMPTY; + + while ((line = rd.readLine()) != null) { + result.append(line); + } + + } catch (IOException e) { + e.printStackTrace(); + throw new RuntimeException(e); + } + + /*HttpResponse response = client.execute(request); + BufferedReader rd = new BufferedReader(new InputStreamReader(response.getEntity().getContent(), "UTF-8")); StringBuffer result = new StringBuffer(); String line = EMPTY; while ((line = rd.readLine()) != null) { result.append(line); - } + }*/ return result.toString(); } From 5365525b48133245c99959e40d5c2bb8d37f58ba Mon Sep 17 00:00:00 2001 From: Tetiana Tolmachova Date: Wed, 18 May 2022 14:02:34 +0200 Subject: [PATCH 008/119] fix(dbpedia spotlight): add number of occurrences per URI --- .../dbpediaSpotlight/Example.java | 54 -------- .../dbpediaSpotlight/Parser.java | 115 ++++++++++++++++++ .../dbpediaSpotlight/rest/SpotlightBean.java | 9 ++ 3 files changed, 124 insertions(+), 54 deletions(-) delete mode 100644 src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java create mode 100644 src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Parser.java diff --git a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java deleted file mode 100644 index 80e93685b..000000000 --- a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Example.java +++ /dev/null @@ -1,54 +0,0 @@ -package de.l3s.learnweb.searchhistory.dbpediaSpotlight; - -import java.net.URL; -import java.util.Comparator; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Collectors; - -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; - -import de.l3s.learnweb.searchhistory.dbpediaSpotlight.common.AnnotationUnit; -import de.l3s.learnweb.searchhistory.dbpediaSpotlight.common.ResourceItem; -import de.l3s.learnweb.searchhistory.dbpediaSpotlight.rest.SpotlightBean; - -public class Example { - public static void main(String[] args) throws Exception { - SpotlightBean spotlight = new SpotlightBean(); - - URL url = new URL("https://en.wikipedia.org/wiki/Cat"); - Document doc = Jsoup.parse(url, 3 * 1000); - String text = doc.text(); - - AnnotationUnit annotationUnit = spotlight.get(text); - print(annotationUnit); - } - - private static void print(AnnotationUnit annotationUnit) { - AtomicInteger counter = new AtomicInteger(0); - - long total = annotationUnit.getResources().stream().parallel().count(); - - Comparator compareByUri = Comparator - .comparing(ResourceItem::getUri); - - Set nameSet = new HashSet<>(); - - System.out.println("Total: " + total); - - List resourceItemsList = annotationUnit.getResources().stream() - .filter(r -> nameSet.add(r.getSurfaceForm())) - .sorted(compareByUri) - .collect(Collectors.toList()); - - for (ResourceItem item : resourceItemsList) { - System.out.println("URI: " + item.getUri() + ", surface form: " + item.getSurfaceForm() + ", similarity score: " + item.getSimilarityScore()); - counter.incrementAndGet(); - } - - System.out.println("Total after filtering out: " + counter); - } -} diff --git a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Parser.java b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Parser.java new file mode 100644 index 000000000..8b0b3da1a --- /dev/null +++ b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Parser.java @@ -0,0 +1,115 @@ +package de.l3s.learnweb.searchhistory.dbpediaSpotlight; + +import static java.util.stream.Collectors.counting; +import static java.util.stream.Collectors.groupingBy; + +import java.net.URL; +import java.util.Comparator; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Predicate; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; + +import de.l3s.learnweb.searchhistory.dbpediaSpotlight.common.AnnotationUnit; +import de.l3s.learnweb.searchhistory.dbpediaSpotlight.common.ResourceItem; +import de.l3s.learnweb.searchhistory.dbpediaSpotlight.rest.SpotlightBean; + +public class Parser { + private static AnnotationUnit annotationUnit; + + public static void main(String[] args) throws Exception { + SpotlightBean spotlight = new SpotlightBean(); + + /** + * Classify by text + */ + String text = "Berlin is the capital of Germany Berlin"; + annotationUnit = spotlight.get(text); + print(annotationUnit); + + /** + * Classify by URL + */ + URL url = new URL("https://en.wikipedia.org/wiki/Cat"); + Document doc = Jsoup.parse(url, 3 * 1000); + String textFromUrl = doc.text(); + annotationUnit = spotlight.get(textFromUrl); + print(annotationUnit); + } + + private static void print(AnnotationUnit annotationUnit) { + Set nameSet = new HashSet<>(); + + AtomicInteger counter = new AtomicInteger(0); + + long total = annotationUnit.getResources().stream().parallel().count(); + + Comparator compareByURI = Comparator + .comparing(ResourceItem::getUri); + Comparator compareBySimScore = Comparator + .comparing(ResourceItem::getSimilarityScore); + Comparator compareByUriThenSurfaceForm = Comparator + .comparing(ResourceItem::getUri).thenComparing(ResourceItem::getSurfaceForm); + + Predicate filterBySimScore = r -> Double.parseDouble(r.getSimilarityScore()) >= 0.9; + Predicate filterBySurfaceForm = r -> nameSet.add(r.getSurfaceForm()); + Predicate filterByURI = r -> nameSet.add(r.getUri()); + + System.out.println("Total: " + total); + + /** + * Show retrieved list from DBpedia Spotlight with sorting out by URI + */ + /*List resourceItemsList = annotationUnit.getResources().stream() + .sorted(compareByUriThenSurfaceForm) + .collect(Collectors.toList()); + + for (ResourceItem item : resourceItemsList) { + System.out.println("URI: " + item.getUri() + ", surface form: " + item.getSurfaceForm() + ", similarity score: " + item.getSimilarityScore()); + counter.incrementAndGet(); + }*/ + + /** + * Show retrieved list from DBpedia Spotlight with filtering - one surface form with the best similarity score + */ + /*List resourceItemsList = annotationUnit.getResources().stream() + .filter(r -> nameSet.add(r.getSurfaceForm())) + .sorted(compareByUri) + .collect(Collectors.toList()); + + for (ResourceItem item : resourceItemsList) { + System.out.println("URI: " + item.getUri() + ", surface form: " + item.getSurfaceForm() + ", similarity score: " + item.getSimilarityScore()); + counter.incrementAndGet(); + }*/ + + /** + * Show retrieved list from DBpedia Spotlight by filtering similarity score >= 0.85 && one surface form with the best similarity score + */ + /*List resourceItemsList = annotationUnit.getResources().stream() + .filter(filterBySimScore.and(filterByURI)) + .sorted(compareByURI) + .collect(Collectors.toList()); + + for (ResourceItem item : resourceItemsList) { + System.out.println("URI: " + item.getUri() + ", surface form: " + item.getSurfaceForm() + ", similarity score: " + item.getSimilarityScore()); + counter.incrementAndGet(); + } + + System.out.println("Total after filtering out: " + counter);*/ + + /** + * Number of occurrences per URI + */ + Map uriPerType = annotationUnit.getResources().stream() + .collect(groupingBy(ResourceItem::getUri, counting())); + + uriPerType.entrySet().stream() + .sorted(Map.Entry.comparingByValue().reversed()) + .limit(5) + .forEach(r -> System.out.println("URI: " + r.getKey() + ", occurrences: " + r.getValue())); + } +} diff --git a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/rest/SpotlightBean.java b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/rest/SpotlightBean.java index 326b530a6..ee6ddcd35 100644 --- a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/rest/SpotlightBean.java +++ b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/rest/SpotlightBean.java @@ -8,6 +8,7 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; +import java.net.URL; import java.util.ArrayList; import java.util.List; @@ -56,6 +57,14 @@ public AnnotationUnit get(String text) throws IOException { return get(); } + public AnnotationUnit get(URL url) throws IOException { + List params = new ArrayList(); + params.add(new BasicNameValuePair("url", url.toString())); + request.setEntity(new UrlEncodedFormEntity(params)); + + return get(); + } + private String fixPrefixes(String value) { if (value != null && !value.isEmpty()) { return value.replace("Http", "http") From 4a99e8c3f29554dcc63274b360a9df8f4385ce18 Mon Sep 17 00:00:00 2001 From: Tetiana Tolmachova Date: Thu, 19 May 2022 16:06:14 +0200 Subject: [PATCH 009/119] fix(dbpedia spotlight): clean code --- .../dbpediaSpotlight/Parser.java | 77 ++----------------- 1 file changed, 7 insertions(+), 70 deletions(-) diff --git a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Parser.java b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Parser.java index 8b0b3da1a..63c39121d 100644 --- a/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Parser.java +++ b/src/de/l3s/learnweb/searchhistory/dbpediaSpotlight/Parser.java @@ -4,12 +4,7 @@ import static java.util.stream.Collectors.groupingBy; import java.net.URL; -import java.util.Comparator; -import java.util.HashSet; import java.util.Map; -import java.util.Set; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.function.Predicate; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; @@ -24,16 +19,12 @@ public class Parser { public static void main(String[] args) throws Exception { SpotlightBean spotlight = new SpotlightBean(); - /** - * Classify by text - */ + //Classify by plain text String text = "Berlin is the capital of Germany Berlin"; annotationUnit = spotlight.get(text); print(annotationUnit); - /** - * Classify by URL - */ + //Classify by URL URL url = new URL("https://en.wikipedia.org/wiki/Cat"); Document doc = Jsoup.parse(url, 3 * 1000); String textFromUrl = doc.text(); @@ -41,69 +32,15 @@ public static void main(String[] args) throws Exception { print(annotationUnit); } + /** + * Print number of occurrences per URI + * + * @author Tetiana Tolmachova + */ private static void print(AnnotationUnit annotationUnit) { - Set nameSet = new HashSet<>(); - - AtomicInteger counter = new AtomicInteger(0); - long total = annotationUnit.getResources().stream().parallel().count(); - - Comparator compareByURI = Comparator - .comparing(ResourceItem::getUri); - Comparator compareBySimScore = Comparator - .comparing(ResourceItem::getSimilarityScore); - Comparator compareByUriThenSurfaceForm = Comparator - .comparing(ResourceItem::getUri).thenComparing(ResourceItem::getSurfaceForm); - - Predicate filterBySimScore = r -> Double.parseDouble(r.getSimilarityScore()) >= 0.9; - Predicate filterBySurfaceForm = r -> nameSet.add(r.getSurfaceForm()); - Predicate filterByURI = r -> nameSet.add(r.getUri()); - System.out.println("Total: " + total); - /** - * Show retrieved list from DBpedia Spotlight with sorting out by URI - */ - /*List resourceItemsList = annotationUnit.getResources().stream() - .sorted(compareByUriThenSurfaceForm) - .collect(Collectors.toList()); - - for (ResourceItem item : resourceItemsList) { - System.out.println("URI: " + item.getUri() + ", surface form: " + item.getSurfaceForm() + ", similarity score: " + item.getSimilarityScore()); - counter.incrementAndGet(); - }*/ - - /** - * Show retrieved list from DBpedia Spotlight with filtering - one surface form with the best similarity score - */ - /*List resourceItemsList = annotationUnit.getResources().stream() - .filter(r -> nameSet.add(r.getSurfaceForm())) - .sorted(compareByUri) - .collect(Collectors.toList()); - - for (ResourceItem item : resourceItemsList) { - System.out.println("URI: " + item.getUri() + ", surface form: " + item.getSurfaceForm() + ", similarity score: " + item.getSimilarityScore()); - counter.incrementAndGet(); - }*/ - - /** - * Show retrieved list from DBpedia Spotlight by filtering similarity score >= 0.85 && one surface form with the best similarity score - */ - /*List resourceItemsList = annotationUnit.getResources().stream() - .filter(filterBySimScore.and(filterByURI)) - .sorted(compareByURI) - .collect(Collectors.toList()); - - for (ResourceItem item : resourceItemsList) { - System.out.println("URI: " + item.getUri() + ", surface form: " + item.getSurfaceForm() + ", similarity score: " + item.getSimilarityScore()); - counter.incrementAndGet(); - } - - System.out.println("Total after filtering out: " + counter);*/ - - /** - * Number of occurrences per URI - */ Map uriPerType = annotationUnit.getResources().stream() .collect(groupingBy(ResourceItem::getUri, counting())); From 709d6b4bfac59eb483c703c28ab12f91c7bcc2fe Mon Sep 17 00:00:00 2001 From: Viet Trung Tran Date: Tue, 9 Aug 2022 00:45:29 +0200 Subject: [PATCH 010/119] feat: merge dbpedia-spotlight, add annotation graph --- WebContent/lw/myhome/search_history.xhtml | 14 +- WebContent/resources/dataExample.json | 1 + .../resources/learnweb/js/annotation.js | 126 ++++++++++++++++++ .../l3s/learnweb/searchhistory/JsonQuery.java | 112 ++++++++++++++++ .../searchhistory/SearchHistoryBean.java | 41 +++++- .../learnweb/searchhistory/SearchQuery.java | 1 + .../learnweb/searchhistory/SearchSession.java | 2 + src/de/l3s/learnweb/user/UserBean.java | 10 +- 8 files changed, 301 insertions(+), 6 deletions(-) create mode 100644 WebContent/resources/dataExample.json create mode 100644 WebContent/resources/learnweb/js/annotation.js create mode 100644 src/de/l3s/learnweb/searchhistory/JsonQuery.java diff --git a/WebContent/lw/myhome/search_history.xhtml b/WebContent/lw/myhome/search_history.xhtml index ec5b6c55a..784d57932 100644 --- a/WebContent/lw/myhome/search_history.xhtml +++ b/WebContent/lw/myhome/search_history.xhtml @@ -23,6 +23,11 @@ +