From 732700cb281eab4bf9897d83470ed2bbea9c94c8 Mon Sep 17 00:00:00 2001
From: LukasBluebaum <38468743+LukasBluebaum@users.noreply.github.com>
Date: Wed, 8 Apr 2020 17:35:21 +0200
Subject: [PATCH 01/10] Use HTTPS to resolve dependencies in Maven Build
---
pom.xml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/pom.xml b/pom.xml
index 90328364e..2de97f1ac 100644
--- a/pom.xml
+++ b/pom.xml
@@ -37,12 +37,12 @@
maven.aksw.internal
University Leipzig, AKSW Maven2 Repository
- http://maven.aksw.org/repository/internal
+ https://maven.aksw.org/repository/internal
maven.aksw.snapshots
University Leipzig, AKSW Maven2 Repository
- http://maven.aksw.org/repository/snapshots
+ https://maven.aksw.org/repository/snapshots
Apache Repo
From f68b02fb70cf7e18d6f2caca77a5c4ba00323a08 Mon Sep 17 00:00:00 2001
From: Micha
Date: Tue, 10 Aug 2021 18:42:39 +0200
Subject: [PATCH 02/10] Added dockerignore file. Separated the start script
into several modular scripts. Updated the creation of the docker image.
Created a docker-compose file.
---
.dockerignore | 16 ++
Dockerfile | 40 ++--
docker-compose.yml | 17 ++
docker_build.sh | 12 +-
pom.xml | 93 +++++-----
scripts/download_data.sh | 33 ++++
scripts/download_indexes.sh | 48 +++++
scripts/functions.sh | 110 +++++++++++
.../gerbil/config/GerbilConfiguration.java | 18 +-
.../aksw/gerbil/web/config/RootConfig.java | 13 +-
start.sh | 173 +-----------------
start_in_docker.sh | 23 +++
12 files changed, 348 insertions(+), 248 deletions(-)
create mode 100644 .dockerignore
create mode 100644 docker-compose.yml
create mode 100755 scripts/download_data.sh
create mode 100755 scripts/download_indexes.sh
create mode 100644 scripts/functions.sh
create mode 100755 start_in_docker.sh
diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 000000000..bc312aa57
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,16 @@
+.git
+#src except src/main/properties (gerbil_keys.properties may contain secrets)
+src/main/java
+src/main/resources
+src/main/properties/gerbil_keys.properties
+src/test
+# data directories
+gerbil_data
+indexes
+repository
+# block target, except war file and WEB-INF
+target/*
+!target/gerbil*
+# block the gerbil_keys file in case it is in the target directory
+target/gerbil-*/WEB-INF/classes/gerbil_keys.properties
+
diff --git a/Dockerfile b/Dockerfile
index b60dd0d64..dc8a367ef 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,26 +1,26 @@
-#############################
-# BUILD THE WAR FILE
-#############################
-
-FROM maven:3.6.0-jdk-8 AS build
-
-COPY src /tmp/src/
-COPY repository /tmp/repository/
-COPY pom.xml /tmp/
-
-# overwrite gerbil-data path:
-COPY docker-config/* /tmp/src/main/properties/
+FROM tomcat:7-jre8-alpine
+# Remove example projects, etc.
+RUN rm -rf /usr/local/tomcat/webapps/*
-WORKDIR /tmp/
+# Create the runtime data directories and download gerbil_data into /data; the zip is deleted in the same layer so it does not stay in the image
+RUN mkdir -p /data /usr/local/tomcat/gerbil_data/cache /usr/local/tomcat/gerbil_data/configs /usr/local/tomcat/gerbil_data/database /usr/local/tomcat/gerbil_data/datasets /usr/local/tomcat/gerbil_data/indexes /usr/local/tomcat/gerbil_data/output /usr/local/tomcat/gerbil_data/resources /usr/local/tomcat/gerbil_data/systems /usr/local/tomcat/gerbil_data/upload
+COPY scripts/download_data.sh download_data.sh
+COPY scripts/functions.sh functions.sh
+RUN ./download_data.sh /data && rm -f /data/gerbil_data.zip
-RUN mvn package -U -DskipTests
+# Copy GERBIL's war file
+COPY target/gerbil-*.war /usr/local/tomcat/webapps/gerbil.war
-#############################
-# BUILD THE DOCKER CONTAINER
-#############################
+# Copy GERBIL's properties files (from target, not from source!)
+COPY target/gerbil-*/WEB-INF/classes/*.properties /data/properties/
+RUN touch /data/properties/gerbil_keys.properties
-FROM tomcat:7-jre8-alpine
+# Set path to properties files
+ENV GERBIL_PROP_DIR=/usr/local/tomcat/gerbil_properties/
+# Create directory for properties
+RUN mkdir /usr/local/tomcat/gerbil_properties/
-RUN touch 20190115.txt
+# Copy start script; it is launched in exec form below so no extra "/bin/sh -c" wrapper sits in front of it
+COPY start_in_docker.sh start_in_docker.sh
-COPY --from=build /tmp/target/gerbil-*.war $CATALINA_HOME/webapps/$gerbil.war
\ No newline at end of file
+CMD ["./start_in_docker.sh"]
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 000000000..0a034aa5c
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,17 @@
+version: "3.3"
+services:
+  gerbil:
+    image: dicegroup/gerbil
+    ports:
+      - "1234:8080"
+    volumes:
+      # Properties files (the image sets GERBIL_PROP_DIR to this container path)
+      - ./gerbil_properties:/usr/local/tomcat/gerbil_properties
+      # Database (Should be mounted to persist data!)
+      - ./gerbil_data/database:/usr/local/tomcat/gerbil_data/database
+      # Cache directory (Should be mounted to speed up new GERBIL instances)
+      - ./gerbil_data/cache:/usr/local/tomcat/gerbil_data/cache
+      # Datasets
+      - ./gerbil_data/datasets:/usr/local/tomcat/gerbil_data/datasets
+      # SameAs and Entity Indexes (NOTE(review): the Dockerfile creates gerbil_data/indexes, not indexes - confirm this target path)
+      - ./indexes:/usr/local/tomcat/indexes
diff --git a/docker_build.sh b/docker_build.sh
index a6d4a7a6b..96f4c1b34 100755
--- a/docker_build.sh
+++ b/docker_build.sh
@@ -4,15 +4,15 @@ export PROJECT="gerbil"
VERSION=$(mvn -q -Dexec.executable="echo" -Dexec.args='${project.version}' --non-recursive org.codehaus.mojo:exec-maven-plugin:1.3.1:exec)
# build the docker container
-docker build --no-cache -t philippkuntschik/$PROJECT .
+docker build -t dicegroup/$PROJECT .
-docker tag philippkuntschik/$PROJECT philippkuntschik/$PROJECT:$VERSION
-docker tag philippkuntschik/$PROJECT philippkuntschik/$PROJECT:latest
+docker tag dicegroup/$PROJECT dicegroup/$PROJECT:$VERSION
+docker tag dicegroup/$PROJECT dicegroup/$PROJECT:latest
# upload the image
if [[ $1 == "--upload" ]]
then
echo 'uploading...'
- sudo docker push philippkuntschik/$PROJECT:$VERSION
- sudo docker push philippkuntschik/$PROJECT:latest
-fi
+ sudo docker push dicegroup/$PROJECT:$VERSION
+ sudo docker push dicegroup/$PROJECT:latest
+fi
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index ddca34f6e..48dc8d466 100644
--- a/pom.xml
+++ b/pom.xml
@@ -65,15 +65,15 @@
jena-arq
${jena.version}
-
-
+
+
-
- com.fasterxml.jackson.core
- jackson-databind
- 2.9.0
-
+
+ com.fasterxml.jackson.core
+ jackson-databind
+ 2.9.0
+
@@ -338,11 +338,11 @@
commons-compress
1.19
-
- com.google.code.externalsortinginjava
- externalsortinginjava
- 0.1.10
-
+
+ com.google.code.externalsortinginjava
+ externalsortinginjava
+ 0.1.10
+
@@ -426,38 +426,43 @@
-
-
- org.apache.maven.plugins
- maven-surefire-plugin
- 2.22.2
-
-
-
- target/jacoco.exec
-
-
-
-
-
- org.jacoco
- jacoco-maven-plugin
- 0.8.5
-
-
- default-prepare-agent
-
- prepare-agent
-
-
-
- default-report
-
- report
-
-
-
-
+
+
+ org.apache.maven.plugins
+ maven-surefire-plugin
+ 2.22.2
+
+
+
+ target/jacoco.exec
+
+
+
+
+
+ org.jacoco
+ jacoco-maven-plugin
+ 0.8.5
+
+
+ default-prepare-agent
+
+ prepare-agent
+
+
+
+ default-report
+
+ report
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-war-plugin
+ 3.3.1
+
diff --git a/scripts/download_data.sh b/scripts/download_data.sh
new file mode 100755
index 000000000..6ec70ff31
--- /dev/null
+++ b/scripts/download_data.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+# Include file (from https://stackoverflow.com/a/12694189)
+DIR="${BASH_SOURCE%/*}"
+if [[ ! -d "$DIR" ]]; then DIR="$PWD"; fi
+source "$DIR/functions.sh"
+
+if [ $# -eq 0 ]; then
+ outputDir="$PWD"
+ file="gerbil_data.zip"
+else
+ outputDir="$1"
+ file="$outputDir/gerbil_data.zip"
+fi
+
+url="https://github.com/dice-group/gerbil/releases/download/v1.2.6/gerbil_data.zip"
+
+if [ ! -d "$outputDir/gerbil_data" ]; then
+    # gerbil_data is created only after a successful download, so a failed run is retried next time.
+    mkdir -p "$outputDir" || error "Could not create output directory: $outputDir"
+    if [ ! -f "$file" ]; then
+        echo "Downloading dependencies ... ($url)"
+        # wget replaces curl (not available in our docker image); -O may leave a partial file on failure, so remove it.
+        wget -O "$file" "$url" || rm -f "$file"
+    fi
+    if [ ! -f "$file" ]; then
+        error "Couldn't downloading dependency data: $file"
+    fi
+    # An archive left over from an earlier run is extracted too instead of being silently skipped.
+    echo "Extracting dependencies ... "
+    unzip "$file" -d "$outputDir" || error "Could not extract $file"
+    mkdir -p "$outputDir/gerbil_data/cache" || error "Could not create gerbil_data/cache directory"
+fi
\ No newline at end of file
diff --git a/scripts/download_indexes.sh b/scripts/download_indexes.sh
new file mode 100755
index 000000000..1e2299ceb
--- /dev/null
+++ b/scripts/download_indexes.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Include file (from https://stackoverflow.com/a/12694189)
+DIR="${BASH_SOURCE%/*}"
+if [[ ! -d "$DIR" ]]; then DIR="$PWD"; fi
+source "$DIR/functions.sh"
+
+#####################################################################
+# Check for dbpedia sameAs index
+echo "Checking dbpedia sameAs index..."
+if [ ! -d "gerbil_data/indexes/dbpedia" ]; then
+ echo "Couldn't find a dbpedia sameAs index"
+ if yesno "Should the index be downloaded (~1GB zipped, ~2GB extracted)? (yes/no): "; then
+ mkdir -p "gerbil_data/indexes/dbpedia" || error "Could not create gerbil_data/indexes/dbpedia directory"
+ file="gerbil_data/indexes/dbpedia/dbpedia_index.zip"
+ url="https://hobbitdata.informatik.uni-leipzig.de/gerbil/dbpedia_index_2016.zip"
+ echo "Downloading index ... ($url)"
+ curl --retry 4 -L -o "$file" "$url"
+
+ if [ ! -f "$file" ]; then
+ echo "Couldn't downloading index file: $file"
+ else
+ echo "Extracting index ... "
+ unzip "$file" -d "gerbil_data/indexes/dbpedia"
+ fi
+ fi
+fi
+
+#####################################################################
+# Check for dbpedia entity check index
+echo "Checking dbpedia entity check index..."
+if [ ! -d "gerbil_data/indexes/dbpedia_check" ]; then
+ echo "Couldn't find a dbpedia entity check index"
+ if yesno "Should the index be downloaded (~0.3GB zipped, ~0.7GB extracted)? (yes/no): "; then
+ mkdir -p "gerbil_data/indexes/dbpedia_check" || error "Could not create gerbil_data/indexes/dbpedia_check directory"
+ file="gerbil_data/indexes/dbpedia_check/dbpedia_check_index.zip"
+ url="https://hobbitdata.informatik.uni-leipzig.de/gerbil/dbpedia_check_index_2017.zip"
+ echo "Downloading index ... ($url)"
+ curl --retry 4 -L -o "$file" "$url"
+
+ if [ ! -f "$file" ]; then
+ echo "Couldn't downloading index file: $file"
+ else
+ echo "Extracting index ... "
+ unzip "$file" -d "gerbil_data/indexes/dbpedia_check"
+ fi
+ fi
+fi
\ No newline at end of file
diff --git a/scripts/functions.sh b/scripts/functions.sh
new file mode 100644
index 000000000..cf5811c14
--- /dev/null
+++ b/scripts/functions.sh
@@ -0,0 +1,110 @@
+#!/bin/bash
+
+# This script is part of the GERBIL project.
+# It reuses functions from Mitch Frazier (http://www.linuxjournal.com/content/asking-yesno-question-bash-script)
+
+#####################################################################
+# Print warning message.
+
+function warning()
+{
+ echo "$*" >&2
+}
+
+#####################################################################
+# Print error message and exit.
+
+function error()
+{
+ echo "$*" >&2
+ exit 1
+}
+
+
+#####################################################################
+# Ask yesno question.
+#
+# Usage: yesno OPTIONS QUESTION
+#
+# Options:
+# --timeout N Timeout if no input seen in N seconds.
+# --default ANS Use ANS as the default answer on timeout or
+# if an empty answer is provided.
+#
+# Exit status is the answer.
+
+function yesno()
+{
+ local ans
+ local ok=0
+ local timeout=0
+ local default
+ local t
+
+ while [[ "$1" ]]
+ do
+ case "$1" in
+ --default)
+ shift
+ default=$1
+ if [[ ! "$default" ]]; then error "Missing default value"; fi
+ t=$(tr '[:upper:]' '[:lower:]' <<<$default)
+
+ if [[ "$t" != 'y' && "$t" != 'yes' && "$t" != 'n' && "$t" != 'no' ]]; then
+ error "Illegal default answer: $default"
+ fi
+ default=$t
+ shift
+ ;;
+
+ --timeout)
+ shift
+ timeout=$1
+ if [[ ! "$timeout" ]]; then error "Missing timeout value"; fi
+ if [[ ! "$timeout" =~ ^[0-9][0-9]*$ ]]; then error "Illegal timeout value: $timeout"; fi
+ shift
+ ;;
+
+ -*)
+ error "Unrecognized option: $1"
+ ;;
+
+ *)
+ break
+ ;;
+ esac
+ done
+
+ if [[ $timeout -ne 0 && ! "$default" ]]; then
+ error "Non-zero timeout requires a default answer"
+ fi
+
+ if [[ ! "$*" ]]; then error "Missing question"; fi
+
+ while [[ $ok -eq 0 ]]
+ do
+ if [[ $timeout -ne 0 ]]; then
+ if ! read -t $timeout -p "$*" ans; then
+ ans=$default
+ else
+ # Turn off timeout if answer entered.
+ timeout=0
+ if [[ ! "$ans" ]]; then ans=$default; fi
+ fi
+ else
+ read -p "$*" ans
+ if [[ ! "$ans" ]]; then
+ ans=$default
+ else
+ ans=$(tr '[:upper:]' '[:lower:]' <<<$ans)
+ fi
+ fi
+
+ if [[ "$ans" == 'y' || "$ans" == 'yes' || "$ans" == 'n' || "$ans" == 'no' ]]; then
+ ok=1
+ fi
+
+ if [[ $ok -eq 0 ]]; then warning "Valid answers are: yes y no n"; fi
+ done
+ [[ "$ans" = "y" || "$ans" == "yes" ]]
+}
\ No newline at end of file
diff --git a/src/main/java/org/aksw/gerbil/config/GerbilConfiguration.java b/src/main/java/org/aksw/gerbil/config/GerbilConfiguration.java
index 82c423eb6..00798d2c4 100644
--- a/src/main/java/org/aksw/gerbil/config/GerbilConfiguration.java
+++ b/src/main/java/org/aksw/gerbil/config/GerbilConfiguration.java
@@ -16,6 +16,8 @@
*/
package org.aksw.gerbil.config;
+import java.io.File;
+
import org.apache.commons.configuration.CompositeConfiguration;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.ConfigurationException;
@@ -37,6 +39,7 @@ public class GerbilConfiguration {
private static final Logger LOGGER = LoggerFactory.getLogger(GerbilConfiguration.class);
private static final String DEFAULT_GERBIL_PROPERTIES_FILE_NAME = "gerbil.properties";
+ public static final String GERBIL_PROP_DIR_KEY = "GERBIL_PROP_DIR";
public static final String GERBIL_DATAPATH_PROPERTY_NAME = "org.aksw.gerbil.DataPath";
public static final String GERBIL_VERSION_PROPERTY_NAME = "org.aksw.gerbil.Version";
@@ -52,13 +55,26 @@ public static synchronized Configuration getInstance() {
public static synchronized void loadAdditionalProperties(String fileName) {
try {
- ((CompositeConfiguration) getInstance()).addConfiguration(new PropertiesConfiguration(fileName));
+ ((CompositeConfiguration) getInstance())
+ .addConfiguration(new PropertiesConfiguration(derivePropertiesPath(fileName)));
} catch (ConfigurationException e) {
LOGGER.error("Couldnt load Properties from the properties file (\"" + fileName
+ "\"). This GERBIL instance won't work as expected.", e);
}
}
+    /** Prefixes fileName with the directory from the GERBIL_PROP_DIR environment variable; returns fileName unchanged when that variable is unset or empty. */
+    public static String derivePropertiesPath(String fileName) {
+        String propDir = System.getenv(GERBIL_PROP_DIR_KEY);
+        if (propDir == null || propDir.isEmpty()) {
+            // No directory configured: hand back the bare name (an empty value must not resolve to the filesystem root).
+            return fileName;
+        }
+        // Insert exactly one separator between the directory and the file name.
+        String separator = propDir.endsWith(File.separator) ? "" : File.separator;
+        return propDir + separator + fileName;
+    }
+
public static String getGerbilVersion() {
return getInstance().getString(GERBIL_VERSION_PROPERTY_NAME);
}
diff --git a/src/main/java/org/aksw/gerbil/web/config/RootConfig.java b/src/main/java/org/aksw/gerbil/web/config/RootConfig.java
index 46abcb5cd..3905a2ca7 100644
--- a/src/main/java/org/aksw/gerbil/web/config/RootConfig.java
+++ b/src/main/java/org/aksw/gerbil/web/config/RootConfig.java
@@ -59,18 +59,17 @@
import org.aksw.simba.topicmodeling.concurrent.reporter.Reporter;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.ConversionException;
+import org.apache.jena.rdf.model.Model;
+import org.apache.jena.rdf.model.ModelFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.PropertySource;
import org.springframework.context.support.PropertySourcesPlaceholderConfigurer;
-import org.springframework.core.io.ClassPathResource;
+import org.springframework.core.io.FileSystemResource;
import org.springframework.core.io.Resource;
-import org.apache.jena.rdf.model.Model;
-import org.apache.jena.rdf.model.ModelFactory;
-
/**
* This is the root {@link Configuration} class that is processed by the Spring
* framework and performs the following configurations:
@@ -123,7 +122,8 @@ public class RootConfig {
static @Bean public PropertySourcesPlaceholderConfigurer myPropertySourcesPlaceholderConfigurer() {
PropertySourcesPlaceholderConfigurer p = new PropertySourcesPlaceholderConfigurer();
- Resource[] resourceLocations = new Resource[] { new ClassPathResource("gerbil.properties"), };
+ Resource[] resourceLocations = new Resource[] {
+ new FileSystemResource(GerbilConfiguration.derivePropertiesPath("gerbil.properties")), };
p.setLocations(resourceLocations);
return p;
}
@@ -329,8 +329,7 @@ public static AnnotatorOutputWriter getAnnotatorOutputWriter() {
namespaces.get(0));
// use HTTP based checker
for (String namespace : namespaces) {
- manager.registerEntityChecker(namespace,
- new HttpBasedEntityChecker(namespace));
+ manager.registerEntityChecker(namespace, new HttpBasedEntityChecker(namespace));
}
}
}
diff --git a/start.sh b/start.sh
index c8691be8c..d42052a96 100755
--- a/start.sh
+++ b/start.sh
@@ -1,135 +1,8 @@
#!/bin/bash
-# This script is part of the GERBIL project.
-# It reuses functions from Mitch Frazier (http://www.linuxjournal.com/content/asking-yesno-question-bash-script)
-
-#####################################################################
-# Print warning message.
-
-function warning()
-{
- echo "$*" >&2
-}
-
-#####################################################################
-# Print error message and exit.
-
-function error()
-{
- echo "$*" >&2
- exit 1
-}
+source scripts/functions.sh
-
-#####################################################################
-# Ask yesno question.
-#
-# Usage: yesno OPTIONS QUESTION
-#
-# Options:
-# --timeout N Timeout if no input seen in N seconds.
-# --default ANS Use ANS as the default answer on timeout or
-# if an empty answer is provided.
-#
-# Exit status is the answer.
-
-function yesno()
-{
- local ans
- local ok=0
- local timeout=0
- local default
- local t
-
- while [[ "$1" ]]
- do
- case "$1" in
- --default)
- shift
- default=$1
- if [[ ! "$default" ]]; then error "Missing default value"; fi
- t=$(tr '[:upper:]' '[:lower:]' <<<$default)
-
- if [[ "$t" != 'y' && "$t" != 'yes' && "$t" != 'n' && "$t" != 'no' ]]; then
- error "Illegal default answer: $default"
- fi
- default=$t
- shift
- ;;
-
- --timeout)
- shift
- timeout=$1
- if [[ ! "$timeout" ]]; then error "Missing timeout value"; fi
- if [[ ! "$timeout" =~ ^[0-9][0-9]*$ ]]; then error "Illegal timeout value: $timeout"; fi
- shift
- ;;
-
- -*)
- error "Unrecognized option: $1"
- ;;
-
- *)
- break
- ;;
- esac
- done
-
- if [[ $timeout -ne 0 && ! "$default" ]]; then
- error "Non-zero timeout requires a default answer"
- fi
-
- if [[ ! "$*" ]]; then error "Missing question"; fi
-
- while [[ $ok -eq 0 ]]
- do
- if [[ $timeout -ne 0 ]]; then
- if ! read -t $timeout -p "$*" ans; then
- ans=$default
- else
- # Turn off timeout if answer entered.
- timeout=0
- if [[ ! "$ans" ]]; then ans=$default; fi
- fi
- else
- read -p "$*" ans
- if [[ ! "$ans" ]]; then
- ans=$default
- else
- ans=$(tr '[:upper:]' '[:lower:]' <<<$ans)
- fi
- fi
-
- if [[ "$ans" == 'y' || "$ans" == 'yes' || "$ans" == 'n' || "$ans" == 'no' ]]; then
- ok=1
- fi
-
- if [[ $ok -eq 0 ]]; then warning "Valid answers are: yes y no n"; fi
- done
- [[ "$ans" = "y" || "$ans" == "yes" ]]
-}
-
-#####################################################################
-# Check for dependencies
-echo "Checking dependencies..."
-file="gerbil_data/gerbil_data.zip"
-url="https://github.com/dice-group/gerbil/releases/download/v1.2.6/gerbil_data.zip"
-
-if [ ! -d "gerbil_data" ]; then
- mkdir -p "gerbil_data" || error "Could not create gerbil_data directory"
- mkdir -p "gerbil_data/cache" || error "Could not create gerbil_data/cache directory"
- if [ ! -f "$file" ]; then
- echo "Downloading dependencies ... ($url)"
- curl --retry 4 -L -o "$file" "$url"
-
- if [ ! -f "$file" ]; then
- error "Couldn't downloading dependency data: $file"
- else
- echo "Extracting dependencies ... "
- unzip "$file"
- fi
- fi
-fi
+./scripts/download_data.sh
#####################################################################
# Check for property file
@@ -146,47 +19,7 @@ if [ ! -f "$file" ]; then
echo "##############################################################################" >> $file
fi
-#####################################################################
-# Check for dbpedia sameAs index
-echo "Checking dbpedia sameAs index..."
-if [ ! -d "gerbil_data/indexes/dbpedia" ]; then
- echo "Couldn't find a dbpedia sameAs index"
- if yesno "Should the index be downloaded (~1GB zipped, ~2GB extracted)? (yes/no): "; then
- mkdir -p "gerbil_data/indexes/dbpedia" || error "Could not create gerbil_data/indexes/dbpedia directory"
- file="gerbil_data/indexes/dbpedia/dbpedia_index.zip"
- url="https://hobbitdata.informatik.uni-leipzig.de/gerbil/dbpedia_index_2016.zip"
- echo "Downloading index ... ($url)"
- curl --retry 4 -L -o "$file" "$url"
-
- if [ ! -f "$file" ]; then
- echo "Couldn't downloading index file: $file"
- else
- echo "Extracting index ... "
- unzip "$file" -d "gerbil_data/indexes/dbpedia"
- fi
- fi
-fi
-
-#####################################################################
-# Check for dbpedia entity check index
-echo "Checking dbpedia entity check index..."
-if [ ! -d "gerbil_data/indexes/dbpedia_check" ]; then
- echo "Couldn't find a dbpedia entity check index"
- if yesno "Should the index be downloaded (~0.3GB zipped, ~0.7GB extracted)? (yes/no): "; then
- mkdir -p "gerbil_data/indexes/dbpedia_check" || error "Could not create gerbil_data/indexes/dbpedia_check directory"
- file="gerbil_data/indexes/dbpedia_check/dbpedia_check_index.zip"
- url="https://hobbitdata.informatik.uni-leipzig.de/gerbil/dbpedia_check_index_2017.zip"
- echo "Downloading index ... ($url)"
- curl --retry 4 -L -o "$file" "$url"
-
- if [ ! -f "$file" ]; then
- echo "Couldn't downloading index file: $file"
- else
- echo "Extracting index ... "
- unzip "$file" -d "gerbil_data/indexes/dbpedia_check"
- fi
- fi
-fi
+./scripts/download_indexes.sh
echo "Building and starting GERBIL..."
mvn clean org.apache.tomcat.maven:tomcat7-maven-plugin:2.2:run -Dmaven.tomcat.port=1234
diff --git a/start_in_docker.sh b/start_in_docker.sh
new file mode 100755
index 000000000..bbfdba31f
--- /dev/null
+++ b/start_in_docker.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+COPY_DIRS="${GERBIL_COPY_DIRS:-true}" # If variable not set or null, use default.
+COPY_PROPS="${GERBIL_COPY_PROPS:-true}"
+
+if [ $COPY_DIRS = 'true' ]; then
+ echo "Copying data directories if missing..."
+ # copy directories that do not already exist
+ cp -r -u /data/gerbil_data/configs/* gerbil_data/configs/
+ cp -r -u /data/gerbil_data/datasets/* gerbil_data/datasets/
+ cp -r -u /data/gerbil_data/resources/* gerbil_data/resources/
+ cp -r -u /data/gerbil_data/systems/* gerbil_data/systems/
+fi
+
+if [ $COPY_PROPS = 'true' ]; then
+ echo "Copying properties files if missing..."
+ # Copy properties files
+ cp -v -u /data/properties/* /usr/local/tomcat/gerbil_properties/
+fi
+
+# Start Tomcat in the foreground; exec replaces this shell so Tomcat itself receives the container's stop signal
+echo "Starting Tomcat..."
+exec catalina.sh run
\ No newline at end of file
From 5d9060e729709995d443e3fefb0c8c0d3f328c63 Mon Sep 17 00:00:00 2001
From: Micha
Date: Tue, 10 Aug 2021 20:44:00 +0200
Subject: [PATCH 03/10] Fixed logo.
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 8765a7cfa..da2fc643b 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ GERBIL
[![BCH compliance](https://bettercodehub.com/edge/badge/AKSW/gerbil)](https://bettercodehub.com/)
[![Codacy Badge](https://app.codacy.com/project/badge/Grade/5634b1a7508f475886b027feac9da228)](https://www.codacy.com/gh/dice-group/gerbil/dashboard?utm_source=github.com&utm_medium=referral&utm_content=dice-group/gerbil&utm_campaign=Badge_Grade)
-
+
### General Information
From e3513f92f5ff9e1c6b55b067714709aa29e3cede Mon Sep 17 00:00:00 2001
From: Micha
Date: Tue, 10 Aug 2021 20:48:37 +0200
Subject: [PATCH 04/10] Fixed version number.
---
pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pom.xml b/pom.xml
index 48dc8d466..f490b42ed 100644
--- a/pom.xml
+++ b/pom.xml
@@ -14,7 +14,7 @@
4.0.0
org.aksw
gerbil
- 1.2.8-SNAPSHOT
+ 1.2.9-SNAPSHOT
General Entity Annotator Benchmark
This project is a benchmarking framework for entity annotation and disambiguation tools.
2014
From 4560eb7910bec21ebea1f74d7dd0e1c7c1ce283a Mon Sep 17 00:00:00 2001
From: Micha
Date: Wed, 11 Aug 2021 14:24:18 +0200
Subject: [PATCH 05/10] Fixed codacy issues.
---
scripts/functions.sh | 6 +++---
start_in_docker.sh | 4 ++--
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/scripts/functions.sh b/scripts/functions.sh
index cf5811c14..94b3ffc4c 100644
--- a/scripts/functions.sh
+++ b/scripts/functions.sh
@@ -48,7 +48,7 @@ function yesno()
shift
default=$1
if [[ ! "$default" ]]; then error "Missing default value"; fi
- t=$(tr '[:upper:]' '[:lower:]' <<<$default)
+ t=$(tr "[:upper:]" "[:lower:]" <<<$default)
if [[ "$t" != 'y' && "$t" != 'yes' && "$t" != 'n' && "$t" != 'no' ]]; then
error "Illegal default answer: $default"
@@ -84,7 +84,7 @@ function yesno()
while [[ $ok -eq 0 ]]
do
if [[ $timeout -ne 0 ]]; then
- if ! read -t $timeout -p "$*" ans; then
+ if ! read -t "$timeout" -p "$*" ans; then
ans=$default
else
# Turn off timeout if answer entered.
@@ -96,7 +96,7 @@ function yesno()
if [[ ! "$ans" ]]; then
ans=$default
else
- ans=$(tr '[:upper:]' '[:lower:]' <<<$ans)
+ ans=$(tr "[:upper:]" "[:lower:]" <<<$ans)
fi
fi
diff --git a/start_in_docker.sh b/start_in_docker.sh
index bbfdba31f..a77bd25dd 100755
--- a/start_in_docker.sh
+++ b/start_in_docker.sh
@@ -3,7 +3,7 @@
COPY_DIRS="${GERBIL_COPY_DIRS:-true}" # If variable not set or null, use default.
COPY_PROPS="${GERBIL_COPY_PROPS:-true}"
-if [ $COPY_DIRS = 'true' ]; then
+if [ $COPY_DIRS = "true" ]; then
echo "Copying data directories if missing..."
# copy directories that do not already exist
cp -r -u /data/gerbil_data/configs/* gerbil_data/configs/
@@ -12,7 +12,7 @@ if [ $COPY_DIRS = 'true' ]; then
cp -r -u /data/gerbil_data/systems/* gerbil_data/systems/
fi
-if [ $COPY_PROPS = 'true' ]; then
+if [ $COPY_PROPS = "true" ]; then
echo "Copying properties files if missing..."
# Copy properties files
cp -v -u /data/properties/* /usr/local/tomcat/gerbil_properties/
From fd0f0b0ee6e128d19020b74a3fd196d8955b6f23 Mon Sep 17 00:00:00 2001
From: Micha
Date: Wed, 11 Aug 2021 14:35:06 +0200
Subject: [PATCH 06/10] Fixed codacy issues.
---
scripts/functions.sh | 4 ++--
start_in_docker.sh | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/scripts/functions.sh b/scripts/functions.sh
index 94b3ffc4c..542c1fa13 100644
--- a/scripts/functions.sh
+++ b/scripts/functions.sh
@@ -48,7 +48,7 @@ function yesno()
shift
default=$1
if [[ ! "$default" ]]; then error "Missing default value"; fi
- t=$(tr "[:upper:]" "[:lower:]" <<<$default)
+ t=$(tr '[:upper:]' '[:lower:]' <<<"$default")
if [[ "$t" != 'y' && "$t" != 'yes' && "$t" != 'n' && "$t" != 'no' ]]; then
error "Illegal default answer: $default"
@@ -96,7 +96,7 @@ function yesno()
if [[ ! "$ans" ]]; then
ans=$default
else
- ans=$(tr "[:upper:]" "[:lower:]" <<<$ans)
+ ans=$(tr '[:upper:]' '[:lower:]' <<<"$ans")
fi
fi
diff --git a/start_in_docker.sh b/start_in_docker.sh
index a77bd25dd..288819f24 100755
--- a/start_in_docker.sh
+++ b/start_in_docker.sh
@@ -3,7 +3,7 @@
COPY_DIRS="${GERBIL_COPY_DIRS:-true}" # If variable not set or null, use default.
COPY_PROPS="${GERBIL_COPY_PROPS:-true}"
-if [ $COPY_DIRS = "true" ]; then
+if [ $COPY_DIRS -eq "true" ]; then
echo "Copying data directories if missing..."
# copy directories that do not already exist
cp -r -u /data/gerbil_data/configs/* gerbil_data/configs/
@@ -12,7 +12,7 @@ if [ $COPY_DIRS = "true" ]; then
cp -r -u /data/gerbil_data/systems/* gerbil_data/systems/
fi
-if [ $COPY_PROPS = "true" ]; then
+if [ $COPY_PROPS -eq "true" ]; then
echo "Copying properties files if missing..."
# Copy properties files
cp -v -u /data/properties/* /usr/local/tomcat/gerbil_properties/
From a323381ce4d166663c9c2f2213f1406f9fb59816 Mon Sep 17 00:00:00 2001
From: Micha
Date: Thu, 12 Aug 2021 11:57:17 +0200
Subject: [PATCH 07/10] Fixed codacy issues.
---
start_in_docker.sh | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/start_in_docker.sh b/start_in_docker.sh
index 288819f24..f5fdf93ec 100755
--- a/start_in_docker.sh
+++ b/start_in_docker.sh
@@ -3,7 +3,7 @@
COPY_DIRS="${GERBIL_COPY_DIRS:-true}" # If variable not set or null, use default.
COPY_PROPS="${GERBIL_COPY_PROPS:-true}"
-if [ $COPY_DIRS -eq "true" ]; then
+if [[ "${COPY_DIRS}" == "true" ]]; then
echo "Copying data directories if missing..."
# copy directories that do not already exist
cp -r -u /data/gerbil_data/configs/* gerbil_data/configs/
@@ -12,7 +12,7 @@ if [ $COPY_DIRS -eq "true" ]; then
cp -r -u /data/gerbil_data/systems/* gerbil_data/systems/
fi
-if [ $COPY_PROPS -eq "true" ]; then
+if [[ "${COPY_PROPS}" == "true" ]]; then
echo "Copying properties files if missing..."
# Copy properties files
cp -v -u /data/properties/* /usr/local/tomcat/gerbil_properties/
From 40b26451257f5783738dec4010356a348f11310e Mon Sep 17 00:00:00 2001
From: Micha
Date: Thu, 12 Aug 2021 13:01:38 +0200
Subject: [PATCH 08/10] improved docker image build script.
---
docker_build.sh | 42 +++++++++++++++++++++++++++++++++++-------
1 file changed, 35 insertions(+), 7 deletions(-)
diff --git a/docker_build.sh b/docker_build.sh
index 96f4c1b34..cd3ad2bfc 100755
--- a/docker_build.sh
+++ b/docker_build.sh
@@ -1,18 +1,46 @@
#!/bin/bash
-export PROJECT="gerbil"
+PROJECT="gerbil"
VERSION=$(mvn -q -Dexec.executable="echo" -Dexec.args='${project.version}' --non-recursive org.codehaus.mojo:exec-maven-plugin:1.3.1:exec)
+echo "Building image for version ${VERSION} ..."
# build the docker container
-docker build -t dicegroup/$PROJECT .
+docker build -t "dicegroup/${PROJECT}:latest" . || exit 1
-docker tag dicegroup/$PROJECT dicegroup/$PROJECT:$VERSION
-docker tag dicegroup/$PROJECT dicegroup/$PROJECT:latest
+tagged_versions=("latest")
+
+# Snapshot versions get the single moving tag "snapshot" instead of version-number tags
+if [[ "${VERSION}" == *-SNAPSHOT ]]
+then
+    echo "The image will be tagged as snapshot"
+    docker tag "dicegroup/${PROJECT}:latest" "dicegroup/${PROJECT}:snapshot"
+    tagged_versions+=("snapshot")
+else
+    # Release version: tag every prefix of the dotted version string (e.g. 1, 1.2, 1.2.9)
+    # IFS is set for this read only, so the split character does not leak into the rest of the script
+    IFS="." read -r -a version_array <<< "${VERSION}"
+    TAG=""
+    for i in "${version_array[@]}"
+    do
+        # First component starts the tag; later components are appended with a dot
+        if [[ -z "${TAG}" ]]
+        then
+            TAG="${i}"
+        else
+            TAG="${TAG}.${i}"
+        fi
+        echo "The image will be tagged as ${TAG}"
+        docker tag "dicegroup/${PROJECT}:latest" "dicegroup/${PROJECT}:${TAG}"
+        tagged_versions+=("${TAG}")
+    done
+fi
# upload the image
if [[ $1 == "--upload" ]]
- then
+then
echo 'uploading...'
- sudo docker push dicegroup/$PROJECT:$VERSION
- sudo docker push dicegroup/$PROJECT:latest
+ for i in "${tagged_versions[@]}"
+ do
+ sudo docker push "dicegroup/${PROJECT}:${i}"
+ done
fi
\ No newline at end of file
From 682a34a3d89eb596837ac237bccafdff135b908d Mon Sep 17 00:00:00 2001
From: Micha
Date: Thu, 12 Aug 2021 13:13:37 +0200
Subject: [PATCH 09/10] increased junit version.
---
pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pom.xml b/pom.xml
index 59919df5e..a2a78c6fc 100644
--- a/pom.xml
+++ b/pom.xml
@@ -22,7 +22,7 @@
1.8
1.7.6
- 4.13
+ 4.13.2
UTF-8
5.3.9
3.1.0
From dcb86970afc76bc4c75732300cf32fda956af67d Mon Sep 17 00:00:00 2001
From: Micha
Date: Mon, 20 Sep 2021 12:51:47 +0200
Subject: [PATCH 10/10] Removed deprecated run-gerbil.sh script. Closes #398
---
run-gerbil.sh | 56 ---------------------------------------------------
1 file changed, 56 deletions(-)
delete mode 100755 run-gerbil.sh
diff --git a/run-gerbil.sh b/run-gerbil.sh
deleted file mode 100755
index fa9f87e83..000000000
--- a/run-gerbil.sh
+++ /dev/null
@@ -1,56 +0,0 @@
-#!/bin/bash
-
-export gerbil_home=~/gerbil/
-
-gerbil_data=$gerbil_home/gerbil_data
-
-data_url="https://github.com/dice-group/gerbil/releases/download/v1.2.6/gerbil_data.zip"
-mapping_url="http://139.18.2.164/mroeder/gerbil/dbpedia_index.zip"
-index_url="http://139.18.2.164/mroeder/gerbil/dbpedia_check_index.zip"
-
-# Check for dependencies
-if [ ! -e $gerbil_data ]
-then
- echo "gerbil_data is not available locally, downloading.."
-
- zipped_data_file="/tmp/gerbil_data.zip"
- zipped_mapping_file="/tmp/dbpedia_index.zip"
- zipped_index_file="/tmp/dbpedia_check_index.zip"
-
- mapping_file="$gerbil_data/indexes/dbpedia"
- index_file="$gerbil_data/indexes/dbpedia_check"
-
- echo " .. downloading gerbil_data file to $zipped_data_file"
- curl --retry 4 -L -o "$zipped_data_file" "$data_url"
-
- echo "extracting .."
- mkdir -p "$gerbil_home"
- unzip "$zipped_data_file" -d "$gerbil_home"
- mkdir -p "$gerbil_data/indexes/"
-
- echo " .. downloading mappings to $zipped_mapping_file"
- curl --retry 4 -L -o "$zipped_mapping_file" "$mapping_url"
-
- echo "extracting .."
- unzip "$zipped_mapping_file" -d "$mapping_file"
-
- echo " .. downloading indexes to $zipped_index_file"
- curl --retry 4 -L -o "$zipped_index_file" "$index_url"
-
- echo "extracting .."
- unzip "$zipped_index_file" -d "$index_file"
-
- echo "download complete"
-fi
-
-echo "starting the docker container"
-
-docker pull philippkuntschik/gerbil:latest
-
-docker run -d \
- --rm \
- --name gerbil \
- -p 8080:8080 \
- -v $gerbil_data/:/gerbil_data \
- -v /var/logs/:/logs/ \
- philippkuntschik/gerbil