forked from dbpedia/distributed-extraction-framework
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run-extraction-test
executable file
·33 lines (26 loc) · 1.34 KB
/
run-extraction-test
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#!/bin/bash
# Performs both normal sequential extraction and distributed extraction and compares the outputs.
#
# Usage: run-extraction-test CONFIG_FILE [SPARK_CONF_FILE]
#   CONFIG_FILE      extraction config file; must contain a "base-dir=<path>" line.
#   SPARK_CONF_FILE  passed to "./run extraction" after CONFIG_FILE when given.
#
# Exit status: 0 when both outputs match, 1 when they differ or any step fails.
set -u

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print a section header.
banner() {
  echo "===================================================================="
  echo "$1"
  echo "===================================================================="
}

(( $# >= 1 )) || die "Usage: ${0##*/} CONFIG_FILE [SPARK_CONF_FILE]"
CONFIG_FILE="$1"
SPARK_CONF_FILE="${2:-}"
[[ -r "$CONFIG_FILE" ]] || die "Cannot read config file: $CONFIG_FILE"

# The extraction output is looked up below the base-dir named in the config file.
base_dir=$(sed -n 's/^base-dir=//p' < "$CONFIG_FILE") \
  || die "Failed to read base-dir from $CONFIG_FILE"
[[ -n "$base_dir" ]] || die "No base-dir= entry found in $CONFIG_FILE"

# Use a private scratch directory instead of fixed names in /tmp, so that
# leftovers from an interrupted run can never leak into the comparison.
work_dir=$(mktemp -d "${TMPDIR:-/tmp}/dbpedia-test-extraction.XXXXXX") \
  || die "mktemp failed"
cleanup() { rm -rf -- "${work_dir:?}"; }
trap cleanup EXIT
seq_dir="$work_dir/seq"
par_dir="$work_dir/par"
mkdir -- "$seq_dir" "$par_dir" || die "Cannot create scratch directories in $work_dir"

banner "Running sequential extraction"
./run seq-extraction "$CONFIG_FILE" || die "Sequential extraction failed"
# Expand the glob into an array so that "no output at all" is detected
# instead of silently comparing two empty streams later on.
shopt -s nullglob
seq_outputs=("$base_dir"/*wiki/*/*wiki*.gz)
shopt -u nullglob
(( ${#seq_outputs[@]} > 0 )) || die "Sequential extraction produced no *wiki*.gz output under $base_dir"
# Move (not copy) the sequential results away before the distributed run starts.
mv -- "${seq_outputs[@]}" "$seq_dir"/ || die "Cannot move sequential output to $seq_dir"

banner "Running distributed extraction"
extraction_args=("$CONFIG_FILE")
if [[ -n "$SPARK_CONF_FILE" ]]; then
  extraction_args+=("$SPARK_CONF_FILE")
fi
./run extraction "${extraction_args[@]}" || die "Distributed extraction failed"
shopt -s nullglob
par_outputs=("$base_dir"/*wiki/*/*wiki*.gz)
shopt -u nullglob
(( ${#par_outputs[@]} > 0 )) || die "Distributed extraction produced no *wiki*.gz output under $base_dir"
cp -rf -- "${par_outputs[@]}" "$par_dir"/ || die "Cannot copy distributed output to $par_dir"

# The comparison below reads the distributed results as part*.gz files inside
# each copied *wiki*.gz directory; make sure there is something to read.
shopt -s nullglob
part_files=("$par_dir"/*wiki*.gz/part*.gz)
shopt -u nullglob
(( ${#part_files[@]} > 0 )) || die "No part*.gz files found in the distributed output"

banner "Computing diffs:"
# Comment lines (starting with "#") are dropped and the rest is sorted, so only
# the set of emitted lines is compared, not their order.
diffs=$(diff \
  <(gzip -dc "$seq_dir"/*.gz | grep -v "^#" | sort) \
  <(gzip -dc "${part_files[@]}" | grep -v "^#" | sort))
diff_status=$?

# diff exits 0 for identical input, 1 for differences, >1 on trouble.
if (( diff_status == 0 )); then
  echo "Outputs match!"
elif (( diff_status == 1 )); then
  # Quoted so the diff keeps its line structure and is not glob-expanded.
  printf '%s\n' "$diffs"
  exit 1
else
  die "diff failed with status $diff_status"
fi