-
Notifications
You must be signed in to change notification settings - Fork 1
/
gdeltdl.sh
executable file
·55 lines (28 loc) · 1.41 KB
/
gdeltdl.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/bin/bash
# Settings for the GDELT v2 download-and-load script.

# Redis host and port handed to the loader.
redis_port=6379
redis_server='localhost'

# Key under which the data is submitted to redis-server.
key='gdelt'

# Location of the GDELT v2 master file list.
gdelt_url='http://data.gdeltproject.org/gdeltv2/masterfilelist.txt'

# Pattern selecting which master-list entries get downloaded.
# Change the time span by embedding a year, a month or a single day:
#   content_regexp="gdeltv2/YYYY[0-9].*.export.CSV.zip"       (year)
#   content_regexp="gdeltv2/YYYYMM[0-9].*.export.CSV.zip"     (month)
#   content_regexp="gdeltv2/YYYYMMDD[0-9].*.export.CSV.zip"   (day)
content_regexp='gdeltv2/201601[0-9].*.export.CSV.zip'
# Download, extract and load every GDELT archive selected by ${content_regexp}.
#
# Workflow:
#   1. Download the master file list from ${gdelt_url}.
#   2. Keep only the lines matching ${content_regexp}.
#   3. For each kept line, treat the third whitespace-separated field as the
#      archive URL, download it, extract its CSV member and pass the CSV to
#      ./loadgdelt using ${key}, ${redis_server} and ${redis_port}.
#   4. Print how many files were handed to the loader.
#
# Failing to obtain the master list aborts with status 1. A failure on a
# single archive is reported on stderr and that archive is skipped, so one bad
# entry does not stop the remaining downloads.

# Private scratch directory rather than predictable names directly in /tmp;
# the trap removes it on every exit path.
work_dir=$(mktemp -d "${TMPDIR:-/tmp}/gdeltdl.XXXXXX") || {
  printf 'error: cannot create temporary directory\n' >&2
  exit 1
}
trap 'rm -rf -- "${work_dir}"' EXIT

# Fetch the list into a file first so a download failure is detected on its
# own instead of being hidden behind grep's exit status in a pipeline.
master_list="${work_dir}/masterfilelist.txt"
if ! curl --silent --show-error --fail --output "${master_list}" "${gdelt_url}"; then
  printf 'error: cannot download master file list %s\n' "${gdelt_url}" >&2
  exit 1
fi

n_processed=0

# The matching lines arrive on fd 3 so that commands run inside the loop
# cannot swallow list entries by reading from stdin.
while read -r -u 3 -a fields; do
  # Parse per line: a short line is skipped by itself instead of shifting
  # every following entry, as fixed three-token stepping would.
  if (( ${#fields[@]} < 3 )); then
    printf 'warning: skipping malformed list entry: %s\n' "${fields[*]}" >&2
    continue
  fi

  current_url=${fields[2]}
  compressed_file_name=${current_url##*/}

  # The CSV member name is derived by dropping ".zip", so anything else
  # cannot be handled (and would make the CSV path collide with the archive).
  if [[ ${compressed_file_name} != ?*.zip ]]; then
    printf 'warning: skipping unexpected file name in %s\n' "${current_url}" >&2
    continue
  fi

  # Assumes the archive holds a member named like the archive without ".zip"
  # (e.g. NAME.export.CSV inside NAME.export.CSV.zip).
  csv_file_name=${compressed_file_name%.zip}
  zip_path="${work_dir}/${compressed_file_name}"
  csv_path="${work_dir}/${csv_file_name}"

  # --fail keeps an HTTP error page from being saved as if it were the archive.
  if ! curl --silent --show-error --fail --output "${zip_path}" "${current_url}"; then
    printf 'warning: download failed, skipping %s\n' "${current_url}" >&2
    rm -f -- "${zip_path}"
    continue
  fi

  if ! unzip -p "${zip_path}" "${csv_file_name}" > "${csv_path}"; then
    printf 'warning: cannot extract %s from %s, skipping\n' \
      "${csv_file_name}" "${compressed_file_name}" >&2
    rm -f -- "${zip_path}" "${csv_path}"
    continue
  fi

  printf '%s  -->  %s\n' "${current_url}" "${csv_file_name}"

  # Loader output is discarded on purpose (as before); only a non-zero exit
  # status is surfaced.
  if ! ./loadgdelt -k "${key}" -s "${redis_server}" -p "${redis_port}" \
    -f "${csv_path}" &>/dev/null; then
    printf 'warning: loadgdelt exited with an error for %s\n' \
      "${csv_file_name}" >&2
  fi

  n_processed=$((n_processed + 1))
  rm -f -- "${zip_path}" "${csv_path}"
done 3< <(grep -e "${content_regexp}" -- "${master_list}")

# Count files, not whitespace-separated tokens of the list (which is three
# per file).
echo "no. files processed" "${n_processed}"