-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adding transformation from Google Docs
- Loading branch information
1 parent
ac3b27f
commit 08d2c94
Showing
2 changed files
with
45 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Transform from Google Docs | ||
|
||
This script reads a **published** Google Docs spreadsheet, configured according to a standard template, and parses out selected sheets. | ||
|
||
It caches entities as it reads across sheets, so if (for example) the same company is discovered on multiple sheets, it is assigned the same identifier each time. | ||
|
||
## Usage | ||
|
||
``` | ||
python transform-from-gdocs.py <SPREADSHEET URL> | ||
``` | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import sys, os | ||
import re | ||
import requests | ||
import json | ||
sys.path.insert(1, os.path.join(sys.path[0], '../../modules/')) | ||
from taglifter import TagLifter | ||
from rdflib import Graph | ||
|
||
|
||
# Titles of the worksheets that are converted; all other sheets are skipped.
sheets_to_parse = ['Sources', 'Companies and Groups', 'Projects, sites & companies', 'Payments', 'Production']

# Directory (relative to the working directory) that receives one .ttl per sheet.
OUTPUT_DIR = "data"


def extract_spreadsheet_key(gdoc):
    """Return the spreadsheet key embedded in a Google Docs URL.

    The key is taken to be the first run of 25 or more word characters or
    hyphens found in *gdoc*. Returns None when no such run exists, so the
    caller can report a bad URL instead of crashing on a failed match.
    """
    # Raw string: "\w" inside a plain literal is an invalid escape sequence.
    match = re.search(r"([-\w]{25,})", str(gdoc))
    return match.group(0) if match else None


def csv_links(entry):
    """Return the hrefs of every "text/csv" link of one worksheet feed entry."""
    return [link['href'] for link in entry['link'] if link['type'] == "text/csv"]


def main(argv):
    """Convert the selected sheets of a published spreadsheet to Turtle files.

    argv is the full argument vector; argv[1] must be the spreadsheet URL.
    Exits with a message when the URL is missing or contains no key, and
    raises the HTTP error from requests when the worksheet feed cannot be
    fetched.
    """
    if len(argv) < 2:
        sys.exit("Usage: python transform-from-gdocs.py <SPREADSHEET URL>")

    key = extract_spreadsheet_key(argv[1])
    if key is None:
        sys.exit("Could not find a spreadsheet key in: " + str(argv[1]))

    # One TagLifter is shared by all sheets; only its graph is reset per sheet.
    tl = TagLifter(ontology="../../ontology/resource-projects-ontology.rdf", base="http://resourceprojects.org/", source_meta={})

    sheetlist = requests.get('https://spreadsheets.google.com/feeds/worksheets/' + key + '/public/full?alt=json')
    # Surface HTTP failures directly rather than as a confusing JSON parse error.
    sheetlist.raise_for_status()
    sheetjson = json.loads(sheetlist.text)

    # serialize() writes into OUTPUT_DIR, so make sure it exists first.
    if not os.path.isdir(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)

    for entry in sheetjson['feed']['entry']:
        title = entry['title']['$t']
        if title not in sheets_to_parse:
            continue
        for href in csv_links(entry):
            print(href)
            print("Loading " + title)
            # Fresh graph per sheet so each output file holds only that sheet.
            tl.graph = Graph()
            tl.load_data(href)
            tl.build_graph()
            tl.graph.serialize(format='turtle', destination=OUTPUT_DIR + "/" + title + ".ttl")


if __name__ == "__main__":
    main(sys.argv)
|