Skip to content

Commit

Permalink
Adding code to strip spaces from labels
Browse files Browse the repository at this point in the history
  • Loading branch information
timgdavies committed Jul 22, 2015
1 parent 6ab5c78 commit 8a2d5d1
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 27 deletions.
30 changes: 25 additions & 5 deletions modules/taglifter.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,9 @@ def map_tags(self):
new_key = self.clean_string(self.class_title(key))
mapping[key] = new_key[0].lower()+new_key[1:]
if tag_count > 3: # We assume we've found the tag row once we've got a row with more than 3 tags in
#Remove the tag row
data = data.drop(line)
#Remove all the rows above
for rmrow in range(0,line+1):
data = data.drop(rmrow)
break

if(tag_count < 3):
Expand Down Expand Up @@ -113,7 +114,20 @@ def get_country(self,row,path="#country",return_default = True):
country = "unknown"

return country.lower()

# ToDo - refactor in future
def get_country_code_from_name(self, name):
    """Return a lower-cased country identifier for ``name``.

    Results are memoised in ``self.country_cache`` keyed by ``name``, so
    repeated lookups of the same name return the same identifier.

    On a cache miss, ``countrycode`` is called with ``name`` as a
    'country_name' and "iso2c" as the target. If the first result is not
    exactly two characters long, a ``random_string()`` is used instead.
    Either way the value is stored in the cache before returning.
    """
    try:
        code = self.country_cache[name]
    except KeyError:
        matches = countrycode(codes=[name], origin='country_name', target="iso2c")
        code = matches[0]
        # NOTE(review): presumably an unmatched name comes back as something
        # other than a 2-character code - confirm against countrycode's docs.
        if len(code) != 2:
            code = random_string()
        # Cache both real codes and random fallbacks so the identifier for
        # a given name stays stable for the lifetime of this object.
        self.country_cache[name] = code

    return code.lower()

def get_language(self,row):
lang = row.get("#language",default=self.default_language)
Expand Down Expand Up @@ -159,12 +173,18 @@ def generate_identifier(self,row,path,entity_type,country = "xx",lang="en"):
identifier = country + "/" + self.generate_project_identifier(row[path])
elif entity_type == "company":
identifier = random_string()
elif entity_type == "group":
identifier = self.clean_string(row[path]).strip().lower() + "-" + random_string()[0:4]
elif entity_type == "source":
identifier = self.clean_string(row[path]).strip()
elif entity_type == "commodity":
identifier = "local/" + self.clean_string(row[path]).strip()
elif entity_type == "contributor":
identifier = random_string()
elif entity_type == "country":
identifier = self.get_country(row,path)
identifier = self.get_country_code_from_name(self.clean_string(row[path]).strip())
elif entity_type == "paymentType":
identifier = self.get_country(row,path)
else:
identifier = country + "/" + random_string()

Expand Down Expand Up @@ -314,9 +334,9 @@ def build_graph(self):
else:
label_rel = SKOS.altLabel
if (current_path + "+" + col_lang) in row.keys():
self.graph.add((entity,label_rel,Literal(row[current_path + "+" + col_lang],lang=col_lang)))
self.graph.add((entity,label_rel,Literal(row[current_path + "+" + col_lang].strip(),lang=col_lang)))
elif current_path in row.keys():
self.graph.add((entity,label_rel,Literal(row[current_path],lang=col_lang)))
self.graph.add((entity,label_rel,Literal(row[current_path].strip(),lang=col_lang)))

# First check if we can make a relationship between this entity and its parent, or nearest neighbour
if not last_entity == entity:
Expand Down
22 changes: 0 additions & 22 deletions process/statoil/prov.n3

This file was deleted.

0 comments on commit 8a2d5d1

Please sign in to comment.