-
Notifications
You must be signed in to change notification settings - Fork 3
/
utils.py
61 lines (43 loc) · 1.83 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import csv
import pandas as pd
import os
def detect_delimiter(file_path, sample_size=1024, default=","):
    """Guess the field delimiter of a delimited text file.

    The first ``sample_size`` characters of the file are handed to
    ``csv.Sniffer``.

    Args:
        file_path: Path of the file to inspect; it is read as UTF-8.
        sample_size: Number of characters to read for the guess.
        default: Delimiter returned when the sniffer cannot decide.

    Returns:
        The detected delimiter, or ``default`` when detection fails.
    """
    with open(file_path, 'r', newline='', encoding='utf-8') as file:
        sample = file.read(sample_size)
    try:
        return csv.Sniffer().sniff(sample).delimiter
    except csv.Error:
        # The sniffer raises when it cannot determine a delimiter (for
        # instance on an empty file); fall back instead of crashing.
        return default
def load_data(file_path):
    """Read a delimited text file into a pandas DataFrame.

    The separator is not assumed: it is obtained from
    ``detect_delimiter(file_path)`` and the file is then parsed as UTF-8.
    """
    return pd.read_csv(
        file_path,
        delimiter=detect_delimiter(file_path),
        encoding="UTF-8",
    )
def getPercentage(num, den):
    """Return ``num`` expressed as a percentage of ``den``.

    Raises:
        ZeroDivisionError: If ``den`` is a plain Python zero.
    """
    scaled = num * 100
    return scaled / den
def get_erih_plus_dict(erih_df):  # method to load dictionary
    """Build an ISSN -> journal id lookup table from a DataFrame.

    Both the ``"Print ISSN"`` and the ``"Online ISSN"`` of every row are
    mapped to that row's ``"Journal ID"``. When the same ISSN occurs more
    than once, the last occurrence wins.

    Args:
        erih_df: DataFrame with the columns ``"Print ISSN"``,
            ``"Online ISSN"`` and ``"Journal ID"``.

    Returns:
        A dict whose keys are ISSNs and whose values are journal ids.
    """
    erih_plus_dict = {}
    for _, row in erih_df.iterrows():
        journal_id = row["Journal ID"]
        for column in ("Print ISSN", "Online ISSN"):
            issn = row[column]
            # A missing ISSN (NaN/None) is not a usable lookup key, so it is
            # left out instead of polluting the dictionary.
            if pd.notna(issn):
                erih_plus_dict[issn] = journal_id
    return erih_plus_dict
def process_doaj_file(doaj_df, merged_data):  # this is called in oc metaprocessor
    """Flag the rows of ``merged_data`` whose ISSNs are listed in ``doaj_df``.

    An ``'Open_Access'`` column is added to (or overwritten in)
    ``merged_data``. It holds ``True`` for every row with at least one ISSN
    found in the print or online ISSN columns of ``doaj_df``, and the string
    ``"Unknown"`` otherwise.

    Args:
        doaj_df: DataFrame with the columns
            ``'Journal ISSN (print version)'`` and
            ``'Journal EISSN (online version)'``.
        merged_data: DataFrame with an ``"issn"`` column whose cells are
            strings shaped like ``"['1234-5678', '8765-4321']"``.

    Returns:
        The same ``merged_data`` object, modified in place.
    """
    # Collect the ISSNs in a set for O(1) membership tests; missing values
    # are dropped because they can never match a parsed ISSN string.
    open_access_issns = set()
    for column in ('Journal ISSN (print version)',
                   'Journal EISSN (online version)'):
        open_access_issns.update(doaj_df[column].dropna())

    flags = []
    for raw_issns in merged_data["issn"]:
        is_open_access = False
        # Cells that are not strings (e.g. NaN) cannot be parsed and are
        # reported as "Unknown" instead of raising.
        if isinstance(raw_issns, str):
            # Strip the surrounding brackets, split the items and remove
            # the quotes around each ISSN.
            candidates = (
                el.replace("'", "") for el in raw_issns[1:-1].split(", ")
            )
            is_open_access = any(el in open_access_issns for el in candidates)
        flags.append(True if is_open_access else "Unknown")

    # The column mixes booleans and strings, so it is created with an
    # explicit object dtype rather than relying on dtype upcasting.
    merged_data['Open_Access'] = pd.Series(
        flags, index=merged_data.index, dtype=object
    )
    return merged_data
def save_to_results(df, filename):
    """Write ``df`` as a CSV file below the ``./results/`` directory.

    Any directory part of ``filename`` is kept, so ``"a/b.csv"`` is written
    to ``./results/a/b.csv``. Missing directories are created first.

    Args:
        df: DataFrame to save; its index is not written.
        filename: File name, optionally prefixed with sub-directories,
            relative to ``./results/``.
    """
    outdir = os.path.join('./results/', os.path.dirname(filename))
    # makedirs also creates './results/' itself and any intermediate
    # directories, and exist_ok avoids a check-then-create race.
    os.makedirs(outdir, exist_ok=True)
    fullname = os.path.join(outdir, os.path.basename(filename))
    df.to_csv(fullname, index=False)