-
Notifications
You must be signed in to change notification settings - Fork 0
/
new_urls.py
24 lines (18 loc) · 860 Bytes
/
new_urls.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import json
from configparser import ConfigParser
''' This script prints a list of urls for papers that have been labelled mpc but are not yet listed as mpc papers.
'''
def eprintId(labelled_paper):
    """Return the identifier string ``<year>/<serial>`` for a labelled paper.

    Args:
        labelled_paper: Mapping with the keys ``'year'`` and ``'serial'``.
            Both values are converted with ``str()``.

    Returns:
        The year and the serial joined by ``'/'``; the serial is left-padded
        with ``'0'`` to a minimum width of three characters (longer serials
        are left unchanged).

    Raises:
        KeyError: If ``'year'`` or ``'serial'`` is missing from the mapping.
    """
    year_part = str(labelled_paper['year'])
    serial_part = str(labelled_paper['serial'])
    return '/'.join((year_part, serial_part.rjust(3, '0')))
# Read the path to the labelled papers and the web directory from the config.
config = ConfigParser()
config.read('config.cfg')
labelled_path = config.get('Data', 'labelled')
web_path = config.get('Web', 'web')

# Open the JSON files as UTF-8 explicitly so decoding does not depend on the
# platform's locale default encoding.
with open(labelled_path, encoding='utf-8') as labelled_file:
    labelled_papers = json.load(labelled_file)

# Keep only the papers whose 'mpc' entry is truthy.
labelled_mpc_papers = [p for p in labelled_papers if p['mpc']]

with open(web_path + "/scripts/papers.json", encoding='utf-8') as mpc_file:
    papers = json.load(mpc_file)

# Build the set of already-listed ids once, so every labelled paper is checked
# with an O(1) membership test instead of a rescan of the whole `papers` list.
# eprintId() always returns a str, so only str ids can ever match; restricting
# the set to them also keeps unhashable JSON values out of it.
listed_ids = {q['id'] for q in papers if isinstance(q['id'], str)}

# Compute each candidate id exactly once; input order and duplicates are kept.
candidate_ids = (eprintId(p) for p in labelled_mpc_papers)
new_urls = [paper_id for paper_id in candidate_ids if paper_id not in listed_ids]

# Emit the ids that are labelled but not yet listed, space-separated.
print(" ".join(new_urls))