Skip to content

Commit

Permalink
Add url to SemanticScholar page for each paper
Browse files Browse the repository at this point in the history
  • Loading branch information
davidnmora committed Jan 5, 2018
1 parent 488e509 commit 9705cb7
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 126 deletions.
93 changes: 49 additions & 44 deletions data/2-format-graph-data-from-authors-papers.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
import json
from pprint import pprint


# Module-level state shared by the graph-building code below.
papersDict = {} # paper title -> paper record; overwritten by the JSON load below
coreAuthors = {} # core-author lookup; overwritten by the JSON load below
# "updatingIndex" is the next sequential node index to assign;
# "parsingCoreAuthors" marks the first phase (hub papers) and is flipped to
# False before citations/references are processed.
glob = {"updatingIndex": 0, "parsingCoreAuthors": True } # keeps track of what index in the array you're adding to (used as "node descriptor" for links)


# IMPORT JSON FILE
# NOTE(review): this scrape interleaves the pre-commit (open/read/close) and
# post-commit (`with` block) versions of each load; only the `with` form
# exists in the committed file — confirm against the repository.
f = open("papers-by-title.json", "r") # 123 articles (each of which contains its own citations, references)
papersDict = json.loads(f.read())
f.close()
with open("papers-by-title-SMALL.json", "r") as f: # articles (each of which contains its own citations, references)
papersDict = json.loads(f.read())

# RETRIEVE CORE AUTHORS
f = open("core-authors-list.json", "r") # presumably core-author ids (original comment was copy-pasted from the papers load); used only for membership tests below — confirm shape
coreAuthors = json.loads(f.read())
f.close()
with open("core-authors-list.json", "r") as f:
coreAuthors = json.loads(f.read())

# FOR SIMPLE TESTING PURPOSES:
# papersDict = { "A-node": {
Expand All @@ -29,48 +33,16 @@
# }
# }

# NOTE(review): the first paperIdToIndex line is the pre-commit version left
# behind by the diff scrape; only one assignment exists in the committed file.
paperIdToIndex = {} # paper id -> node index
paperIdToIndex = {} # paper id -> node index (keys are paperId values, not titles — see nodeAdded/addNode)
nodeDict = {} # node index -> node object
linkArray = [] # list of {"source": index, "target": index} edge objects

def generateGraphData():
glob = {"updatingIndex": 0, "parsingCoreAuthors": True } # keeps track of what index in the array you're adding to (used as "node descriptor" for links)
def nodeAdded(paper):
# Register `paper` as a graph node unless it is a non-influential
# citation/reference or is already indexed. Mutates module state:
# paperIdToIndex, nodeDict and glob["updatingIndex"]. Returns the paper's
# isInfluential flag (default True), i.e. whether the paper belongs in the
# graph at all — True is also returned when a new node was just added.
# NOTE(review): leading indentation was stripped by the diff scrape; the
# author/citation-count section is presumably nested under the
# parsingCoreAuthors check — confirm against the repository.
paperId = paper["paperId"]
# FILTER: for citations/refs, keep only if isInfluential (hub papers lack the
# isInfluential key, so they default to True and are always kept)
influential = paper.get('isInfluential', True)
if influential and paperId not in paperIdToIndex:
# Assign the next sequential index; it doubles as the node id used by links.
paperIdToIndex[paperId] = glob["updatingIndex"]
newNode = {
"title": paper["title"],
"year" : paper["year"],
"keyPhrases": [],
"index": glob["updatingIndex"],
"id": glob["updatingIndex"],
"paperId": paper["paperId"]
}
# Hub (core-author) papers carry richer metadata than bare citations/refs.
if glob["parsingCoreAuthors"]:
newNode["influentialCitationCount"] = paper["influentialCitationCount"]
authorsDict = {}
for authorObj in paper["authors"]:
authorsDict[authorObj["authorId"]] = authorObj["name"] # hash map for fast access when filtering
if authorObj["authorId"] in coreAuthors:
newNode["coreAuthor"] = authorObj["authorId"] # ISSUE: if two core authors are on the same paper, the assignment is overwritten and only the LAST one is kept as the singular coreAuthor
newNode["authors"] = authorsDict
nodeDict[glob["updatingIndex"]] = newNode
glob["updatingIndex"] += 1
return True
return influential

def addEdge(source, target):
    """Append a directed link (node index ``source`` -> ``target``) to the
    module-level ``linkArray``."""
    edge = dict(source=source, target=target)
    linkArray.append(edge)

# first add all papers authored by the core authors
# which contain the highest resolution info
for paperTitle in papersDict:
paper = papersDict[paperTitle]
# add to node
nodeAdded(paper)
addNode(paper)

# then add all the core authors ref/citations (overlap won't overwrite)
glob["parsingCoreAuthors"] = False
Expand All @@ -80,19 +52,19 @@ def addEdge(source, target):
hubPaperIndex = paperIdToIndex[paper["paperId"]]
# add all in going links (citations)
for citedPaper in paper["citations"]:
nodeAdded(citedPaper)
addNode(citedPaper)
paperId = citedPaper["paperId"]
if paperId in paperIdToIndex and citedPaper.get("isInfluential", True):
addEdge(paperIdToIndex[paperId], hubPaperIndex)
# add all out going links (references)
for refPaper in paper["references"]:
nodeAdded(refPaper)
addNode(refPaper)
paperId = refPaper["paperId"]
if paperId in paperIdToIndex and refPaper.get("isInfluential", True):
addEdge(hubPaperIndex, paperIdToIndex[paperId])

pprint(len(nodeDict))
pprint(len(linkArray))
print("Node count: " + str(len(nodeDict)))
print("Link count: " + str(len(linkArray)))

# convert Dict to List
nodeList = [None] * len(nodeDict)
Expand All @@ -110,5 +82,38 @@ def addEdge(source, target):
# for link in linkArray:
# gd.write("'" + str(link["source"]) + "','" + str(link["target"]) + "'\n")
# gd.close()
return

# ---Helper functions---

def addNode(paper):
# Register `paper` as a graph node unless it is a non-influential
# citation/reference or is already indexed. Mutates module state:
# paperIdToIndex, nodeDict and glob["updatingIndex"]. Always returns None
# (unlike the pre-commit nodeAdded, which returned the influential flag);
# callers re-check membership via paperIdToIndex instead.
# NOTE(review): leading indentation was stripped by the diff scrape; the
# author/citation-count section is presumably nested under the
# parsingCoreAuthors check — confirm against the repository.
paperId = paper["paperId"]
# FILTER: for citations/refs, keep only if isInfluential (hub papers lack the
# isInfluential key, so they default to True and are always kept)
influential = paper.get('isInfluential', True)
if influential and paperId not in paperIdToIndex:
# Assign the next sequential index; it doubles as the node id used by links.
paperIdToIndex[paperId] = glob["updatingIndex"]
newNode = {
"title": paper["title"],
"year" : paper["year"],
"keyPhrases": [],
"index": glob["updatingIndex"],
"id": glob["updatingIndex"],
"paperId": paper["paperId"],
# New in this commit: link to the paper's SemanticScholar page.
# Direct indexing raises KeyError if the record has no "url" key.
"linkToPaper": paper["url"]
}
# Hub (core-author) papers carry richer metadata than bare citations/refs.
if glob["parsingCoreAuthors"]:
newNode["influentialCitationCount"] = paper["influentialCitationCount"]
authorsDict = {}
for authorObj in paper["authors"]:
authorsDict[authorObj["authorId"]] = authorObj["name"] # hash map for fast access when filtering
if authorObj["authorId"] in coreAuthors:
newNode["coreAuthor"] = authorObj["authorId"] # ISSUE: if two core authors are on the same paper, the assignment is overwritten and only the LAST one is kept as the singular coreAuthor
newNode["authors"] = authorsDict
nodeDict[glob["updatingIndex"]] = newNode
glob["updatingIndex"] += 1
return

def addEdge(source, target):
    """Record a directed edge from node index ``source`` to node index
    ``target`` in the module-level ``linkArray``."""
    linkArray.append({"source": source, "target": target})

generateGraphData()
2 changes: 1 addition & 1 deletion data/graph-data.json

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions js/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ const getCoreAuthorColor = (d) => d.coreAuthor ? authorColor[d.coreAuthor] : non
// Re-derive the visible nodes/links from the full graph using the current
// filter predicates, then redraw the visualization.
const filterGraph = () => {
nodes = graph.nodes.filter(node => shouldKeepNode(node));
links = graph.links.filter(link => shouldKeepLink(graph.nodesById, link));
// NOTE(review): diff artifact — `restart()` is the pre-commit name that this
// commit renames to `updateVis()`; only one of these calls exists in the file.
restart();
updateVis();
}

// Filter predicates
Expand Down Expand Up @@ -195,7 +195,7 @@ const makeAuthorInactive = (authorId) => {
}

// 3. UPDATE GRAPH AFTER FILTERING DATA -------------------------------------------------------------------------
function restart() {
function updateVis() {
if(!simulation) {
simulation = d3
.forceSimulation()
Expand Down
79 changes: 0 additions & 79 deletions project-plans.txt

This file was deleted.

0 comments on commit 9705cb7

Please sign in to comment.