Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update scholar.py to help collecting citation statistics #87

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
63 changes: 63 additions & 0 deletions Pubmed_searchkeywords.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from Bio import Entrez
import types
import json
import pdb
from os import path
from wordcloud import WordCloud

# Contact address assigned to Biopython's Entrez module before any request is made.
Entrez.email = '[email protected]'
# Cleaned titles: appended to by get_json() and iterated by the script section.
list_json=[]
# Keyword lines accumulated across successive getkeywords() calls.
list_txt=[]

def get_json():
    """Load the entries of ``title.json`` into the global ``list_json``.

    Each entry has the BibTeX wrapper text ``title={`` and ``},`` removed
    and its surrounding whitespace stripped before being appended.
    """
    with open('title.json') as source:
        raw_titles = json.load(source)  # iterated as a sequence of strings

    for raw in raw_titles:
        cleaned = raw.replace("title={", "").replace("},", "").strip()
        list_json.append(cleaned)

def search(query):
    """Return the PubMed ID of the most relevant record for *query*.

    Runs an Entrez ``esearch`` on the ``pubmed`` database, sorted by
    relevance and limited to a single hit (``retmax='1'``).

    Args:
        query: Search term passed to Entrez as ``term``.

    Returns:
        The IDs of the result list joined into one string (a single ID
        because of ``retmax='1'``), or ``''`` when nothing was found.
    """
    handle = Entrez.esearch(db='pubmed',
                            sort='relevance',
                            retmax='1',
                            retmode='xml',
                            term=query)
    try:
        results = Entrez.read(handle)
    finally:
        # Release the connection even when parsing the response fails.
        handle.close()

    # Fall back to an empty list so a response lacking 'IdList' yields ''
    # instead of a TypeError inside join().
    id_list = results.get('IdList') or []
    return ''.join(id_list)

def getkeywords(id_number):
    """Fetch one PubMed record and store its keyword lines.

    Every line of the fetched XML that contains ``Keyword MajorTopicYN``
    has its ``<Keyword ...>`` / ``</Keyword>`` markup removed and is
    appended to the global ``list_txt``. The whole accumulated list is then
    written to ``constitution.txt``, replacing the file's previous contents.

    Args:
        id_number: PubMed ID as a string. An empty value (no search hit)
            skips the network request; the file is still rewritten.
    """
    if id_number:
        handle = Entrez.efetch(db="pubmed", id=id_number,
                               rettype="abstract", retmode="xml")
        try:
            payload = handle.read()
        finally:
            # Release the connection even if reading fails.
            handle.close()

        # handle.read() may yield bytes depending on the Biopython version;
        # split("\n") below needs text. type("") is used instead of the
        # name ``str``, which module-level code may rebind.
        if not isinstance(payload, type("")):
            payload = payload.decode("utf-8")

        for line in payload.split("\n"):
            if "Keyword MajorTopicYN" in line:
                # Strip the opening tag for both major ("Y") and
                # non-major ("N") keywords.
                for opening_tag in ('<Keyword MajorTopicYN="N">',
                                    '<Keyword MajorTopicYN="Y">'):
                    line = line.replace(opening_tag, "")
                line = line.replace("</Keyword>", "")
                list_txt.append(line)

    # Open the output only once the data is ready, and close it reliably.
    with open('constitution.txt', 'w') as keyword_file:
        keyword_file.write(''.join(list_txt))



if __name__ == "__main__":
    # Collect the titles, then look each one up on PubMed and harvest the
    # keywords of its best match into constitution.txt.
    get_json()
    for title in list_json:
        # Named pubmed_id rather than ``str`` so the builtin is not rebound
        # at module level, where every function in this file would see it.
        pubmed_id = search(title)
        getkeywords(pubmed_id)

    d = path.dirname(__file__)
    # Context manager so the file handle is closed after reading.
    with open(path.join(d, 'constitution.txt')) as keyword_file:
        text = keyword_file.read()

    import matplotlib.pyplot as plt

    # First figure: word cloud with default settings.
    wordcloud = WordCloud().generate(text)
    plt.imshow(wordcloud)
    plt.axis("off")

    # Second figure: same text with the largest font capped at 40.
    wordcloud = WordCloud(max_font_size=40).generate(text)
    plt.figure()
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
15 changes: 15 additions & 0 deletions Usage Instructions
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
(1) To get all the articles that cite a given article:
1. Get the cited number from Google Scholar, e.g. "https://scholar.google.com/scholar?oi=bibs&hl=en&cites=6675397154864859782&as_sdt=5
https://docs.google.com/document/d/1foKQvjIFm8T62-flZ7IWC23g4t52Ad_o4kaLPirUtpI/"; in this example the cited number is "6675397154864859782".
2. Run the following command in your terminal: python scholar.py -c 100 -T 6675397154864859782 --citation "bt"
100 is the number of articles you want to get from Google Scholar.
--citation "bt" requests the BibTeX entry of every article.
All the BibTeX information will then be downloaded into a JSON file.

(2) To figure out what type each citation is (journal, conference proceedings, or something else) and to calculate statistics for each journal title,
run the program journal_name.py.

(3) To search for the keywords of every journal, run Pubmed_searchkeywords.py. It retrieves the keywords from PubMed.



1 change: 1 addition & 0 deletions bibtex.json

Large diffs are not rendered by default.

51 changes: 51 additions & 0 deletions journal_name.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import matplotlib.pyplot as plt
plt.rcdefaults()
import numpy as np
import matplotlib.pyplot as plt
import json

def get_json():
    """Count the venues listed in ``bibtex1.json`` and plot them.

    The JSON file is iterated as a sequence of BibTeX entry strings. Each
    entry is scanned line by line:

    * a line containing ``journal={`` counts as a journal citation,
    * a line containing ``booktitle={`` counts as a conference citation,
    * any other line containing ``title`` is collected as an article title.

    The two totals and the collected title lines are printed, then a
    horizontal bar chart shows how often each distinct venue line occurred.
    """
    jour = 0
    conf = 0
    titles = []      # named ``titles`` so the builtin ``list`` is not shadowed
    diction = {}     # raw venue line -> number of occurrences

    with open('bibtex1.json') as json_file:
        data = json.load(json_file)

    for entry in data:
        for line in entry.split('\n'):
            if "journal={" in line:
                jour += 1
                diction[line] = diction.get(line, 0) + 1
            elif "booktitle={" in line:
                conf += 1
                diction[line] = diction.get(line, 0) + 1
            elif "title" in line:
                titles.append(line)

    print("the number of journal is:", jour)
    print("the number of conference is:", conf)
    print(titles)

    number = []
    name = []
    for key, count in diction.items():
        number.append(count)
        # Remove the BibTeX field wrapper for both venue kinds; previously
        # only "journal={" was removed, so conference labels kept their
        # raw "booktitle={" prefix on the chart.
        label = key.replace("journal={", "").replace("booktitle={", "")
        label = label.replace("},", "").strip()
        name.append(label)

    plt.rcdefaults()
    fig, ax = plt.subplots()

    y_pos = np.arange(len(name))
    ax.barh(y_pos, number, color='blue')
    ax.set_yticks(y_pos)
    ax.set_yticklabels(name)
    ax.invert_yaxis()  # labels read top-to-bottom
    ax.set_xlabel('Number')
    ax.set_title('How many times a journal has show up')

    plt.show()


if __name__ == "__main__":
    # Run as a script: print the venue statistics and open the bar chart.
    get_json()
Loading