forked from ckreibich/scholar.py
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Pubmed_searchkeywords.py
63 lines (56 loc) · 1.69 KB
/
Pubmed_searchkeywords.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import json
import pdb
import re
import types
from os import path

from Bio import Entrez
from wordcloud import WordCloud
# Contact address attached to Entrez requests made through Bio.Entrez.
# NOTE(review): the value below looks like a redacted placeholder - confirm
# that a real address is configured before running against NCBI.
Entrez.email = '[email protected]'
# Cleaned article titles; filled by get_json() and used as search queries.
list_json=[]
# Keyword text accumulated across every getkeywords() call.
list_txt=[]
def get_json(filename='title.json'):
    """Load article titles from a JSON file into the module-level ``list_json``.

    The file is expected to hold a JSON array of strings. From every entry
    the wrapper text ``title={`` and ``},`` is removed and surrounding
    whitespace is stripped. Entries that are empty after cleaning are
    skipped so that no empty query is later sent to PubMed.

    Args:
        filename: Path of the JSON file to read. Defaults to ``'title.json'``
            in the current working directory.

    Returns:
        The module-level ``list_json`` list, which is extended in place.
    """
    # JSON text is UTF-8 by specification; do not depend on the locale default.
    with open(filename, encoding='utf-8') as json_file:
        raw_titles = json.load(json_file)

    for raw_title in raw_titles:
        title = raw_title.replace("title={", "").replace("},", "").strip()
        if title:
            list_json.append(title)
    return list_json
def search(query):
    """Look up the most relevant PubMed record for *query*.

    Sends a single ``esearch`` request to the ``pubmed`` database, sorted by
    relevance and limited to one result (``retmax='1'``).

    Args:
        query: Search term passed to esearch as ``term``.

    Returns:
        str: The entries of the response's ``IdList`` concatenated into one
        string. An empty string is returned when the list is empty or the
        key is absent from the response.
    """
    handle = Entrez.esearch(db='pubmed',
                            sort='relevance',
                            retmax='1',
                            retmode='xml',
                            term=query)
    try:
        results = Entrez.read(handle)
    finally:
        # Release the HTTP response even if parsing fails.
        handle.close()

    # .get() yields None when the key is missing; ''.join(None) would raise.
    id_list = results.get('IdList') or []
    return ''.join(id_list)
def _extract_keywords(xml_text):
    """Return the text of every ``<Keyword ...>`` element found in *xml_text*."""
    # Match the opening tag regardless of its attributes, so keywords flagged
    # MajorTopicYN="Y" are extracted just like those flagged "N".
    matches = re.findall(r'<Keyword\b[^>]*>(.*?)</Keyword>', xml_text,
                         flags=re.DOTALL)
    # Collapse internal whitespace so every keyword is one clean line.
    return [' '.join(match.split()) for match in matches if match.strip()]


def getkeywords(id_number):
    """Fetch the keywords of one PubMed record and rewrite 'constitution.txt'.

    The record is requested with ``efetch`` as XML, the text of its
    ``<Keyword>`` elements is appended to the module-level ``list_txt``, and
    the whole accumulated list is then written to ``constitution.txt`` in the
    current working directory, one keyword per line. The file is overwritten
    on every call.

    Args:
        id_number: PubMed ID to fetch. When it is empty no request is made,
            but the output file is still rewritten from ``list_txt``.

    Returns:
        None.
    """
    if id_number:
        handle = Entrez.efetch(db="pubmed", id=id_number,
                               rettype="abstract", retmode="xml")
        try:
            raw = handle.read()
        finally:
            handle.close()
        # handle.read() may yield bytes rather than str; normalise so the
        # text-based extraction below works in both cases.
        if isinstance(raw, bytes):
            raw = raw.decode("utf-8")
        list_txt.extend(_extract_keywords(raw))

    # Explicit separator: keywords must not run together in the output.
    with open('constitution.txt', 'w') as out_file:
        out_file.write('\n'.join(list_txt))
if __name__ == "__main__":
    # Pipeline: titles -> PubMed IDs -> keywords file -> two word-cloud plots.
    get_json()
    for title in list_json:
        pmid = search(title)
        # search() returns "" when PubMed has no hit; there is nothing to
        # fetch in that case.
        if pmid:
            getkeywords(pmid)

    # getkeywords() writes 'constitution.txt' relative to the working
    # directory, so read it back from the same place rather than from the
    # directory that contains this script.
    with open('constitution.txt') as keyword_file:
        text = keyword_file.read()

    import matplotlib.pyplot as plt

    # First figure: word cloud with default settings.
    wordcloud = WordCloud().generate(text)
    plt.imshow(wordcloud)
    plt.axis("off")

    # Second figure: same text with the largest font capped at 40.
    wordcloud = WordCloud(max_font_size=40).generate(text)
    plt.figure()
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()