绘制词云
在 PubMed 中检索关键词 "antibody drug conjugate"，获取文献摘要（summary）后生成词云。
from Bio import Entrez
from wordcloud import WordCloud
# Search PubMed for the "antibody drug conjugate" keywords, download the
# abstracts of the matching articles as plain text, and save them to data.txt
# so the word cloud can be built from that file.
# NOTE: NCBI asks E-utilities clients to identify themselves; set Entrez.email
# before running this (Biopython emits a warning while it is unset).
handle = Entrez.esearch(db="pubmed", usehistory='y', term="antibody and drug and conjugate")
try:
    record = Entrez.read(handle)
finally:
    # Release the HTTP connection even if parsing the search result fails.
    handle.close()
# usehistory='y' keeps the result set on the NCBI history server; WebEnv and
# QueryKey let efetch refer to it instead of passing a list of PMIDs.
web_env = record['WebEnv']
query_key = record['QueryKey']
handle = Entrez.efetch(db="pubmed", WebEnv=web_env, query_key=query_key, rettype="abstract", retmode='text')
try:
    with open('data.txt', "w", encoding="utf-8") as f:
        f.write(handle.read())
finally:
    # Close the download handle whether or not the write succeeded.
    handle.close()
# Load the abstracts saved in data.txt, drop boilerplate / query words that are
# frequent but carry little meaning, and render the rest as a word cloud image.
with open('data.txt', "r", encoding="utf-8") as f:
    text = f.read()
filter_list = ["Author", "information", "[Indexed", "MEDLINE]", "PMID:", "antibody", "drug", "conjugate"]
# Punctuation stripped from both ends of a token before it is compared.
edge_punctuation = ".,;:!?()[]{}\"'"
# Normalised form of filter_list: a set for O(1) lookups, with punctuation and
# case removed so that e.g. "information:", "Antibody" and "conjugate." match.
stop_words = {entry.strip(edge_punctuation).lower() for entry in filter_list}


def _is_noise_word(token):
    """Return True when *token* consists only of filtered words.

    The token is compared after stripping surrounding punctuation and
    lower-casing it. A hyphenated token such as "antibody-drug" is treated as
    noise only if every hyphen-separated part is a filtered word.
    """
    core = token.strip(edge_punctuation).lower()
    return all(part in stop_words for part in core.split("-"))


text_list = text.split()
# Filter out words that are very frequent but not meaningful.
filtered_text = [w for w in text_list if not _is_noise_word(w)]
text = ' '.join(filtered_text)
wc = WordCloud(width=1200, height=800)
wc.generate(text)
wc.to_file("word_cloud.png")
# TODO: the generated word cloud is still not very informative; the word list
# needs further filtering (e.g. plural forms such as "conjugates" still pass).