#coding:utf-8 import time from selenium import webdriver from lxml import etree import sys reload(sys) sys.setdefaultencoding( "utf-8" ) friend = '' # 目的QQ号,目的QQ空间要求允许被访问 user = '' # 你的QQ号 pw = '' # 你的QQ密码 driver = webdriver.Chrome(executable_path='/Users/jiwu/Downloads/chromedriver') driver.maximize_window() driver.get("http://i.qq.com") driver.switch_to.frame("login_frame") driver.find_element_by_id("switcher_plogin").click() driver.find_element_by_id("u").send_keys(user) driver.find_element_by_id("p").send_keys(pw) driver.find_element_by_id("login_button").click() driver.switch_to.default_content() driver.get("http://user.qzone.qq.com/" + friend + "/311") next_num = 0 while True: for i in range(1,6): height = 20000*i strWord = "window.scrollBy(0,"+str(height)+")" driver.execute_script(strWord) time.sleep(4) driver.switch_to.frame("app_canvas_frame") selector = etree.HTML(driver.page_source) divs = selector.xpath('//*[@id="msgList"]/li/div[3]') with open('qq_word.txt','a') as f: for div in divs: qq_name = div.xpath('./div[2]/a/text()') qq_content = div.xpath('./div[2]/pre/text()') qq_time = div.xpath('./div[4]/div[1]/span/a/text()') qq_name = qq_name[0] if len(qq_name)>0 else '' qq_content = qq_content[0] if len(qq_content)>0 else '' qq_time = qq_time[0] if len(qq_time)>0 else '' print(qq_name,qq_time,qq_content) f.write(qq_content+" ") if driver.page_source.find('pager_next_' + str(next_num)) == -1: break driver.find_element_by_id('pager_next_' + str(next_num)).click() next_num += 1 driver.switch_to.parent_frame()
生成词云:
#coding:utf-8
import io

import jieba
import matplotlib.pyplot as plt
from wordcloud import WordCloud


def create_word_cloud(filename, output_file='py_book.png',
                      font_path='/System/Library/Fonts/PingFang.ttc'):
    """Render a word cloud from ``<filename>.txt``, save it and display it.

    The text is segmented with ``jieba.cut(..., cut_all=True)``, the
    segments are joined with spaces, and the result is fed to ``WordCloud``.

    Args:
        filename: Path of the input text file without its ``.txt``
            extension.  The file is read as UTF-8.
        output_file: Where the rendered image is written.  Defaults to
            ``'py_book.png'``.
        font_path: Font file handed to ``WordCloud``.  The default is a
            macOS system font path; pass another font on other systems.
    """
    # Context manager so the file handle is closed deterministically.
    with io.open("{}.txt".format(filename), encoding="utf-8") as f:
        text = f.read()

    wordlist = jieba.cut(text, cut_all=True)
    wl = " ".join(wordlist)

    wc = WordCloud(
        background_color="white",
        max_words=2000,
        font_path=font_path,
        height=1200,
        width=1600,
        max_font_size=100,
        random_state=30,
    )
    myword = wc.generate(wl)

    # Save before showing: plt.show() may block until the window is closed,
    # and the image should exist on disk regardless.
    wc.to_file(output_file)

    plt.imshow(myword)
    plt.axis("off")
    plt.show()


if __name__ == '__main__':
    create_word_cloud('qq_word')