Studied for 1.5 hours today.
Continuing yesterday's web-scraping practice.

First, basic parsing with BeautifulSoup: fetch the page, then pull tags, attributes, text content, and find_all() results out of the soup.

from bs4 import BeautifulSoup
import requests

url = 'https://www.qiushibaike.com/text/'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62"
}
resp = requests.get(url, headers=headers)
# print(resp.text)

# Create a BeautifulSoup object
soup = BeautifulSoup(resp.text, 'lxml')

# Get a tag
# print(soup.span)

# Get attributes
print(soup.div.attrs)
print(soup.div.get('id'))
print(soup.a['href'])

# Get text content
print(soup.title.string)
print(soup.title.text)
# print(type(soup.div.string))

# find_all()
# m = soup.find_all('div')
print(soup.find_all(class_='author'))
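As a side note, BeautifulSoup can also match elements with CSS selectors through select(). A minimal sketch, assuming the page still serves the same markup as above (the .author class comes from the find_all() call earlier; everything else is illustrative):

# Minimal sketch: the same author extraction via CSS selectors.
# Assumes the page structure above is unchanged.
from bs4 import BeautifulSoup
import requests

url = 'https://www.qiushibaike.com/text/'
headers = {"User-Agent": "Mozilla/5.0"}
resp = requests.get(url, headers=headers)
soup = BeautifulSoup(resp.text, 'lxml')

# select() takes a CSS selector and returns a list of matching tags
for tag in soup.select('.author'):
    print(tag.get_text(strip=True))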

Then, logging in and reusing the session cookie with urllib and MozillaCookieJar: get_cookie() posts the login form and saves the cookie to a file, and use_cookie() loads it back to request the member page. (The original version never actually sent form_data and was missing the parentheses on decode(); both are fixed below.)

from urllib.request import Request, build_opener, HTTPCookieProcessor
from urllib.parse import urlencode
from fake_useragent import UserAgent
from http.cookiejar import MozillaCookieJar

def get_cookie():
    # Log in and save the session cookie to a local file
    login_url = "http://www.sxt.cn/index/login/login"
    form_data = {
        "user": "17703181473",
        "password": "123456"
    }
    headers = {
        "User-Agent": UserAgent().random
    }
    # POST the form data so the server returns a session cookie
    req = Request(login_url, headers=headers, data=urlencode(form_data).encode())
    cookie_jar = MozillaCookieJar()
    handler = HTTPCookieProcessor(cookie_jar)
    opener = build_opener(handler)
    resp = opener.open(req)
    cookie_jar.save('cookie.txt', ignore_discard=True, ignore_expires=True)

def use_cookie():
    # Load the saved cookie and request the member page with it
    info_url = "http://www.sxt.cn/index/user.html"
    headers = {
        "User-Agent": UserAgent().random
    }
    req = Request(info_url, headers=headers)
    cookie_jar = MozillaCookieJar()
    cookie_jar.load("cookie.txt", ignore_expires=True, ignore_discard=True)
    handler = HTTPCookieProcessor(cookie_jar)
    opener = build_opener(handler)
    resp = opener.open(req)
    print(resp.read().decode())

if __name__ == '__main__':
    get_cookie()
    use_cookie()
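For comparison, the same flow can be written with requests.Session, which keeps cookies between requests automatically, so no cookie file is needed. A rough sketch under the assumption that the login form accepts the same user/password fields as above:

# Rough equivalent using requests.Session (cookies are kept in memory
# across requests; assumes the same login form as the urllib version).
import requests
from fake_useragent import UserAgent

session = requests.Session()
session.headers.update({"User-Agent": UserAgent().random})

# POST the credentials; the session stores any Set-Cookie it receives
session.post("http://www.sxt.cn/index/login/login",
             data={"user": "17703181473", "password": "123456"})

# The member page is now requested with the saved cookies attached
resp = session.get("http://www.sxt.cn/index/user.html")
print(resp.text)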