1.requests的常用参数
method:请求方式
url:地址
params:URL中传入参数
headers:请求头
cookies:Cookie
data:请求体数据(传入字典时按表单格式编码,用于表单提交)
json:请求体数据(传入的对象会被序列化为JSON,并自动设置Content-Type为application/json)
proxies:代理
2.爬取GitHub个人项目(先登录,再获取仓库列表)
import requests
from bs4 import BeautifulSoup

# Script: log in to GitHub with a form POST and print the links found on the
# account's repository settings page.
#
# Flow:
#   1. GET the login page to obtain the CSRF token and the initial cookies.
#   2. POST the credentials together with that token.
#   3. GET the repository settings page using the cookies from steps 1 and 2.
#   4. Parse the page and print every matching repository link.

# Seconds to wait for each request; without a timeout a stalled connection
# would block the script indefinitely.
TIMEOUT = 10

# A Session keeps a single cookie jar across all requests made through it, so
# the cookies from the login page and from the login POST are sent
# automatically on later requests instead of being merged by hand.
with requests.Session() as session:
    # 1. Fetch the login page and extract the CSRF token from the form.
    r1 = session.get(url='https://github.com/login', timeout=TIMEOUT)
    r1.raise_for_status()
    s1 = BeautifulSoup(r1.text, features='lxml')
    token_input = s1.find(name='input', attrs={'name': 'authenticity_token'})
    if token_input is None:
        # find() returns None when nothing matches; fail with a clear message
        # rather than an AttributeError on the next line.
        raise SystemExit('authenticity_token input not found on the login page')
    token = token_input.get('value')

    # 2. Submit the login form. 'login' and 'password' are placeholders that
    #    must be replaced with real credentials before running.
    r2 = session.post(
        url='https://github.com/session',
        data={
            'commit': 'Sign in',
            'utf8': '✓',
            'authenticity_token': token,
            'login': 'xxxx',
            'password': 'xxx',
        },
        timeout=TIMEOUT,
    )
    r2.raise_for_status()

    # 3. Request the repository settings page; the session sends the
    #    accumulated cookies with it.
    r3 = session.get(
        url='https://github.com/settings/repositories',
        timeout=TIMEOUT,
    )
    r3.raise_for_status()

# 4. Locate the container element that holds the repository entries.
soup3 = BeautifulSoup(r3.text, features='lxml')
list_group = soup3.find(name='div', attrs={'class': 'listgroup'})
if list_group is None:
    # Presumably the login failed or the page markup changed -- verify the
    # credentials and the 'listgroup' class name.
    raise SystemExit('repository list container not found; login may have failed')

# 5. Collect the anchor tags inside the container and print them.
list_a = list_group.find_all(name='a', attrs={'class': 'mr-1'})
print(list_a)

# 6. Print the href of every repository link.
for item in list_a:
    print(item.get('href'))