zoukankan      html  css  js  c++  java
  • python爬虫学习(二)

    #获取相应内容
    import requests
    from bs4 import BeautifulSoup
    # r=requests.get("http://www.santostang.com/")
    # print("文本编码格式:",r.encoding)
    # print("响应状态码:",r.status_code)
    # print("字符串方式的响应体:",r.text) #打印整个html页面

    #响应超时
    # link="http://www.santostang.com/"
    # r=requests.get(link,timeout=0.001)

    #定制Requests
    # key_dict={"key1":"value1","key2":"value2"}
    # req=requests.get("http://httpbin.org/get",params=key_dict)
    # print("URL已正确编码:",req.url)
    # print("字符串方式的响应体: ",req.text)
    # req=requests.post("http://httpbin.org/post",data=key_dict)
    # print("URL已正确编码:",req.url)
    # print("字符串方式的响应体: ",req.text)

    # headers={"User-Agent":"Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:70.0) Gecko/20100101 Firefox/70.0",
    # "Host":"www.santostang.com"
    # }
    # r=requests.get("http://www.santostang.com/",headers=headers)
    # print("响应状态码:",r.status_code)
    def get_movies(pages=10, page_size=25):
        """Scrape movie titles from the Douban Top 250 listing pages.

        Fetches ``pages`` consecutive listing pages, prints the HTTP status
        code of each response, and collects the text of the first
        ``<a><span>`` found inside every ``<div class="hd">`` element.

        Args:
            pages: Number of listing pages to request. With the default
                ``page_size`` the default of 10 requests offsets 0..225.
            page_size: Step applied to the ``start`` query parameter between
                consecutive pages.

        Returns:
            A list of stripped title strings in the order they were found.
            ``div.hd`` elements without the expected ``<a><span>`` nesting
            are skipped.

        Raises:
            requests.RequestException: Propagated unchanged from
                ``requests.get`` (for example when the 10 second timeout
                expires).
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:70.0) Gecko/20100101 Firefox/70.0",
            "Host": "movie.douban.com",
        }
        movie_list = []
        for page in range(pages):
            link = "https://movie.douban.com/top250?start=" + str(page * page_size)
            r = requests.get(link, headers=headers, timeout=10)
            print(str(page + 1), "页面响应状态码:", r.status_code)
            soup = BeautifulSoup(r.text, "lxml")
            for each in soup.find_all("div", class_="hd"):
                # bs4 yields None for a missing child tag; guard the chain so
                # an unexpected page layout does not raise AttributeError.
                anchor = each.a
                title_span = anchor.span if anchor is not None else None
                if title_span is None:
                    continue
                movie_list.append(title_span.text.strip())
        return movie_list
    # Run the scraper at import/run time and print the collected title list.
    movies=get_movies()
    print(movies)
  • 相关阅读:
    NYOJ926(概率)
    无根树转有根树
    Codeforces Round #304 C(Div. 2)(模拟)
    hdu5878(枚举,打表)
    求一个矩阵的逆矩阵(用伴随矩阵求)
    求n阶方阵的值(递归)
    setw()函数
    快速幂计算(整数快速幂/矩阵快速幂)
    全文检索
    发送邮件
  • 原文地址:https://www.cnblogs.com/momingzhong/p/11901853.html
Copyright © 2011-2022 走看看