zoukankan      html  css  js  c++  java
  • 记一次对选美网站的爬取

    #-*- coding:utf-8 -*-
    __author__ = "MuT6 Sch01aR"
    
    import requests
    from bs4 import BeautifulSoup
    import sys
    import io
      
    # Re-wrap stdout so that everything printed below is encoded as gb18030
    # rather than with the stream's default encoding.
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='gb18030')

    # Prefix prepended to every image "src" value found in the table.
    BASE_URL = "http://www.baidu.com/"
    # Seconds to wait on any single HTTP request; without a timeout a stalled
    # connection would block the whole crawl forever.
    REQUEST_TIMEOUT = 30

    # Fetch the listing page and fail loudly on an HTTP error instead of
    # trying to parse an error page.
    respons = requests.get(
        url = "http://www.baidu.com",
        timeout = REQUEST_TIMEOUT
    )
    respons.raise_for_status()

    # Let requests guess the charset from the body so respons.text decodes
    # with the detected encoding.
    respons.encoding = respons.apparent_encoding

    soup = BeautifulSoup(respons.text,features="html.parser")

    # find() returns None when nothing matches; stop with a clear message
    # rather than an AttributeError on the next line.
    target = soup.find('table',attrs={'class':'ListProduct'})
    if target is None:
        raise SystemExit("table with class 'ListProduct' not found in the response")

    tr_list = target.find_all("tr")

    # Row 0 is skipped (treated as the header row); at most rows 1..199 are
    # processed.
    for i in tr_list[1:200]:
        td_list = i.find_all("td")

        # Column layout used below: 0 = name, 1-5 = photo cells, 6 = phone,
        # 7 = info, 8 = registration date, 9 = WeChat id. Rows that do not
        # have all ten cells cannot be indexed safely, so skip them.
        if len(td_list) < 10:
            continue

        name = td_list[0].text

        # The text file receives the str() of this tuple, UTF-8 encoded.
        txt = "姓名:"+name,"电话:"+td_list[6].text,"信息:"+td_list[7].text,"报名日期:"+td_list[8].text,"微信号:"+td_list[9].text
        file_name = name + ".txt"
        with open(file_name,"wb") as s:
            s.write(str(txt).encode("utf-8"))

        # Download up to five photos, one per photo cell.
        for a in range(1,6):
            img = td_list[a].find('img')
            # A cell may have no <img>, or an <img> with a missing/empty src.
            src = img.attrs.get("src") if img is not None else None
            if not src:
                continue

            img_urls = BASE_URL + src
            try:
                img_response = requests.get(url=img_urls, timeout=REQUEST_TIMEOUT)
            except requests.RequestException as err:
                # One unreachable image should not abort the whole crawl.
                print("skip %s: %s" % (img_urls, err))
                continue
            if not img_response.ok:
                # Do not save an HTTP error page under a .jpg name.
                print("skip %s: HTTP %s" % (img_urls, img_response.status_code))
                continue

            file_name1 = name+str(a)+ ".jpg"
            with open(file_name1,"wb") as f:
                f.write(img_response.content)
    print("++++++++++++++++爬行结束+++++++++++++++")
    
  • 相关阅读:
    java.net.SocketException: Unconnected sockets not implemented 解
    ios 瀑布流
    IOS --- 日期时间格式 更改
    平衡二叉树(常问问题)
    Oracle 学习笔记 17 -- 异常处理(PL/SQL)
    【Java先进】Lock、通过使用线程池
    兼容 谷歌、火狐、360系列浏览器桌面通知()有用
    iOS截取特定的字符串(正则匹配)
    改造世界、知行合一、实践论、学以致用
    如何理解“哲学家们只是用不同的方式解释世界,而问题在于改变世界”?
  • 原文地址:https://www.cnblogs.com/sch01ar/p/7712051.html
Copyright © 2011-2022 走看看