zoukankan      html  css  js  c++  java
  • #016 爬虫第一次尝试

     1 import requests
     2 from bs4 import BeautifulSoup
     3 import bs4
     4 
     5 def getHTMLText(url):
     6      try:
     7           r = requests.get(url, timeout = 30)
     8           r.raise_for_status()
     9           r.encoding = r.apparent_encoding
    10           return r.text
    11      
    12           
    13      except:
    14           return ""
    15           
    16      return ""
    17 
    18 def fillUnivList(ulist,html):
    19      soup = BeautifulSoup(html, "html.parser")
    20      for tr in soup.find('tbody').children:
    21           if isinstance(tr, bs4.element.Tag):
    22                tds = tr('td')
    23                ulist.append([tds[0].string, tds[1].string, tds[2].string])
    24 
    25                
    26                
    27      
    28 
    29 def printUnivList(ulist, num):
    30      print("{:^10}	{:^6}	{:^10}".format("排名","学校名称","总分"))
    31      for i in range(num):
    32           u=ulist[i]
    33           print("{:^10}	{:^6}	{:^10}".format(u[0],u[1],u[2]))
    34 
    35 def main():
    36      uinfo = []
    37      url = 'http://www.zuihaodaxue.com/zuihaodaxuepaiming2016.html'
    38      html = getHTMLText(url)
    39      fillUnivList(uinfo,html)
    40      printUnivList(uinfo, 20)
    41 main()
    42      

    结合这个课第一次敲的爬虫,没有Python语法基础,主要是老师说啥我就咋敲,没有技术含量。就是不知道咋敲,一直报错。

  • 相关阅读:
    .net core之上传文件的限制
    如何Telnet端口
    ES坑之logstash配置文件
    MySQL报错packets larger than max_allowed_packet are not allowed
    ES坑之安装
    ES坑之概述
    Git
    IDEA 搭建 maven(下)
    IDEA搭建maven(上)
    JDBC
  • 原文地址:https://www.cnblogs.com/hx97/p/10607503.html
Copyright © 2011-2022 走看看