zoukankan      html  css  js  c++  java
  • 大学排名定向爬虫

    import requests
    import bs4
    from bs4 import BeautifulSoup
    def getHTMLText(url):
        """Download *url* and return the response body as text.

        The body is decoded with the encoding detected from its content
        (``apparent_encoding``) instead of the one declared by the server.

        Args:
            url: Address of the page to fetch.

        Returns:
            The decoded page text, or ``''`` when the request fails
            (connection problem, timeout after 30 seconds, or an HTTP error
            status reported by ``raise_for_status``).
        """
        try:
            r = requests.get(url, timeout=30)
            r.raise_for_status()
        except requests.RequestException:
            # Best-effort fetch: report the failure and hand back an empty page.
            # Only request failures are caught, so Ctrl-C and programming
            # errors are no longer silently swallowed.
            print('发生错误')
            return ''
        r.encoding = r.apparent_encoding
        return r.text
    
    def fillunivList(ulist, html):
        """Append one three-item entry to *ulist* for every table row in *html*.

        Each direct child tag of the first ``<tbody>`` is treated as a row.
        For every row the entry is built from its first three ``<td>`` cells:
        ``next_element`` of the first cell (the node parsed right after the
        ``<td>`` tag) and the ``.string`` of the second and third cells.

        Args:
            ulist: List that receives the entries; it is modified in place.
            html: HTML source of the page to parse.

        Returns:
            None. When *html* has no ``<tbody>`` (for example an empty string),
            *ulist* is left untouched.
        """
        soup = BeautifulSoup(html, 'html.parser')
        tbody = soup.find('tbody')
        if tbody is None:
            # Nothing to parse, e.g. the download failed and html is ''.
            return
        for tr in tbody.children:
            # .children also yields whitespace/text nodes between the rows.
            if not isinstance(tr, bs4.element.Tag):
                continue
            tds = tr('td')
            if len(tds) < 3:
                # Header or malformed row without the three expected cells.
                continue
            ulist.append([tds[0].next_element, tds[1].string, tds[2].string])
    
    def printUnivlList(ulist, num):
        """Print a header and up to *num* entries of *ulist* as a three-column table.

        Every column is centred in a width of 10 and the columns are separated
        by tabs. A final line ``'Suc<count>'`` reports how many entries were
        actually printed.

        Args:
            ulist: Sequence of entries; items 0, 1 and 2 of each entry are
                printed under the headers 排名, 学校 and 城市.
            num: Maximum number of entries to print. Values larger than
                ``len(ulist)`` print every entry instead of raising
                ``IndexError``; values below zero print none.
        """
        # chr(12288) is the full-width (CJK) space; padding the middle column
        # with it keeps Chinese text aligned.
        fill = chr(12288)
        row_format = '{0:^10}\t{1:{3}^10}\t{2:^10}'
        print(row_format.format('排名', '学校', '城市', fill))
        rows = ulist[:max(num, 0)]
        for u in rows:
            print(row_format.format(u[0], u[1], u[2], fill))
        print('Suc' + str(len(rows)))
    
    def main():
        """Fetch the 2017 ranking page and print up to its first 80 table rows.

        The page is downloaded once. If the download fails (``getHTMLText``
        returns an empty string) nothing further is parsed or printed.
        """
        uinfo = []
        url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2017.html'
        html = getHTMLText(url)
        if not html:
            # The fetch helper has already reported the error.
            return
        fillunivList(uinfo, html)
        # Never ask for more rows than were parsed from the page.
        printUnivlList(uinfo, min(80, len(uinfo)))


    if __name__ == '__main__':
        # Run the crawler only when executed as a script, not on import.
        main()
  • 相关阅读:
    软件工程——股票利润
    软件工程——爬楼梯
    总结
    团队4
    团队答辩
    团队3
    软工2
    团队合作(1)
    软件工程第二次作业
    软件工程第一次作业(2)
  • 原文地址:https://www.cnblogs.com/zhanghaijie/p/8410412.html
Copyright © 2011-2022 走看看