zoukankan      html  css  js  c++  java
  • python简单爬取前程无忧招聘信息

    import sys
    # Python 2-only workaround: reload(sys) makes sys.setdefaultencoding
    # available again (it is removed from the module at interpreter start-up),
    # and the call below switches the process-wide implicit str<->unicode codec
    # to UTF-8.
    # NOTE(review): presumably needed so the non-ASCII text scraped below can be
    # written through csv without explicit .encode() calls -- confirm.
    reload(sys)
    sys.setdefaultencoding('utf-8')
        
    import requests
    import csv
    from BeautifulSoup import BeautifulSoup
    
    def get_content(page, timeout=10):
        """Download one 51job search-result page and return it parsed.

        Args:
            page: Page number; it is converted with str() and inserted into
                the search URL just before ".html".
            timeout: Seconds to wait for the server before giving up. Without
                a timeout, requests can block forever on a stalled connection.

        Returns:
            A BeautifulSoup object built from the raw response body.

        Raises:
            requests.exceptions.RequestException: on connection failure,
                timeout, or an HTTP error status (4xx/5xx).
        """
        url = (
            'http://search.51job.com/list/200200,000000,0000,32,9,99,python,2,'
            + str(page)
            + '.html?lang=c&stype=1&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare='
        )
        response = requests.get(url, timeout=timeout)
        # Fail here with a clear HTTP error instead of handing an error page
        # to the parser and crashing later on a missing element.
        response.raise_for_status()
        return BeautifulSoup(response.content)
    
    def get(soup):
        """Extract job rows from a parsed result page and append them to imdb.csv.

        Looks up the <div class="jblist res"> container, and for every
        <a class="e e2 eck"> inside it collects the stripped text of its
        <h3> (title), <aside> (company) and <em> (salary) children as one
        CSV row.

        Args:
            soup: Parsed page as returned by get_content().

        Returns:
            None. The rows are appended to "imdb.csv" in the working directory.
        """
        container = soup.find('div', attrs={'class': 'jblist res'})
        if container is None:
            # No result list on this page (e.g. past the last page or an
            # unexpected layout): nothing to record, and calling findAll on
            # None would raise AttributeError.
            return

        inf_list = []
        for label in container.findAll('a', attrs={'class': 'e e2 eck'}):
            title = label.find('h3').text.strip()
            company = label.find('aside').text.strip()
            money = label.find('em').text.strip()
            inf_list.append((title, company, money))

        # Binary append mode: the Python 2 csv module requires the 'b' flag
        # (otherwise Windows gets "\r\r\n" line endings), and it matches the
        # "wb" mode used when the header row is written.
        with open("imdb.csv", "ab") as f:
            csv.writer(f).writerows(inf_list)
    
    # Create (or truncate) the output file and write the header row
    # (position, company, salary). The file is closed BEFORE scraping starts:
    # get() reopens imdb.csv in append mode, and keeping this handle open would
    # flush the buffered header at offset 0 on exit, overwriting the beginning
    # of the rows that were appended in the meantime.
    with open("imdb.csv","wb") as f:
        fw = csv.writer(f)
        fw.writerow(['职位','公司','薪资'])

    # Scrape result pages 1 through 9, appending each page's rows to the CSV.
    for j in range(1, 10):
        # Parenthesised single-argument form prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("-----正在爬第"+str(j)+"页内容---------")
        soup = get_content(j)
        get(soup)
  • 相关阅读:
    面向对象初识
    day 20 异常
    day 16 正则表达式
    day 13 生成器,推导式
    CSS
    html页面编写
    DAY 17常用模块
    DAY16 模块和包的导入
    DAY15 模块
    DAY14 函数(三)
  • 原文地址:https://www.cnblogs.com/Kermit-Li/p/6848936.html
Copyright © 2011-2022 走看看