zoukankan      html  css  js  c++  java
  • python简单爬取前程无忧招聘信息

    import sys
    reload(sys)
    sys.setdefaultencoding('utf-8')
        
    import requests
    import csv
    from BeautifulSoup import BeautifulSoup
    
    def get_content(page, timeout=10):
        """Download one 51job search-result page and return it parsed.

        page: page number; it is converted with str() and spliced into the URL.
        timeout: seconds passed to requests.get so that a stalled connection
            cannot hang the crawl forever (the default of 10 is an arbitrary
            choice).

        Returns the BeautifulSoup tree built from the raw response bytes.
        Raises requests.exceptions.RequestException on timeout, connection
        failure, or a 4xx/5xx status.
        """
        url = (
            'http://search.51job.com/list/200200,000000,0000,32,9,99,python,2,'
            + str(page)
            + '.html?lang=c&stype=1&postchannel=0000&workyear=99&cotype=99'
            '&degreefrom=99&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1'
            '&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line='
            '&specialarea=00&from=&welfare='
        )
        response = requests.get(url, timeout=timeout)
        # Fail loudly on an HTTP error instead of parsing an error page as if
        # it were a result list.
        response.raise_for_status()
        return BeautifulSoup(response.content)
    
    def get(soup):
        """Extract the job postings of one result page and append them to imdb.csv.

        soup: parsed result page; only its find()/findAll() interface is used.

        Every <a class="e e2 eck"> inside <div class="jblist res"> is treated
        as one posting whose <h3>, <aside> and <em> children hold the title,
        company and salary. A page without the results container contributes
        no rows (previously this raised AttributeError on None).

        Returns None; the rows are written as a side effect.
        """
        container = soup.find('div', attrs={'class': 'jblist res'})
        if container is None:
            # No result list on this page, so there is nothing to append.
            return

        rows = []
        for posting in container.findAll('a', attrs={'class': 'e e2 eck'}):
            title = posting.find('h3').text.strip()
            company = posting.find('aside').text.strip()
            money = posting.find('em').text.strip()
            rows.append((title, company, money))

        # Binary append mode, matching the "wb" used for the header row: the
        # Python 2 csv module writes its own line terminators, and text mode
        # would turn them into "\r\r\n" on Windows.
        # Non-ASCII text relies on the utf-8 default encoding set at the top
        # of this file.
        with open("imdb.csv", "ab") as f:
            csv.writer(f).writerows(rows)
    
    # Start a fresh output file containing only the header row, and close it
    # BEFORE scraping. get() reopens imdb.csv in append mode for every page;
    # if this "wb" handle stayed open, its still-buffered header would be
    # flushed at offset 0 on close and overwrite the rows already appended.
    with open("imdb.csv","wb") as f:
        csv.writer(f).writerow(['职位','公司','薪资'])

    # Crawl result pages 1 through 9.
    for j in range(1, 10):
        print("-----正在爬第"+str(j)+"页内容---------")
        html = get_content(j)  # parsed page object, despite the name
        get(html)
  • 相关阅读:
    1225. 岛的周长
    238. Product of Array Except Self除自身以外数组的乘积
    RxJava--Buffer,GroupBy 对比
    RxJava--Buffer,GroupBy 对比
    HDU-2182-Frog
    HDU-2182-Frog
    Linux下必知必会文件和目录
    Linux下必知必会文件和目录
    获取一篇新闻的全部信息
    获取一篇新闻的全部信息
  • 原文地址:https://www.cnblogs.com/Kermit-Li/p/6848936.html
Copyright © 2011-2022 走看看