zoukankan      html  css  js  c++  java
  • Python小爬虫练习

    # coding: utf-8
    __author__ = 'zhangcx'
    from urllib3 import PoolManager
    import codecs
    import json
    
    class myjob(object):
        """Page through Lagou's position-search endpoint and print the jobs.

        The search is fixed to keyword ``Java`` and to the city encoded in the
        request URL. Paging state lives on the instance, so ``getjob`` resumes
        from wherever the previous call stopped.
        """

        # Search endpoint; the query string selects the default ordering and
        # the city (URL-encoded UTF-8 for "武汉").
        _URL = ('http://www.lagou.com/jobs/positionAjax.json'
                '?px=default&city=%E6%AD%A6%E6%B1%89')

        def __init__(self):
            self._page = 1              # 1-based number of the next page to fetch
            self._totalPageCount = 0    # refreshed from every response
            self._first = True          # sent as the 'first' form field
            self._hasNextPage = True    # cleared once the last page is consumed
            self._http = PoolManager()

        def getjob(self):
            """Fetch every remaining result page and print one line per job.

            Each job is printed as ``companyShortName,positionName,salary``.
            Calling this again after the last page was consumed does nothing.

            Raises:
                KeyError: if the decoded response does not contain the
                    expected ``content`` structure.
            """
            # Loop instead of recursing once per page: a long result set would
            # otherwise run into the interpreter's recursion limit.
            while self._hasNextPage:
                form = {
                    'first': '%s' % self._first,
                    'pn': '%d' % self._page,
                    'kd': 'Java',
                }
                # Pass the form by keyword: newer urllib3 releases take ``body``
                # (not ``fields``) as the third positional parameter.
                response = self._http.request('POST', self._URL, fields=form)
                content = json.loads(response.data.decode('utf-8'))['content']

                for item in content['result']:
                    print("{name},{positionName},{salary}".format(
                        name=item['companyShortName'],
                        positionName=item['positionName'],
                        salary=item['salary']))

                self._hasNextPage = content['hasNextPage']
                self._totalPageCount = content['totalPageCount']

                # Every request after the first one is sent with first=false.
                self._first = 'false'
                # Stop at the reported page count even if the server still
                # claims that another page exists.
                if self._page + 1 > self._totalPageCount:
                    self._hasNextPage = False
                self._page += 1
    
    
    if __name__ == "__main__":
        # Script entry point: build a crawler and print every result page.
        myjob().getjob()



  • 相关阅读:
    二阶段任务分配
    二阶段12.2
    针对提出的意见的改进
    一阶段spring(小呆呆)团队评分
    搜狗输入法使用感受
    省呱呱典型用户和用户场景
    省呱呱意见评论
    11/21
    11/20小组计划
    11/19小组计划
  • 原文地址:https://www.cnblogs.com/huangzelin/p/5024452.html
Copyright © 2011-2022 走看看