zoukankan      html  css  js  c++  java
  • 猫眼 Top 100 爬取(目前只完成了第一页)

    # python 3.7
    from urllib.request import Request,urlopen
    import time,re,csv
    
    class Maoyan(object):
        """Scrape movie entries from a Maoyan board page into a CSV file.

        Typical use is ``Maoyan().get_page(url)``: the page is downloaded,
        every board entry is extracted with a regular expression, and the
        resulting rows are appended to ``output_path``.

        Args:
            output_path: CSV file that rows are appended to. Defaults to
                ``'11.csv'`` in the current working directory.
        """

        # Output file used when the caller does not supply one.
        DEFAULT_OUTPUT = '11.csv'

        # One match per board entry; the three groups are the text of the
        # link that carries ``data-val``, the stripped body of the "star"
        # paragraph, and the body of the "releasetime" paragraph.
        # ``\s+`` bridges the newlines/indentation between the tags.
        _ENTRY_RE = re.compile(
            r'data-val="\{.*?\}">(.*?)</a></p>\s+'
            r'<p class="star">\s+(.*?)\s+</p>\s+'
            r'<p class="releasetime">(.*?)</p>'
        )

        def __init__(self, output_path=DEFAULT_OUTPUT):
            self.output_path = output_path
            # Browser-like request headers sent with every page fetch.
            # NOTE(review): the cookie looks like a captured browser session
            # and has presumably expired -- refresh it if requests are rejected.
            self.header = {
                'Connection': 'keep-alive',
                'Cookie': 'uuid_n_v=v1; uuid=16B52300EED311E8A50EC9D5D894D382A1072CB6CA3D4BAA95D7EA39B1BB3637; _lxsdk_cuid=1673eb37e1fc8-011175d5446e19-424f0928-13c680-1673eb37e20c8; _lxsdk=16B52300EED311E8A50EC9D5D894D382A1072CB6CA3D4BAA95D7EA39B1BB3637; _csrf=6597fe121a59ff12f8bf1b793cb7d29274a118e066c86f8bf88b8e765b7d4dad; _lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; __mta=145127947.1542945209936.1542945209936.1542954826219.2; _lxsdk_s=1673f4639ac-357-82a-15d%7C%7C4',
                'Host': 'maoyan.com',
                'Referer': 'http://maoyan.com/board',
                'Upgrade-Insecure-Requests': '1',
                'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36',
            }

        def get_page(self, url):
            """Download ``url`` and hand the decoded HTML to ``parsePage``.

            Args:
                url: Address of the board page to fetch.

            Raises:
                urllib.error.URLError: If the request fails.
                UnicodeDecodeError: If the response body is not valid UTF-8.
            """
            request = Request(url=url, headers=self.header)
            # The context manager closes the connection even if read() fails.
            with urlopen(request) as response:
                body = response.read()
            self.parsePage(body.decode())

        def parsePage(self, res):
            """Extract all board entries from ``res`` and append them to the CSV.

            Args:
                res: HTML text of a board page.

            Returns:
                A list of 3-tuples of strings, one per matched entry, in page
                order. The list is empty when nothing matches.
            """
            rows = self._ENTRY_RE.findall(res)
            self.write(rows)
            return rows

        def write(self, a):
            """Append each item of ``a`` as one row of the output CSV.

            The file is opened once for the whole batch and is not touched at
            all when ``a`` is empty.

            Args:
                a: Iterable of row sequences (e.g. the tuples produced by
                    ``parsePage``).

            Raises:
                UnicodeEncodeError: If a value cannot be encoded as GBK.
            """
            rows = [list(row) for row in a]
            if not rows:
                return
            with open(self.output_path, 'a', newline='', encoding='gbk') as f:
                csv.writer(f).writerows(rows)

        def wordon(self):
            """Placeholder with no behavior yet."""
            pass
    
    if __name__ == '__main__':
        # Script entry point: fetch and record only the page at offset 0.
        first_page_url = 'http://maoyan.com/board/4?offset=0'
        Maoyan().get_page(first_page_url)
  • 相关阅读:
    递归
    正则表达式的理解
    JSP九大内置对象
    Cookie的使用
    sql语句按照时间段查询
    文件拷贝 上传下载 输入流输出流个人小结,仅供自己使用
    动态SQL 与sql片段 foreach
    ResultMap
    hibernate
    idea
  • 原文地址:https://www.cnblogs.com/Skyda/p/10008866.html
Copyright © 2011-2022 走看看