zoukankan      html  css  js  c++  java
  • 猫眼前100

    #mzitu
    '''
    User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36 115Browser/8.6.2
    '''
    # -*- coding: utf-8 -*-
    import requests
    import lxml
    import json
    from lxml import etree

    def getOnePage(n):
        """Fetch one page of the Maoyan board and return its HTML text.

        Args:
            n: Zero-based page index; the request uses ``offset = n * 10``.

        Returns:
            The response body decoded as text (``r.text``).

        Raises:
            requests.exceptions.RequestException: if the connection fails or
                the server does not answer within the timeout.
        """
        url = f'http://maoyan.com/board/4?offset={n*10}'
        # Send a desktop-browser User-Agent string with the request.
        header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36 115Browser/8.6.2'}
        # Bound the wait: without a timeout a stalled connection would block
        # the whole crawl indefinitely.
        r = requests.get(url, headers=header, timeout=10)
        # Echo the response object (shows the HTTP status) for progress tracking.
        print(r)
        return r.text
    # Banner printed once, when the module is loaded.
    print('世界,你好! hello world! ')

    # Module-level state.  `id` is the running counter that run() updates
    # through `global id`; note that the name shadows the builtin id().
    item = dict()
    id = 0
    def parse(text):
        """Yield one record per movie found in a Maoyan board page.

        Args:
            text: HTML source of a board page.

        Yields:
            A new dict for each movie with the keys ``'名称'`` (the title
            attribute of the movie link) and ``'time'`` (the text of the
            release-time paragraph).
        """
        html = etree.HTML(text)
        # All three fields live under the same container, so share the prefix.
        base = '//div[@class="board-item-content"]/div[@class="movie-item-info"]'
        names = html.xpath(base + '/p[@class="name"]/a/@title')
        stars = html.xpath(base + '/p[@class="star"]/text()')
        releasetimes = html.xpath(base + '/p[@class="releasetime"]/text()')
        # The star text is not stored, but it stays in the zip so records are
        # paired (and truncated to the shortest list) exactly as before.
        for name, _star, releasetime in zip(names, stars, releasetimes):
            # Build a fresh dict per movie: yielding one shared dict would make
            # every collected record alias the same object, so they would all
            # end up showing the last movie.
            yield {'名称': name, 'time': releasetime}


    def save2file(data, path='movie.json'):
        """Append one record to a file as JSON text followed by ``', '``.

        Args:
            data: A JSON-serialisable object (run() passes one movie dict).
            path: Destination file; defaults to ``'movie.json'`` in the
                current working directory. The file is opened in append mode.

        Note:
            Records are written one after another separated by ``', '``, so
            the file as a whole is a comma-separated sequence of JSON values
            rather than a single JSON document.
        """
        # ensure_ascii=False keeps non-ASCII characters readable instead of
        # writing \uXXXX escapes.
        serialized = json.dumps(data, ensure_ascii=False) + ', '
        with open(path, 'a', encoding='utf-8') as f:
            f.write(serialized)

    def run(pages=10):
        """Crawl the board page by page, printing and saving every movie.

        Args:
            pages: Number of pages to fetch, starting at page 0. Defaults to
                10, the value that was previously hard-coded.

        Side effects:
            Increments the module-level counter ``id`` once per movie, prints
            the counter together with the record, and appends the record to
            the output file through save2file().
        """
        # Declared once at function scope; the counter continues across pages.
        global id
        for n in range(pages):
            text = getOnePage(n)
            for movie in parse(text):
                id += 1
                print(id, movie)
                save2file(movie)

    # Start the crawl only when the file is executed as a script, not when it
    # is imported.
    if __name__ == '__main__':
        run()

  • 相关阅读:
    strip()、rstrip()和lstrip()
    Vim 中快速移动系列(1)
    Python中的read(), readline(), readlines()
    Python 列表解析(列表生成式)
    Python lambda 表达式介绍
    Python中sort()和sorted()的区别
    js 高级
    maven学习笔记
    Maven之settings.xml详解
    Eclipse 学习笔记
  • 原文地址:https://www.cnblogs.com/pscc/p/9774919.html
Copyright © 2011-2022 走看看