zoukankan      html  css  js  c++  java
  • 猫眼前100

    #mzitu
    '''
    User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36 115Browser/8.6.2
    '''
    # -*- coding=utf-8 -*-
    import requests
    import lxml
    import json
    from lxml import etree

    def getOnePage(n, timeout=10):
        """Download one page of the Maoyan top-100 board and return its HTML.

        Args:
            n: Zero-based page index; the request uses ``offset = n * 10``.
            timeout: Seconds to wait for the server before ``requests`` gives
                up and raises. Without a timeout a stalled connection would
                block the whole scrape indefinitely.

        Returns:
            The response body as text (``Response.text``). The HTTP status is
            printed but not checked, so an error page is returned as-is.
        """
        url = f'http://maoyan.com/board/4?offset={n*10}'
        # A browser-like User-Agent is sent with every request.
        header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36 115Browser/8.6.2'}
        r = requests.get(url, headers=header, timeout=timeout)
        # Prints the Response repr (includes the status code) as a progress trace.
        print(r)
        return r.text
    # NOTE(review): indentation was lost in this listing; presumably the
    # statements below are module-level and run once at import time - confirm.
    # Prints a bilingual "hello world" greeting.
    print('世界,你好! hello world! ')

    # Module-level dict, initially empty.
    item = {}
    # Running counter incremented by run() through ``global id``.
    # NOTE(review): this name shadows the builtin id().
    id = 0
    def parse(text):
        """Yield one dict per movie found in a Maoyan board page.

        Args:
            text: HTML source of a board page.

        Yields:
            A new dict for every movie with the keys ``'名称'`` (the title
            attribute of the movie link) and ``'time'`` (the text of the
            release-time paragraph). Each yielded dict is a distinct object,
            so results can be collected into a list without aliasing.
        """
        html = etree.HTML(text)
        # All three fields live under the same container; build the prefix once.
        info = '//div[@class="board-item-content"]/div[@class="movie-item-info"]'
        names = html.xpath(info + '/p[@class="name"]/a/@title')
        stars = html.xpath(info + '/p[@class="star"]/text()')
        releasetimes = html.xpath(info + '/p[@class="releasetime"]/text()')
        # The star list is not emitted, but it stays in the zip so that the
        # number of yielded movies is still bounded by the shortest of the
        # three lists.
        for name, _star, releasetime in zip(names, stars, releasetimes):
            yield {'名称': name, 'time': releasetime}


    def save2file(data):
        """Append one record to ``movie.json`` in the current directory.

        Args:
            data: A JSON-serializable object (the scraper passes a dict).

        The record is serialized with ``ensure_ascii=False`` so non-ASCII text
        is written verbatim as UTF-8, and is followed by ``', '``. The file is
        therefore a comma-separated run of JSON values, not a single valid
        JSON document; a reader has to wrap or split it before parsing.
        """
        # Serialize before opening so a non-serializable record raises
        # without touching the file. A separate name avoids rebinding the
        # parameter to a string.
        serialized = json.dumps(data, ensure_ascii=False) + ', '
        with open('movie.json', 'a', encoding='utf-8') as f:
            f.write(serialized)

    def run(pages=10):
        """Scrape the board page by page and append every movie to disk.

        Args:
            pages: Number of consecutive pages to fetch, starting at page 0.
                Defaults to 10.

        For each page this fetches the HTML with ``getOnePage``, extracts the
        movies with ``parse``, prints a running number next to each movie and
        stores it with ``save2file``. The running number is kept in the
        module-level ``id`` counter, so it continues across calls.
        """
        # Declared once at the top: the counter is module state, and the
        # declaration must precede its first use in the function.
        global id
        for n in range(pages):
            text = getOnePage(n)
            for item in parse(text):
                id += 1
                print(id, item)
                save2file(item)

    # Start the scrape only when the file is executed as a script, not when
    # it is imported.
    if __name__ == '__main__':
        run()

  • 相关阅读:
    codevs1004 四子连棋
    codevs1009 产生数
    NOIP2014 寻找道路
    Tyvj1139 向远方奔跑(APIO 2009 抢掠计划)
    随机算法
    线性基
    线性基入门
    线性基 + 并查集
    欧拉公式 (平面)
    卡特兰数 + 大数
  • 原文地址:https://www.cnblogs.com/pscc/p/9774919.html
Copyright © 2011-2022 走看看