zoukankan      html  css  js  c++  java
  • 猫眼前100

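    # Maoyan Top-100 board scraper: requests http://maoyan.com/board/4 page by page and appends the results to movie.json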
    '''
    User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36 115Browser/8.6.2
    '''
    # -*- coding=utf-8 -*-
    import requests
    import lxml
    import json
    from lxml import etree

    def getOnePage(n):
        """Fetch one page of the Maoyan board and return its HTML text.

        Args:
            n: Zero-based page index. The request is sent with
                ``offset = n * 10``.

        Returns:
            The response body as text (``Response.text``). The HTTP status
            is not checked, so an error page is returned as-is.

        Raises:
            requests.exceptions.RequestException: if the request cannot be
                completed, including when the timeout expires.
        """
        url = f'http://maoyan.com/board/4?offset={n*10}'
        header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36 115Browser/8.6.2'}
        # Without a timeout requests waits indefinitely, so a single stalled
        # connection would hang the whole crawl.
        r = requests.get(url, headers=header, timeout=10)
        # Progress output: prints the response object for each page fetched.
        print(r)
        return r.text
    #global html=''
    # NOTE(review): the paste lost its indentation; this greeting is assumed to
    # be a module-level statement that runs whenever the file is loaded - confirm.
    print('世界,你好! hello world! ')

    # Module-level scratch dict for the fields of one movie record.
    item = {}
    # Running count of movies handled; run() increments it via `global id`.
    # NOTE(review): the name shadows the builtin id().
    id = 0
    def parse(text):
        """Yield one record per movie found in a board page.

        Args:
            text: HTML source of a board page.

        Yields:
            dict: A new dict per movie with two keys: '名称' (the ``title``
            attribute of the link inside ``p.name``) and 'time' (the text of
            ``p.releasetime``).
        """
        html = etree.HTML(text)
        # Common prefix of the three queries below.
        info = '//div[@class="board-item-content"]/div[@class="movie-item-info"]'
        names = html.xpath(info + '/p[@class="name"]/a/@title')
        starts = html.xpath(info + '/p[@class="star"]/text()')
        releasetimes = html.xpath(info + '/p[@class="releasetime"]/text()')
        # The star text is not emitted, but the list still takes part in zip()
        # so the number of records is limited exactly as before.
        for name, _star, releasetime in zip(names, starts, releasetimes):
            # Build a fresh dict for every movie. Re-yielding one shared dict
            # would make every collected record alias the last movie.
            yield {'名称': name, 'time': releasetime}


    def save2file(data, path='movie.json'):
        """Append one record to a file as JSON text followed by ``', '``.

        The file is opened in append mode, so repeated calls (and repeated
        runs) accumulate records. The result is a comma-separated stream of
        JSON values with a trailing separator, not a single valid JSON
        document; consumers must split it themselves.

        Args:
            data: A JSON-serializable object.
            path: Destination file. Defaults to 'movie.json' in the current
                working directory.

        Raises:
            TypeError: if ``data`` is not JSON-serializable.
            OSError: if the file cannot be opened or written.
        """
        # Serialize before opening the file so a serialization error does not
        # touch the output file. ensure_ascii=False keeps non-ASCII text
        # readable instead of \u-escaping it.
        record = json.dumps(data, ensure_ascii=False) + ', '
        with open(path, 'a', encoding='utf-8') as f:
            f.write(record)

    def run():
        """Crawl board pages 0 through 9, printing and saving every movie.

        For each record produced by parse(), the module-level counter ``id``
        is incremented, the counter and record are printed, and the record is
        appended to the output file through save2file().
        """
        # Declared once at function level rather than inside the loop; the
        # counter lives at module scope and keeps its value after run() ends.
        global id
        for n in range(10):
            for item in parse(getOnePage(n)):
                id += 1
                print(id, item)
                save2file(item)

    if __name__ == '__main__':
        # Start the crawl only when the file is executed as a script.
        run()

  • 相关阅读:
    javascript推荐书籍
    关于Cookie和Session的优缺点
    PHP try catch
    DQL、DML、DDL、DCL的概念与区别
    XP/Win7下QTP11循环试用30天的破解方法
    struts.xml配置详解
    MyEclipse8.5破解方法
    Myeclipse编写struts程序
    关于Hibernate的关联映射
    Java代码到字节码——第一部分
  • 原文地址:https://www.cnblogs.com/pscc/p/9774919.html
Copyright © 2011-2022 走看看