zoukankan      html  css  js  c++  java
  • 第十三节 电影天堂项目实战

     1 from lxml import etree
     2 import requests
     3 
     4 
     # Root of the site; prepended to the hrefs taken from listing pages.
     baseurl = 'https://www.dytt8.net'

     # HTTP headers passed to every requests.get call in this script.
     headers = {
         'User-Agent': (
             'Mozilla/5.0 (Windows NT 10.0; WOW64) '
             'AppleWebKit/537.36 (KHTML, like Gecko) '
             'Chrome/63.0.3239.132 Safari/537.36'
         ),
         'Referer': 'https://www.dytt8.net/html/gndy/dyzz/index.html',
     }
    def agent(ur, timeout=10):
        """Fetch ``ur`` and return the response parsed as an lxml HTML tree.

        Args:
            ur: URL to request; the module-level ``headers`` are sent with it.
            timeout: Seconds ``requests`` waits on the server before giving
                up. Without a timeout a stalled connection would block the
                crawl indefinitely.

        Returns:
            The result of ``etree.HTML`` applied to the response text.

        Raises:
            requests.exceptions.RequestException: on network failure or
                when the timeout expires.
        """
        resp = requests.get(ur, headers=headers, timeout=timeout)
        return etree.HTML(resp.text)
    17 
    def movie_url_list(html):
        """Return the ``href`` of every ``<a>`` nested in a ``table`` whose
        class is ``tbspan``.

        Args:
            html: Parsed document exposing an ``xpath`` method.

        Returns:
            Whatever ``html.xpath`` yields for the link query.
        """
        return html.xpath("//table[@class='tbspan']//a/@href")
    21 
    def parse_info(info, rule):
        """Delete every occurrence of ``rule`` from ``info`` and trim the
        surrounding whitespace of what is left.

        Args:
            info: Text to clean.
            rule: Substring to remove.

        Returns:
            The cleaned string.
        """
        without_rule = info.replace(rule, '')
        return without_rule.strip()
    24 
    # Pairs of (label that opens a text node in the detail block,
    # key under which the value is stored in the returned dict).
    _DETAIL_LABELS = (
        ("◎片  名", 'pianming'),
        ("◎年  代", 'niandai'),
        ("◎产  地", 'chandi'),
        ("◎类  别", 'leixing'),
        ("◎上映日期", 'shangyingshijian'),
        ("◎豆瓣评分", 'doubanpingfen'),
        ("◎片  长", 'pianchang'),
        ("◎标  签", 'biaoqian'),
    )


    def xiangqingye(url, timeout=10):
        """Download a detail page and extract its labelled fields.

        The text nodes under ``<div id="Zoom">`` are scanned; a node that
        starts with one of the known labels contributes one entry to the
        result, with the label removed and whitespace trimmed by
        ``parse_info``.

        Args:
            url: Address of the detail page.
            timeout: Seconds ``requests`` waits on the server before giving
                up, so a stalled connection cannot hang the crawl.

        Returns:
            dict mapping field keys (``'pianming'``, ``'niandai'``, ...) to
            their text. Labels absent from the page are simply missing from
            the dict; an unparseable page yields an empty dict.

        Raises:
            requests.exceptions.RequestException: on network failure or
                when the timeout expires.
        """
        resp = requests.get(url, headers=headers, timeout=timeout)
        # A strict decode raises UnicodeDecodeError on any byte that is not
        # valid GBK and would abort the whole crawl; substitute a placeholder
        # character for such bytes instead.
        text = resp.content.decode('gbk', errors='replace')
        html = etree.HTML(text)
        if html is None:
            # Nothing parseable came back (e.g. empty body).
            return {}

        movie = {}
        for info in html.xpath('//div[@id="Zoom"]//text()'):
            for label, key in _DETAIL_LABELS:
                if info.startswith(label):
                    movie[key] = parse_info(info, label)
                    # One text node carries one field; stop so the stripped
                    # value is never re-tested against the other labels.
                    break
        return movie
    57 
    def alldata(pages=range(1, 2)):
        """Crawl listing pages and return the parsed details of every movie.

        For each page number the listing URL
        ``.../html/gndy/dyzz/list_23_<n>.html`` is fetched with ``agent``,
        its detail links are collected with ``movie_url_list`` and each link
        is parsed by ``xiangqingye``.

        Args:
            pages: Iterable of listing page numbers to crawl. The default
                covers page 1 only, matching the previous hard-coded range.

        Returns:
            list of the dicts returned by ``xiangqingye``, in crawl order.
        """
        # Function-scope import keeps the file's top-level imports untouched.
        from urllib.parse import urljoin

        list_url_prefix = 'https://www.dytt8.net/html/gndy/dyzz/list_23_'
        list_url_suffix = '.html'
        movies = []
        for page in pages:
            listing = agent(list_url_prefix + str(page) + list_url_suffix)
            for href in movie_url_list(listing):
                # urljoin copes with root-relative, relative and absolute
                # hrefs, whereas plain concatenation only works for hrefs
                # that begin with "/".
                movies.append(xiangqingye(urljoin(baseurl, href)))
        return movies
    if __name__ == '__main__':
        # Run the crawl with its defaults and print the collected records.
        collected = alldata()
        print(collected)
  • 相关阅读:
    Ubuntu 16 安装redis客户端
    crontab 参数详解
    PHP模拟登录发送闪存
    Nginx配置端口访问的网站
    Linux 增加对外开放的端口
    Linux 实用指令之查看端口开启情况
    无敌的极路由
    不同的域名可以指向同一个项目
    MISCONF Redis is configured to save RDB snapshots, but is currently not able to persist on disk. Commands that may modify the data set are disabled. Please check Redis logs for details about the error
    Redis 创建多个端口
  • 原文地址:https://www.cnblogs.com/kogmaw/p/12506974.html
Copyright © 2011-2022 走看看