zoukankan      html  css  js  c++  java
  • 爬取某电影网站最新电影

     1 # -*- coding: utf-8 -*-
     2 """
     3 Created on Wed Oct 12 16:48:33 2016
     4 
     5 @author: fuzzier
     6 """
     7 
     8 import requests
     9 from bs4 import BeautifulSoup
    10 import re
    11 import os
    12 import codecs
    13 
    14 URL = 'http://www.xxxxx.net'
    15 
    16 def download_page(url):
    17     headers = {'User_Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1581.2 Safari/537.36'}
    18     html = requests.get(url,headers=headers).content
    19     return html
    20     
    21 def parser_html(data):
    22     soup = BeautifulSoup(data,'html.parser')
    23     films = []
    24     trs = soup.find('div',class_='bd3rl').find('div',class_='co_content8').find_all('tr')
    25     for i in trs:
    26         tr = i.find('a',href=re.compile(r'/w+?/w+?/w+?/d+?/d+?.html')).string
    27         if tr:
    28             films.append(tr)
    29         else:
    30             films.append('None')
    31     return films
    32 
    33 if __name__ == '__main__':
    34     html = download_page(URL)
    35     film_list = parser_html(html)
    36     with codecs.open(os.getcwd()+'\dytt8_hot.txt','w',encoding='utf8') as f:
    37         for i in film_list:
    38             f.write(i+'
    ')

  • 相关阅读:
    JMeter 关联
    JMeter MD5加密
    JMeter 时间函数
    JMeter 常用设置
    JMeter 服务器资源监控
    js制作列表滚动(有滚动条)
    js监听事件
    获取窗口大小 并自适应大小变化
    js 标签云
    js 显示数字不断增加
  • 原文地址:https://www.cnblogs.com/fuzzier/p/5953973.html
Copyright © 2011-2022 走看看