zoukankan      html  css  js  c++  java
  • 初识Python 03 day

    # 一.request请求库
    # 安装与使用
    # 1.打开cmd
    #2.输入pip3 install requests
    
    # import requests #导入request请求库
    
    #
    # #向百度主页发送请求,获取响应对象
    # response=requests.get(url='https://www.baidu.com/')
    #
    # #设置字符编码为utf-8
    # response.encoding='utf-8'
    #
    # #打印响应文本
    # print(response.text)
    # #将响应文本写入本地
    # with open ('baidu.html','w',encoding='utf-8') as f:
    #     f.write(response.text)
    
    
    # 视频
    # import requests #导入request请求库
    # response=requests.get('https://video.pearvideo.com/mp4/third/20190612/cont-1565462-11308777-161601-hd.mp4')
    # print(response.content)
    # with open ('视频.MP4','wb')as f:
    #  f.write(response.content)
    
    
    '''1.向页面主页发送请求
         解析获取所有视频的id
         例如:
          video——1570302
         re.findall()
        2.获取视频详情页(地址栏)
    
    
    '''
    # import requests
    # import re   #正则,用于解析文本数据
    # #1.向主页发送请求
    # response=requests.get('https://www.pearvideo.com/')
    # print(response.content)
    # # re正则匹配获取所有视频id
    # # 参数1:正则匹配规则
    # # 参数2.解析文本
    # # 参数3.匹配模式
    # res_list=re.findall('<a href="video_(.*?)"',response.text,re.S)
    # print(res_list)
    # #2.拼接视频详情页
    # for v_d in res_list:
    #         detai_url="https://www.pearvideo.com/video_"+v_d
    #         print(detai_url)
    #
    #         #对每一个视频详情页发送请求获取视频资源
    #         response=requests.get(url=detai_url)
    #         print(response.text)
    #         #解析并提取详情页视频URL
    #         video_url=re.findall('srcUrl="(.*?)"',response.text,re.S)  [0]
    #         print(video_url)
    #
    #         #视频名称
    #         video_name=re.findall('< h1 class="video_tt"(.*?)</h1>',response.text,re.S)[0]
    #         print(video_name)
    #
    #         #向视频URL发送请求获取 视频二进制流
    #         v_response=requests.get(video_url)
    #
    #         with open ('%s.mp4'%video_name,'wb')as f:
    #                 f.write(v_response.content)
    #                 print(video_name,'视频爬取成功')
    #
    
    
    #3.抓包分析
    #  打开浏览器开发者模式(检查)-->选中network
    #
    # 1.请求URL
    # 2.请求方式
    #  get:
    #      直接发送请求   https://www.cnblogs.com/kermitjam/articles/9697851.html
    #  post:
    #  需要携带用户信息发送到目的地址
    #
    #         https://www.cnblogs.com/login
    #          {            'user':'qing'
    #          'pwd':'123'
    #          }
    #
    #3.响应状态:
    #2xx:成功
    #3xx:重定向
    #4xx:找不到资源
    #5xx:服务器错误
    #4 请求头信息
    #User_Agent:用户代理(证明是通过电脑设备及浏览器发送的请求)
    #Cookie:登录用户信息(证明你目标网站用户)
    #Referer:上次访问的URL(证明你是从目标网站转过来的)
    #5请求体:
    #  post请求才有请求体
    #         Form  Data
    #          {            'user':'qing'
    #          'pwd':'123'
    #          }
    
    
    # 爬虫步骤
    # 1.发送请求
    import requests
    import re
    
    
    def get_page(base_url):
            """Fetch *base_url* and return the ``requests`` Response object.

            A 10-second timeout is applied so a stalled server cannot hang
            the crawler forever (``requests.get`` has no timeout by default).
            """
            response = requests.get(base_url, timeout=10)
            return response
    
    
    # 2.解析文本
    # 2.解析文本
    def parse_index(text):
            """Parse one douban Top250 listing page.

            Returns a list of 7-tuples:
            (rank, detail URL, title, director/cast line, rating,
             vote count, one-line summary) — matching the 7-way unpack
            done by the caller.

            Bug fixed: the original pattern was missing the closing quote
            after ``href="(.*?)`` and had no capture group inside the title
            ``<span>``, so it yielded 5-tuples that could not be unpacked
            into 7 variables.
            """
            res = re.findall(
                    r'<div class="item">.*?<em class="">(.*?)</em>'
                    r'.*?<a href="(.*?)">'
                    r'.*?<span class="title">(.*?)</span>'
                    r'.*?导演:(.*?)</p>'
                    r'.*?<span class="rating_num".*?>(.*?)</span>'
                    r'.*?<span>(.*?)人评价</span>'
                    r'.*?<span class="inq">(.*?)</span>',
                    text, re.S)
            return res
    
    
    # 3.保存数据
    # 3.保存数据
    def save_data(data):
            """Append one formatted movie record to the local douban.txt file."""
            with open('douban.txt', mode='a', encoding='utf-8') as out_file:
                    out_file.write(data)
    
    
    # Entry point: crawl all 10 pages of the douban Top250 list.
    #
    # Bugs fixed versus the original:
    #  * the URL template lacked the f-string prefix, so the literal text
    #    "{num}" was sent instead of the page offset;
    #  * both for-loops sat at top level outside the __main__ guard and
    #    were not nested, so only the LAST page's movie_list was ever
    #    formatted and saved;
    #  * stray "d" typo in the "电影简介" label.
    if __name__ == '__main__':
            num = 0
            for _ in range(10):
                    # Each page lists 25 movies; num is the start offset.
                    base_url = f'https://movie.douban.com/top250?start={num}&filter='
                    num += 25
                    print(base_url)

                    # 1. Send the request for this listing page.
                    response = get_page(base_url)
                    # 2. Parse the response text into movie tuples.
                    movie_list = parse_index(response.text)
                    # 3. Format and save every movie on this page.
                    for movie in movie_list:
                            # Unpack: rank, detail URL, title, director/cast,
                            # rating, vote count, one-line summary.
                            v_top, v_url, v_name, v_daoyan, v_point, v_num, v_desc = movie
                            movie_content = f'''
            电影排名:{v_top}
            电影URL:{v_url}
            电影名称:{v_name}
            电影主演:{v_daoyan}
            电影评分:{v_point}
            评价人数:{v_num}
            电影简介:{v_desc}

            '''
                            print(movie_content)
                            # Persist the record to douban.txt.
                            save_data(movie_content)
  • 相关阅读:
    VS2008编写MFC程序--使用opencv2.4()
    November 02nd, 2017 Week 44th Thursday
    November 01st, 2017 Week 44th Wednesday
    October 31st, 2017 Week 44th Tuesday
    October 30th, 2017 Week 44th Monday
    October 29th, 2017 Week 44th Sunday
    October 28th, 2017 Week 43rd Saturday
    October 27th, 2017 Week 43rd Friday
    October 26th, 2017 Week 43rd Thursday
    October 25th, 2017 Week 43rd Wednesday
  • 原文地址:https://www.cnblogs.com/qing1051663949/p/11093816.html
Copyright © 2011-2022 走看看