zoukankan      html  css  js  c++  java
  • 假期进度七:Python实现基础爬虫

    """
    First steps with urllib for issuing a crawler's data requests.

     urllib.request.urlopen(url)                issue a GET request
     urllib.parse.quote()                       URL-encode Chinese text
     urllib.request.urlretrieve(url, filename)  download url and save it to filename
    """
    from urllib.request import urlopen, urlretrieve, Request
    from urllib.parse import quote
    
    import ssl
    
    # WARNING: swaps the factory urllib uses for its default HTTPS context for
    # the unverified one, so every HTTPS request made through urllib in this
    # process skips certificate and hostname verification. Both names are
    # private ssl-module attributes; acceptable for a throw-away demo only.
    ssl._create_default_https_context = ssl._create_unverified_context
    
    
    def search_baidu(wd='千峰'):
        """Run a Baidu web search for ``wd`` and save the result page to disk.

        The keyword is URL-encoded, sent as the ``wd`` query parameter of a GET
        request carrying browser-like ``Cookie`` and ``User-Agent`` headers, and
        the raw response body is written to ``<wd>.html`` in the current working
        directory.

        Args:
            wd: Search keyword; also used as the stem of the output file name.

        Raises:
            AssertionError: If the response status is not 200.
        """
        # Endpoint of the network resource (URL); %s is the encoded keyword.
        url = 'https://www.baidu.com/s?wd=%s'
        # Build the request object, bundling the URL with the request headers.
        # NOTE(review): the Cookie below looks like a captured browser session
        # (it carries BDUSS values) - presumably account credentials; confirm
        # and consider loading it from configuration instead of source.
        request = Request(url % quote(wd),
                          headers={
                              'Cookie': 'BIDUPSID=585E43DE7CB2B860C3B9C269E2C4D929; PSTM=1579580069; BD_UPN=12314753; BAIDUID=10484BA386BB3BF20C7E02FBB519B4CD:FG=1; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BDSFRCVID=cV-OJeC62ZCrRLTrhEVfKF6Sa27L6EvTH6f3T38pC4vGwLFcuDPiEG0PSM8g0KubwmWVogKKBmOTHnuF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; H_BDCLCKID_SF=tb4qoCDbJDt3qPbNq4Tq5b-eKxRJa4r-HD7yW6rJaDvjS66Oy4oTj6DlLp5NWp3H2gTp0qvIJfOHhC3sjxo-3MvBbGbnJ5vj0bnqK-313JOhsMJYQft20htIeMtjBbQabKjU-J7jWhvIDq72y-ThQlRX5q79atTMfNTJ-qcH0KQpsIJM5-DWbT8EjH62btt_tJPDoK5P; delPer=0; BD_CK_SAM=1; PSINO=2; BDUSS=9uYVhkbEEzWW5JblR0LXlqeGR3b3p5N2t1Q0NzR3puOXhBNW1tR3ZnTXlXV1ZmRVFBQUFBJCQAAAAAAAAAAAEAAACjAMfnzt7H6bXEztLP8bfJ0akAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADLMPV8yzD1fbW; BDUSS_BFESS=9uYVhkbEEzWW5JblR0LXlqeGR3b3p5N2t1Q0NzR3puOXhBNW1tR3ZnTXlXV1ZmRVFBQUFBJCQAAAAAAAAAAAEAAACjAMfnzt7H6bXEztLP8bfJ0akAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADLMPV8yzD1fbW; BD_HOME=1; COOKIE_SESSION=4904_0_8_6_11_24_0_2_7_4_1_1_0_0_15_0_1597883995_0_1597888884%7C9%231182301_18_1597881542%7C9; BDRCVFR[Qs-Y_7gNldt]=OjjlczwSj8nXy4Grjf8mvqV; H_PS_PSSID=; sug=3; sugstore=0; ORIGIN=0; bdime=0',
                              'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,'
                                            ' like Gecko) Chrome/84.0.4147.125 Safari/537.36'
                          })
        # Send the request; the with-block closes the connection even when the
        # status check or the read fails.
        with urlopen(request) as response:
            # Raised explicitly instead of using an assert statement so the
            # check still runs under ``python -O``; the exception type is kept.
            if response.status != 200:
                raise AssertionError(
                    'unexpected HTTP status: %s' % response.status)
            print('请求成功')
            # Read the response payload.
            bytes_ = response.read()
        with open('%s.html' % wd, 'wb') as file:
            file.write(bytes_)
    
    
    def download_img(url):
        """Download ``url`` into the current working directory.

        The local file name is the last segment of the URL's path component, so
        a query string or fragment on the URL does not end up in the name.

        Args:
            url: Address of the resource to download.

        Raises:
            ValueError: If the URL path has no final segment to use as a file
                name (for example, it ends with ``/``).
        """
        # Imported here so this function does not rely on the file-level
        # import list being changed.
        from urllib.parse import urlsplit

        # Take the file name from the path only, not from the raw URL string.
        filename = urlsplit(url).path.rsplit('/', 1)[-1]
        if not filename:
            # An empty name would make urlretrieve store the data in a
            # temporary file whose location is then discarded.
            raise ValueError('cannot derive a file name from URL: %r' % url)
        urlretrieve(url, filename)
    
    
    if __name__ == '__main__':
        # Script entry point: fetch one sample poster image into the working
        # directory. The Baidu search demo is left switched off:
        # search_baidu()
        sample_image = 'https://www.dy2018.com/d/file/html/gndy/dyzz/2020-08-20/7a861af82beb6e25cd6729988c545c61.jpg'
        download_img(sample_image)
  • 相关阅读:
    kubernetes之常见故障排除(一)
    kubernetes集群管理命令(三)
    kubernetes集群管理命令(二)
    kubernetes集群管理常用命令一
    kubernetes集群管理之通过jq来截取属性
    kubernetes管理之使用yq工具截取属性
    kubectl技巧之通过jsonpath截取属性
    kubectl技巧之通过go-template截取属性
    kubernetes容器编排之定义环境变量以及通过downwardapi把pod信息作为环境变量传入容器内
    kubectl技巧之查看资源列表,资源版本和资源schema配置
  • 原文地址:https://www.cnblogs.com/yeyueweiliang/p/13537290.html
Copyright © 2011-2022 走看看