zoukankan      html  css  js  c++  java
  • python 下载文件

    下载文件

    # coding: UTF-8
    # 20190106
    # Single-threaded download of one file to disk.
    # To download in bulk, wrap the steps below in a for loop over file names.
    import requests

    # Source: the file name appended to the download site's base path.
    str1 = "sogoupinyinzd423.rar"
    url = r"http://down.sd173.com/soft1/" + str1

    # Destination: a relative path using the same name as the remote file.
    path = str1
    r = requests.get(url)
    # Fail loudly on an HTTP error instead of saving an error page as the archive.
    r.raise_for_status()
    # The with-statement closes the file on exit; no explicit close() is needed.
    with open(path, "wb") as f:
        f.write(r.content)
    # Report success only after the bytes have actually been written.
    print("下载完成")
    

    下载小说

    #http://www.cnblogs.com/Eva-J/articles/7228075.html#_label10
    
    
    import requests
    
    import re
    import json
    
    
    def getPage(url):
        """Fetch ``url`` with an HTTP GET and return the response body as text."""
        return requests.get(url).text
    
    
    def parsePage(s):
        """Lazily extract title/content pairs from an HTML string.

        Each match consists of a ``<font color="#000">`` element (the title)
        followed, possibly after other markup, by a ``<tr><td>...</td></tr>``
        cell (the content).  ``re.S`` lets ``.`` span newlines, so either part
        may cover several lines.

        Yields one dict per match, with key "内容" holding the cell text and
        key "题目" holding the font element text, in that order.
        """
        pattern = re.compile(
            '<font color="#000">(?P<id>.*?)</font>.*?<tr><td>(?P<kk>.*?)</td></tr>',
            re.S,
        )
        yield from (
            {"内容": match.group("kk"), "题目": match.group("id")}
            for match in pattern.finditer(s)
        )
    
    
    def main():
        """Download each listed page and append its parsed records to ``7.txt``.

        For every page number, the page is fetched, decoded as UTF-8, parsed
        with ``parsePage``, and each resulting record is written to ``7.txt``
        as one JSON object per line (non-ASCII characters kept as-is).
        """
        page_numbers = [595]
        for j in page_numbers:
            print(j)
            # NOTE(review): this URL has no scheme or host, so requests.get()
            # will reject it; the site prefix appears to be missing and must
            # be restored before this can run.
            url = "/538" + str(j) + ".html"

            response_html = requests.get(url)
            response_html.encoding = 'utf8'
            ret = parsePage(response_html.text)

            # The with-statement guarantees the file is closed (and flushed)
            # even if writing a record fails.
            with open("7.txt", "a", encoding="utf8") as f:
                for obj in ret:
                    print("ok" + str(j))
                    # Strip markup and line breaks from the values *before*
                    # serializing: json.dumps escapes CR/LF, so replacing
                    # '\r\n' in the dumped string would never match.
                    cleaned = {
                        key: value.replace('<br />', '').replace('\r\n', '')
                        for key, value in obj.items()
                    }
                    data = json.dumps(cleaned, ensure_ascii=False)
                    f.write(data + "\n")
    # Run the scraper only when this file is executed as a script, not on import.
    if __name__ == '__main__':
           main()
    
  • 相关阅读:
    Handsontable添加超链接
    Handsontable 筛选事件
    handsontable自定义渲染
    M1 Mac安装 Homebrew
    Pypi官网怎么找历史依赖包
    在 CentOS7 中我们在安装 MySQL
    Ansible使用yum安装
    Ansible集群自动化运维操作
    java对list中map集合中某个字段排序
    使用hive的orcfiledump命令查看orc文件
  • 原文地址:https://www.cnblogs.com/zhenqk/p/12387486.html
Copyright © 2011-2022 走看看