zoukankan      html  css  js  c++  java
  • 两段实际爬虫程序应用

    import requests
    from lxml import html
    from bs4 import BeautifulSoup

    etree = html.etree

    # Scrape a two-column table out of a WeChat article:
    # column 1 = label text, column 2 = value text, printed joined per row.
    url = "https://mp.weixin.qq.com/s/drle9K4jgVWxm4v14ETbpQ"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, features='html.parser')
    # prettify() normalizes the markup (but adds newlines/indentation around
    # text nodes) before re-parsing with lxml so XPath can be used.
    content = soup.prettify()
    html_content = etree.HTML(content)
    # Table location hint: //*[@id="js_content"]/section[2]/section/section[2]/table
    ret_data = html_content.xpath('//tr')
    for item in ret_data:
        con = item.xpath("./td[1]/text()")
        con1 = item.xpath("./td[2]/text()")
        # Guard against header/empty rows that lack the expected cells
        # (the original indexed con[0]/con1[0] unconditionally -> IndexError).
        if con and con1:
            # .strip() removes the surrounding newlines and spaces that
            # prettify() introduces; it replaces the original chained
            # strip("\n").strip(" ") calls, which were garbled into
            # multi-line string literals (syntax errors) in this copy.
            print(con[0].strip() + con1[0].strip())
    

     

    import requests
    from lxml import html
    from bs4 import BeautifulSoup

    etree = html.etree

    # Scrape a Q&A table out of a second WeChat article. The question text is
    # split across two <span>s in the first cell; the answer lives in the
    # second cell as plain text plus an optional <span>.
    url = "https://mp.weixin.qq.com/s/Zt2K7aOfSr8mrSdArfzWAg"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, features='html.parser')
    # prettify() normalizes the markup (adding newlines around text nodes)
    # before re-parsing with lxml for XPath queries.
    content = soup.prettify()
    html_content = etree.HTML(content)
    # Structure hints from the page:
    # //*[@id="js_content"]/section[2]/section/section[2]/table
    # //*[@id="js_content"]/section[2]/section/section[2]/section[2]/section/section/table/tbody/tr[6]/td[1]/p/span[1]
    ret_data = html_content.xpath('//tr')
    for item in ret_data:
        con = item.xpath("./td[1]/p/span[1]/text()")
        con1 = item.xpath("./td[1]/p/span[2]/text()")
        con2 = item.xpath("./td[2]/p/text()")
        con3 = item.xpath("./td[2]/p/span/text()")
        # Skip rows that do not match the expected cell structure
        # (the original indexed con[0] unconditionally -> IndexError).
        if not con:
            continue
        if con1:
            # .strip() removes the newlines/spaces prettify() adds; it
            # replaces the original chained strip("\n").strip(" ") calls,
            # which were garbled into multi-line literals (syntax errors).
            # con2/con3 may be empty for some rows, so index them guardedly
            # instead of crashing.
            answer = (con2[0].strip() if con2 else "") + (con3[0].strip() if con3 else "")
            print(con[0].strip() + con1[0].strip() + "      答案→" + answer)
        else:
            print(con[0].strip())

     

    下载csv文件,并获取其内容

    import csv
    import requests

    # CSV export of the affected-product list for CVE-2020-1938 (Ghostcat).
    CSV_URL = 'https://www.remedy-cloud.com/download/csv/CVE-2020-1938'

    # Session is used as a context manager so the underlying connection pool
    # is closed when we are done.
    with requests.Session() as s:
        download = s.get(CSV_URL)
        # Decode the raw bytes once at the I/O boundary, then parse as CSV.
        decoded_content = download.content.decode('utf-8')
        cr = csv.reader(decoded_content.splitlines(), delimiter=',')
        # Iterate the reader directly instead of materializing it into a
        # throwaway list first (the original built my_list only to loop it).
        for row in cr:
            print(row)
    

      

  • 相关阅读:
    Python—将py文件编译成so文件
    Python—网络通信编程之套接字基本原理
    Python—IO多路复用之epoll总结
    Python—IO多路复用之poll总结
    Python—IO多路复用之select总结
    希尔排序
    直接插入排序
    冒泡排序
    选择排序(C++,Java,Python实现)
    二叉搜索树的相关功能的实现
  • 原文地址:https://www.cnblogs.com/weidaijie/p/14118768.html
Copyright © 2011-2022 走看看