zoukankan      html  css  js  c++  java
  • 两段实际爬虫程序应用

    """Scrape a WeChat article's table rows and print the first two cells of each row.

    Fetches the article HTML, normalizes it via BeautifulSoup's prettify,
    re-parses with lxml, and prints td[1] + td[2] text for every <tr>.
    Network I/O only; no return value.
    """
    import requests
    from bs4 import BeautifulSoup
    from lxml import html

    etree = html.etree

    url = "https://mp.weixin.qq.com/s/drle9K4jgVWxm4v14ETbpQ"
    response = requests.get(url)
    # prettify() re-serializes the (often malformed) WeChat markup so lxml
    # parses it consistently.
    soup = BeautifulSoup(response.text, features='html.parser')
    content = soup.prettify()
    html_content = etree.HTML(content)
    # Original target: //*[@id="js_content"]/section[2]/section/section[2]/table
    ret_data = html_content.xpath('//tr')
    for item in ret_data:
        con = item.xpath("./td[1]/text()")
        con1 = item.xpath("./td[2]/text()")
        # Guard: rows may lack one of the cells; indexing [0] on an empty
        # xpath result would raise IndexError.
        if con and con1:
            # .strip() removes the surrounding newlines/spaces that
            # prettify() introduces (the original chained
            # .strip("\n").strip(" ").strip("\n"), which .strip() subsumes).
            print(con[0].strip() + con1[0].strip())
    

     

    """Scrape a second WeChat article's Q&A table and print question + answer per row.

    Each row is expected to hold the question number/text in td[1]'s spans and
    the answer in td[2]. Rows without a second question span are printed as
    plain text (section headers). Network I/O only; no return value.
    """
    import requests
    from bs4 import BeautifulSoup
    from lxml import html

    etree = html.etree

    url = "https://mp.weixin.qq.com/s/Zt2K7aOfSr8mrSdArfzWAg"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, features='html.parser')
    content = soup.prettify()
    html_content = etree.HTML(content)
    # Original targets:
    #   //*[@id="js_content"]/section[2]/section/section[2]/table
    #   //*[@id="js_content"]/section[2]/section/section[2]/section[2]/section/section/table/tbody/tr[6]/td[1]/p/span[1]
    ret_data = html_content.xpath('//tr')
    for item in ret_data:
        con = item.xpath("./td[1]/p/span[1]/text()")    # question number
        con1 = item.xpath("./td[1]/p/span[2]/text()")   # question text
        con2 = item.xpath("./td[2]/p/text()")           # answer (plain text part)
        con3 = item.xpath("./td[2]/p/span/text()")      # answer (span part)
        # Guard: skip rows missing the first cell entirely — the original
        # indexed con[0] unconditionally and would crash on such rows.
        if not con:
            continue
        if con1 and con2 and con3:
            # .strip() subsumes the original chained
            # .strip("\n").strip(" ").strip("\n") calls.
            print(con[0].strip() + con1[0].strip()
                  + "      答案→" + con2[0].strip() + con3[0].strip())
        else:
            print(con[0].strip())

     

    下载 CSV 文件,并获取其内容:

    """Download a CSV file (CVE-2020-1938 advisory data) and print each row.

    Streams the response body through csv.reader line by line.
    Network I/O only; no return value.
    """
    import csv
    import requests

    CSV_URL = 'https://www.remedy-cloud.com/download/csv/CVE-2020-1938'

    with requests.Session() as s:
        download = s.get(CSV_URL)
        # Fail loudly on HTTP errors instead of parsing an error page as CSV.
        download.raise_for_status()
        decoded_content = download.content.decode('utf-8')
        cr = csv.reader(decoded_content.splitlines(), delimiter=',')
        # Iterate the reader directly — no need to materialize a list first.
        for row in cr:
            print(row)
    

      

  • 相关阅读:
    500桶酒中有一桶毒酒
    查看docker run参数(亲测实用)
    ubuntu密码忘记-备份
    python sklearn2pmml
    javafx弹窗显示错误堆栈
    Java实现新开一个进程
    MockServer调试通过,本地通过浏览器可以打开对应web网页
    java 实现Put request
    JAVA发送HttpClient请求及接收请求完整代码实例
    我还是很喜欢你
  • 原文地址:https://www.cnblogs.com/weidaijie/p/14118768.html
Copyright © 2011-2022 走看看