zoukankan      html  css  js  c++  java
  • 两段实际爬虫程序应用

    import requests
    from  lxml   import  html
    etree = html.etree
    from  bs4 import  BeautifulSoup
    # Fetch a WeChat article and print the text of the first two cells of
    # every table row, concatenated on one line per row.
    url = "https://mp.weixin.qq.com/s/drle9K4jgVWxm4v14ETbpQ"
    # A timeout stops the script from hanging forever on an unresponsive server.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.text, features='html.parser')
    # The prettified markup is what lxml parses below; prettify() places text
    # on its own indented lines, hence the newline/space stripping when printing.
    content = soup.prettify()
    html_content = etree.HTML(content)
    # XPath of the target table, kept for reference (presumably copied from the
    # browser's dev tools):
    # //*[@id="js_content"]/section[2]/section/section[2]/table
    ret_data = html_content.xpath('//tr')
    for item in ret_data:
        con = item.xpath("./td[1]/text()")
        con1 = item.xpath("./td[2]/text()")
        # xpath() returns an empty list when a cell has no direct text;
        # indexing [0] on it would raise IndexError, so skip such rows.
        if not con or not con1:
            continue
        # "\n " strips both newlines and spaces from each end in one pass.
        print(con[0].strip("\n ") + con1[0].strip("\n "))
    

     

    import requests
    from  lxml   import  html
    etree = html.etree
    from  bs4 import  BeautifulSoup
    # Fetch a WeChat article containing a question/answer table and print one
    # line per row: the question text, followed by the answer when present.
    url = "https://mp.weixin.qq.com/s/Zt2K7aOfSr8mrSdArfzWAg"
    # A timeout stops the script from hanging forever on an unresponsive server.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.text, features='html.parser')
    # The prettified markup is what lxml parses below; prettify() places text
    # on its own indented lines, hence the newline/space stripping when printing.
    content = soup.prettify()
    html_content = etree.HTML(content)
    # XPaths of the target table and of one sample cell, kept for reference
    # (presumably copied from the browser's dev tools):
    # //*[@id="js_content"]/section[2]/section/section[2]/table
    # //*[@id="js_content"]/section[2]/section/section[2]/section[2]/section/section/table/tbody/tr[6]/td[1]/p/span[1]
    ret_data = html_content.xpath('//tr')
    for item in ret_data:
        # First cell: text of the first and second <span> inside its <p>.
        con = item.xpath("./td[1]/p/span[1]/text()")
        con1 = item.xpath("./td[1]/p/span[2]/text()")
        # Second cell: text directly in <p>, and text inside its <span>.
        con2 = item.xpath("./td[2]/p/text()")
        con3 = item.xpath("./td[2]/p/span/text()")
        # xpath() returns an empty list when nothing matches; indexing [0]
        # on it would raise IndexError, so rows without a first span are skipped.
        if not con:
            continue
        # "\n " strips both newlines and spaces from each end in one pass.
        question = con[0].strip("\n ")
        if con1:
            # Either answer fragment may be missing; use whichever exist.
            answer = "".join(
                part[0].strip("\n ") for part in (con2, con3) if part
            )
            print(question + con1[0].strip("\n ") + "      答案→" + answer)
        else:
            print(question)

     

    下载 CSV 文件,并获取其内容

    import csv
    import requests
    
    # Download a CSV file over HTTP and print every parsed row.
    CSV_URL = 'https://www.remedy-cloud.com/download/csv/CVE-2020-1938'

    # Imported here so this snippet stays self-contained.
    import io

    with requests.Session() as s:
        # A timeout stops the script from hanging forever on an unresponsive server.
        download = s.get(CSV_URL, timeout=10)
        decoded_content = download.content.decode('utf-8')
        # Hand csv.reader a file-like object with newline='' so line endings
        # reach the parser untouched: str.splitlines() drops them (losing
        # newlines embedded in quoted fields) and also splits on characters
        # such as "\x0b" or "\u2028" that are not CSV record separators.
        cr = csv.reader(io.StringIO(decoded_content, newline=''), delimiter=',')
        my_list = list(cr)
        for row in my_list:
            print(row)
    

      

  • 相关阅读:
    制作类似QQ截图软件
    XML文件与实体类的互相转换
    MFC中真彩工具条的制作方法
    MFC使用技巧集锦(1)(转载)
    抽象工厂模式与工厂方法模式区别
    VC数据库编程分析
    如何让工具条显示256色图像
    华为软件编程规范和范例
    设计模式总结性试题
    VC++中基于ADO操作ACCESS数据库,FLEXGRID控件的综合应用
  • 原文地址:https://www.cnblogs.com/weidaijie/p/14118768.html
Copyright © 2011-2022 走看看