zoukankan      html  css  js  c++  java
  • 两段实际爬虫程序应用

    import requests
    from lxml import html
    from bs4 import BeautifulSoup

    etree = html.etree

    # Scrape a table embedded in a WeChat (mp.weixin.qq.com) article and
    # print each row as "col1col2" on one line.
    url = "https://mp.weixin.qq.com/s/drle9K4jgVWxm4v14ETbpQ"
    # timeout so a stalled connection does not hang the script forever
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, features='html.parser')
    # prettify() normalizes the markup before handing it to lxml
    content = soup.prettify()
    html_content = etree.HTML(content)
    # original target xpath for reference:
    # //*[@id="js_content"]/section[2]/section/section[2]/table
    ret_data = html_content.xpath('//tr')
    for item in ret_data:
        con = item.xpath("./td[1]/text()")
        con1 = item.xpath("./td[2]/text()")
        # Skip rows without both cells (e.g. header rows); the original
        # crashed with IndexError here.  .strip() with no argument removes
        # all surrounding whitespace, replacing the broken chained
        # strip("\n").strip(" ") calls.
        if con and con1:
            print(con[0].strip() + con1[0].strip())
    

     

    import requests
    from lxml import html
    from bs4 import BeautifulSoup

    etree = html.etree

    # Scrape a quiz-style table from a WeChat article: column 1 holds a
    # numbered question split across two <span>s, column 2 holds the answer.
    url = "https://mp.weixin.qq.com/s/Zt2K7aOfSr8mrSdArfzWAg"
    # timeout so a stalled connection does not hang the script forever
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, features='html.parser')
    # prettify() normalizes the markup before handing it to lxml
    content = soup.prettify()
    html_content = etree.HTML(content)
    # original target xpaths for reference:
    # //*[@id="js_content"]/section[2]/section/section[2]/table
    # //*[@id="js_content"]/section[2]/section/section[2]/section[2]/section/section/table/tbody/tr[6]/td[1]/p/span[1]
    ret_data = html_content.xpath('//tr')
    for item in ret_data:
        con = item.xpath("./td[1]/p/span[1]/text()")    # question number
        con1 = item.xpath("./td[1]/p/span[2]/text()")   # question text
        con2 = item.xpath("./td[2]/p/text()")           # answer text
        con3 = item.xpath("./td[2]/p/span/text()")      # answer continuation
        # Rows missing the first cell entirely (e.g. header rows) are
        # skipped; the original crashed with IndexError on con[0].
        if not con:
            continue
        if con1:
            # Guard con2/con3 as well: not every row has both answer parts.
            answer = (con2[0].strip() if con2 else "") + (con3[0].strip() if con3 else "")
            print(con[0].strip() + con1[0].strip() + "      答案→" + answer)
        else:
            print(con[0].strip())

     

    下载csv文件,并获取其内容

    import csv
    import requests

    # Download a CVE advisory CSV and print it row by row.
    CSV_URL = 'https://www.remedy-cloud.com/download/csv/CVE-2020-1938'

    with requests.Session() as s:
        # timeout so a stalled connection does not hang the script forever
        download = s.get(CSV_URL, timeout=30)
        # fail loudly on HTTP errors instead of parsing an error page as CSV
        download.raise_for_status()
        decoded_content = download.content.decode('utf-8')
        # splitlines() feeds the in-memory text to csv.reader line by line
        cr = csv.reader(decoded_content.splitlines(), delimiter=',')
        my_list = list(cr)
        for row in my_list:
            print(row)
    

      

  • 相关阅读:
    无参考数据集
    dropout层
    postgresql查询表的大小
    vue 消息订阅与发布
    echarts实现pie自定义标签
    elementUI 时间线居左显示
    css+div实现各种常见边框
    css实现中括号边框
    div中多行内容垂直居中显示
    vue 实现组件全屏展示及退出
  • 原文地址:https://www.cnblogs.com/weidaijie/p/14118768.html
Copyright © 2011-2022 走看看