zoukankan html css js c++ java

两段实际爬虫程序应用

import requests
from  lxml   import  html
etree = html.etree
from  bs4 import  BeautifulSoup
# Fetch a WeChat article and print the text of the first two cells of every
# table row, concatenated on one line per row.
url = "https://mp.weixin.qq.com/s/drle9K4jgVWxm4v14ETbpQ"
# requests has no default timeout; without one a stalled server blocks forever.
response = requests.get(url, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were the article.
response.raise_for_status()
soup = BeautifulSoup(response.text, features='html.parser')
# The page is re-serialised by BeautifulSoup before being handed to lxml,
# which is why the extracted text is surrounded by newlines and spaces.
content = soup.prettify()
html_content = etree.HTML(content)
# XPath noted by the original author (presumably the target table):
#//*[@id="js_content"]/section[2]/section/section[2]/table
ret_data = html_content.xpath('//tr')
for item in ret_data:
    con = item.xpath("./td[1]/text()")
    con1 = item.xpath("./td[2]/text()")
    # Rows without text in either of the first two cells would raise
    # IndexError below; skip them.
    if not con or not con1:
        continue
    print(con[0].strip("\n").strip(" ").strip("\n") + con1[0].strip("\n").strip(" "))

import requests
from  lxml   import  html
etree = html.etree
from  bs4 import  BeautifulSoup
def _first_text(nodes):
    """Return the first XPath text result, trimmed, or "" when there is none.

    Trimming peels newlines, then spaces, then newlines again from both ends
    of the string, in that order.
    """
    if not nodes:
        return ""
    return nodes[0].strip("\n").strip(" ").strip("\n")


# Fetch a WeChat article and print one line per table row: the two spans of
# the first cell followed by the answer text found in the second cell.
url = "https://mp.weixin.qq.com/s/Zt2K7aOfSr8mrSdArfzWAg"
# requests has no default timeout; without one a stalled server blocks forever.
response = requests.get(url, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were the article.
response.raise_for_status()
soup = BeautifulSoup(response.text, features='html.parser')
# The page is re-serialised by BeautifulSoup before being handed to lxml,
# which is why the extracted text is surrounded by newlines and spaces.
content = soup.prettify()
html_content = etree.HTML(content)
# XPaths noted by the original author (presumably the table and one of its cells):
#//*[@id="js_content"]/section[2]/section/section[2]/table
#//*[@id="js_content"]/section[2]/section/section[2]/section[2]/section/section/table/tbody/tr[6]/td[1]/p/span[1]
ret_data = html_content.xpath('//tr')
for item in ret_data:
    con = item.xpath("./td[1]/p/span[1]/text()")
    con1 = item.xpath("./td[1]/p/span[2]/text()")
    con2 = item.xpath("./td[2]/p/text()")
    con3 = item.xpath("./td[2]/p/span/text()")
    if con1:
        # Row has a second span: print both spans plus the answer. Missing
        # pieces contribute "" instead of raising IndexError.
        print(
            _first_text(con)
            + _first_text(con1)
            + "      答案→"
            + _first_text(con2)
            + _first_text(con3)
        )
    elif con:
        # Row has only the first span: print it on its own.
        print(_first_text(con))

下载csv文件，并获取其内容

import csv
import requests

# Download a CSV file over HTTP and print each parsed row as a list of strings.
CSV_URL = 'https://www.remedy-cloud.com/download/csv/CVE-2020-1938'

with requests.Session() as s:
    # requests has no default timeout; without one a stalled server blocks forever.
    download = s.get(CSV_URL, timeout=10)
    # Without this check an HTTP error page would be parsed and printed as CSV rows.
    download.raise_for_status()
    decoded_content = download.content.decode('utf-8')
    # csv.reader accepts any iterable of lines, so the body is split first.
    cr = csv.reader(decoded_content.splitlines(), delimiter=',')
    my_list = list(cr)
    for row in my_list:
        print(row)

查看全文

相关阅读:
.Net Discovery系列之深入理解平台机制与性能影响(下) 狼人:
MEF——.NET中值得体验的精妙设计狼人:
.NET中的异步编程 IO完成端口以及FileStream.BeginRead 狼人:
Entity Framework 4.1 Code First 学习之路（二）狼人:
也玩MVC3.0 Razor自定义视图引擎来修改默认的Views目录结构狼人:
引用类型赋值为null与加速垃圾回收狼人:
在C#中选择正确的集合进行编码狼人:
Ajax和WEB服务数据格式：自定义返回格式狼人:
C#权限管理和设计浅谈狼人:
带你走进缓存世界狼人:

原文地址：https://www.cnblogs.com/weidaijie/p/14118768.html