zoukankan      html  css  js  c++  java
  • 网络爬虫基础练习

    import requests
    from bs4 import BeautifulSoup
    # Basic web-scraping exercise: fetch a locally served HTML page, parse it
    # with BeautifulSoup, and print various views of the parsed tree. Each
    # section of output is preceded by a numbered separator line.
    newsurl='http://localhost:63342/untitled/3/29.html?_ijt=ltocl4v68kb1po4608e3291lkm'
    res=requests.get(newsurl)
    # Force UTF-8 decoding of the response body before reading res.text.
    res.encoding="utf-8"
    soup=BeautifulSoup(res.text,'html.parser')

    # Raw HTML text as received.
    print(res.text)
    print("1-----------------")
    print(soup.div)
    print("2-----------------")
    print(soup.p)  # access by tag name; returns the first matching tag
    print("3-----------------")
    print(soup.head)
    print("4-----------------")
    print(soup.p.name)  # string: the tag's name
    print("5-----------------")
    print(soup.p.attrs)  # dict: all attributes of the tag
    print("6-----------------")
    print(soup.p.contents)  # list: the tag's children
    print("7-----------------")
    print(soup.p.text)  # string
    print("8-----------------")
    print(soup.p.string)
    print("9-----------------")
    # CSS selectors: by tag name, by id, and by class.
    print(soup.select('li'))
    print("10-----------------")
    print(soup.select('#p1Node'))
    print("11-----------------")
    print(soup.select('.news-list-title'))
    print("12-----------------")
    # Extract the text of the h1 tag.
    print(soup.h1.text)
    print("13-----------------")
    # Extract the link (href) of the first a tag.
    print(soup.a.attrs['href'])
    print("14-----------------")
    # Print the full contents of every li tag.
    for li_tag in soup.select('li'):
        print(li_tag.contents)
    print("15-----------------")
    # The statements below were fused onto a single line in the original,
    # which is a syntax error; each one is now on its own line.
    print(soup.select('.news-list-title')[0].text)
    print(soup.li.a.attrs['href'])
    # Look up the first '.news-list-info' element once and print the text of
    # its first two children.
    news_info = soup.select('.news-list-info')[0]
    print(news_info.contents[0].text)
    print(news_info.contents[1].text)

  • 相关阅读:
    P4611 [COCI2011-2012#7] TRAMPOLIN
    P3119 [USACO15JAN]草鉴定Grass Cownoisseur
    P4417 [COCI2006-2007#2] STOL
    P4645 [COCI2006-2007 Contest#3] BICIKLI
    P1155 双栈排序
    P4610 [COCI2011-2012#7] KAMPANJA
    P4329 [COCI2006-2007#1] Bond
    EZOJ #227
    EZOJ #226
    p4980 polya定理
  • 原文地址:https://www.cnblogs.com/piglet00/p/8672196.html
Copyright © 2011-2022 走看看