zoukankan      html  css  js  c++  java
  • 13 Beautiful Soup库的基本元素

    举例:

    
    
    """Basic elements of the Beautiful Soup library.

    Downloads a small demo page, parses it with the built-in "html.parser",
    and prints a Tag, its name, its chain of parents, its attributes and its
    string content. The comment above each print() shows the output the
    original author recorded for the demo page.
    """


    import requests
    from bs4 import BeautifulSoup

    url = "https://python123.io/ws/demo.html"
    # Bound the wait: without a timeout the request can block indefinitely
    # if the server stalls.
    r = requests.get(url, timeout=10)
    # Fail fast on a 4xx/5xx answer instead of parsing an error page that may
    # not contain the tags accessed below.
    r.raise_for_status()
    demo = r.text
    soup = BeautifulSoup(demo, "html.parser")
    # Uncomment to dump the whole parsed document:
    # print(soup.prettify())

    # <title>This is a python demo page</title>
    print(soup.title)

    # soup.<name> returns the first tag with that name; keep it in a variable
    # so the document is not searched again for every print below.
    tag = soup.a
    # <a class="py1" href="http://www.icourse163.org/course/BIT-268001" id="link1">Basic Python</a>
    print(tag)
    # a
    print(tag.name)
    # p
    print(tag.parent.name)
    # body
    print(tag.parent.parent.name)
    # html
    print(tag.parent.parent.parent.name)
    # [document]
    print(tag.parent.parent.parent.parent.name)
    # {'href': 'http://www.icourse163.org/course/BIT-268001', 'class': ['py1'], 'id': 'link1'}
    print(tag.attrs)
    # ['py1']
    print(tag.attrs['class'])
    # http://www.icourse163.org/course/BIT-268001
    print(tag.attrs['href'])
    # <class 'dict'>
    print(type(tag.attrs))
    # <class 'bs4.element.Tag'>
    print(type(tag))


    # Basic Python
    print(tag.string)
    # <p class="title"><b>The demo python introduces several python courses.</b></p>
    print(soup.p)
    # The demo python introduces several python courses.
    print(soup.p.string)
    # <class 'bs4.element.NavigableString'>
    print(type(soup.p.string))


    # Type of an HTML comment
    """Basic elements of the Beautiful Soup library"""


    import requests
    from bs4 import BeautifulSoup

    # An HTML comment inside <b> next to ordinary text inside <p>.
    newHTML = "<b><!--This is a comment--></b><p>This is not a comment</p>"
    newsoup = BeautifulSoup(newHTML, "html.parser")

    # Evaluate each .string once and reuse it for both the value and the type.
    # Original author's note: .string only gives a single line of the comment;
    # print(newsoup.b.contents) can be used to get multi-line content.
    bold_string = newsoup.b.string
    paragraph_string = newsoup.p.string

    # Printed values, in order:
    #   This is a comment
    #   This is not a comment
    # Note that the comment markers are not shown, so the printed text alone
    # does not reveal which one came from an HTML comment.
    for node in (bold_string, paragraph_string):
        print(node)

    # Printed types, in order:
    #   <class 'bs4.element.Comment'>
    #   <class 'bs4.element.NavigableString'>
    for node in (bold_string, paragraph_string):
        print(type(node))
  • 相关阅读:
    php判断值是否为空
    MyQQ
    java动态定义二维数组问题
    回归CSDN
    字体工具栏
    传说中的服务器
    sql server 2000 出现不能执行查询,因为一些文件丢失或未注册
    chm文件打不开
    Cmd不能运行,窗口闪一下就消失
    isql病毒
  • 原文地址:https://www.cnblogs.com/sruzzg/p/13046881.html
Copyright © 2011-2022 走看看