zoukankan      html  css  js  c++  java
  • python3之BeautifulSoup

    #!/usr/bin/env python3
    # -*- coding: utf-8 -*-
    """Beautiful Soup walkthrough: tag access, find/find_all and CSS selectors.

    Running the script only pretty-prints the parsed sample document.  The
    other examples live in the ``demo_*`` functions below; call any of them
    with ``soup`` to see their output.
    """
    from bs4 import BeautifulSoup

    html = """
    <html><head><title>The Dormouse's story</title></head>
    <body>
    <p class="title" name="dromouse"><b>The Dormouse's story</b></p>
    <p class="story">Once upon a time there were three little sisters; and their names were
    <a href="http://example.com/elsie" class="sister" id="link1"><!-- Elsie --></a>,
    <a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
    <a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
    and they lived at the bottom of a well.</p>
    <p class="story">...</p>
    """


    def demo_tag_selectors(soup):
        """Print examples of attribute-style tag access on *soup*.

        Covers reading a tag, its name, its attributes, its text content,
        its child nodes and its ancestors.
        """
        print(soup.title)
        print(soup.title.name)
        print(soup.title.text)
        print(soup.head)
        print(soup.p)
        print(soup.a)

        # Reading attributes: subscript access and the attrs dict are equivalent.
        print("====获取属性====")
        print(soup.p["name"])
        print(soup.p.attrs["name"])

        # Reading content.
        print("====获取内容====")
        print(soup.p.string)
        print(soup.p.text)

        # Child nodes: ``contents`` exposes them as a list, so individual
        # children can be picked by index.
        print("====获取子节点====")
        print(soup.p.contents)
        print(soup.p.contents[0])

        # Direct parent of the first <p>.
        print(soup.p.parent)
        print(soup.p.parent.name)

        # Walk every ancestor of the <title> text node.
        content = soup.head.title.string
        for parent in content.parents:
            print(parent.name)


    def demo_standard_selectors(soup):
        """Print examples of the ``find_all`` / ``find`` search API on *soup*."""
        print(soup.find_all("a")[0])
        print(soup.find_all("a"))
        print(soup.find_all(attrs={'name': 'dromouse'}))
        # ``class`` is a Python keyword, hence the trailing underscore.
        print(soup.find_all(class_="title"))
        # ``string=`` is the current name of the keyword formerly spelled ``text=``.
        print(soup.find_all(string="The Dormouse's story"))
        print(soup.find_all("a", limit=1))
        print(soup.find("a"))


    def demo_css_selectors(soup):
        """Print examples of CSS-selector queries (``select``) on *soup*."""
        # Elements with class "sister" nested inside elements with class
        # "story"; class selectors need the leading dot.
        print(soup.select(".story .sister"))
        # <a> tags nested inside <p> tags.
        print(soup.select("p a"))
        # The element whose id is "link2".
        print(soup.select("#link2"))
        # The first <p> tag.
        print(soup.select("p")[0])
        # The class attribute value of the first <p> tag.
        print(soup.select("p")[0]["class"])
        # get_text must be called; without parentheses the bound method
        # itself would be printed instead of the text.
        print(soup.select("p")[0].get_text())
        print(soup.select("p")[0].text)


    soup = BeautifulSoup(html, "html.parser")

    # Default output: the parsed document, re-indented by prettify().
    print(soup.prettify())
  • 相关阅读:
    SDOI2017遗忘的集合
    菜鸡的考场emacs配置
    SDOI2017苹果树
    SDOI2017硬币游戏
    都11点了为什么还没有人阿克离场
    TJOI2013数字根
    HNOI2018毒瘤
    闵可夫斯基和
    三维凸包学习小记
    灭绝树学习小记
  • 原文地址:https://www.cnblogs.com/python-kp/p/15137787.html
Copyright © 2011-2022 走看看