在CSS选择器中,id在同一页面内是唯一的(不会重复),而class可以被多个元素重复使用。
soup.select('#title') #代表找出id为title的元素
soup.select('.link') #代表找出class为link的元素
css选取属性方法:
soup.select('a')[0]['href'] #代表取出第一个a标签的href属性值
from bs4 import BeautifulSoup

# Sample document used by the selector examples below.
# Note: the "panel-heading" <div> is never closed in this markup, so in the
# parsed tree the "panel-body" <div> ends up nested inside it.
html = (
    ' <div class="panel">'
    ' <div class="panel-heading">'
    ' <h4>Hello</h4>'
    ' <div class="panel-body">'
    ' <ul class="list" id="list-1" name="elements">'
    ' <li class="element">Foo</li>'
    ' <li class="element">Bar</li>'
    ' <li class="element">Jay</li>'
    ' </ul>'
    ' <ul class="list list-small" id="list-2">'
    ' <li class="element">Foo</li>'
    ' <li class="element">Bar</li>'
    ' </ul>'
    ' </div>'
    ' </div> '
)

soup = BeautifulSoup(html, 'html5lib')

# A leading "." selects by class. A space between two selectors means
# "descendant of": here, .panel-heading elements inside a .panel element.
print(1, soup.select('.panel .panel-heading'))

# Plain tag names take no prefix: every <li> located inside a <ul>.
print(2, soup.select('ul li'))

# A leading "#" selects by id: .element nodes inside the element whose
# id is "list-2".
print(3, soup.select('#list-2 .element'))

# select() yields a list; indexing it gives a single matched element,
# whose type is printed here.
print(4, type(soup.select('ul')[0]))
1 [<div class="panel-heading"> <h4>Hello</h4> <div class="panel-body"> <ul class="list" id="list-1" name="elements"> <li class="element">Foo</li> <li class="element">Bar</li> <li class="element">Jay</li> </ul> <ul class="list list-small" id="list-2"> <li class="element">Foo</li> <li class="element">Bar</li> </ul> </div> </div>] 2 [<li class="element">Foo</li>, <li class="element">Bar</li>, <li class="element">Jay</li>, <li class="element">Foo</li>, <li class="element">Bar</li>] 3 [<li class="element">Foo</li>, <li class="element">Bar</li>] 4 <class 'bs4.element.Tag'>
from bs4 import BeautifulSoup

# Sample document: two <ul> lists (ids "list-1" and "list-2") of <li> items.
html = (
    ' <div class="panel">'
    ' <div class="panel-heading">'
    ' <h4>Hello</h4>'
    ' <div class="panel-body">'
    ' <ul class="list" id="list-1" name="elements">'
    ' <li class="element">Foo</li>'
    ' <li class="element">Bar</li>'
    ' <li class="element">Jay</li>'
    ' </ul>'
    ' <ul class="list list-small" id="list-2">'
    ' <li class="element">Foo</li>'
    ' <li class="element">Bar</li>'
    ' </ul>'
    ' </div>'
    ' </div> '
)

soup = BeautifulSoup(html, 'html5lib')

# Nested selection: select() can also be called on each matched element,
# so the <li> items are printed one <ul> at a time.
for ul in soup.select('ul'):
    print(ul.select('li'))
[<li class="element">Foo</li>, <li class="element">Bar</li>, <li class="element">Jay</li>] [<li class="element">Foo</li>, <li class="element">Bar</li>]
获取属性
from bs4 import BeautifulSoup

# Sample document: two <ul> lists (ids "list-1" and "list-2") of <li> items.
html = (
    ' <div class="panel">'
    ' <div class="panel-heading">'
    ' <h4>Hello</h4>'
    ' <div class="panel-body">'
    ' <ul class="list" id="list-1" name="elements">'
    ' <li class="element">Foo</li>'
    ' <li class="element">Bar</li>'
    ' <li class="element">Jay</li>'
    ' </ul>'
    ' <ul class="list list-small" id="list-2">'
    ' <li class="element">Foo</li>'
    ' <li class="element">Bar</li>'
    ' </ul>'
    ' </div>'
    ' </div> '
)

soup = BeautifulSoup(html, 'html5lib')

# Two equivalent ways to read an attribute of a matched element:
# subscripting the element directly, or going through its .attrs mapping.
# Both lines print the same id for each <ul>.
for ul in soup.select('ul'):
    print(ul['id'])
    print(ul.attrs['id'])
list-1 list-1 list-2 list-2
获取内容
from bs4 import BeautifulSoup

# Sample document: two <ul> lists (ids "list-1" and "list-2") of <li> items.
html = (
    ' <div class="panel">'
    ' <div class="panel-heading">'
    ' <h4>Hello</h4>'
    ' <div class="panel-body">'
    ' <ul class="list" id="list-1" name="elements">'
    ' <li class="element">Foo</li>'
    ' <li class="element">Bar</li>'
    ' <li class="element">Jay</li>'
    ' </ul>'
    ' <ul class="list list-small" id="list-2">'
    ' <li class="element">Foo</li>'
    ' <li class="element">Bar</li>'
    ' </ul>'
    ' </div>'
    ' </div> '
)

soup = BeautifulSoup(html, 'html5lib')

# get_text() returns the text content of each matched <li> element.
for li in soup.select('li'):
    print(li.get_text())
Foo Bar Jay Foo Bar