from BeautifulSoup import BeautifulSoup
import re
doc = ['<html><head><title>Page title</title></head>',
'<body><p id="firstpara" align="center">This is paragraph <b>one</b>.',
'<p id="secondpara" align="blah">This is paragraph <b>two</b>.',
'</html>']
soup = BeautifulSoup(''.join(doc))
print soup.prettify()
运行结果为:
print soup.contents[0].name
#
print soup.contents[0].contents[0].name
for i in range(len(soup.contents[0])):
print soup.contents[0].contents[i].name
titleTag = soup.html.head.title
titleTag
# <title>Page title</title>
titleTag.string
# u'Page title'
len(soup('p'))
# 2
soup.findAll('p', align="center")
# [<p id="firstpara" align="center">This is paragraph <b>one</b>. </p>]
soup.find('p', align="center")
# <p id="firstpara" align="center">This is paragraph <b>one</b>. </p>
soup('p', align="center")[0]['id']
# u'firstpara'
soup.find('p', align=re.compile('^b.*'))['id']
# u'secondpara'
soup.find('p').b.string
# u'one'
soup('p')[1].b.string
# u'two'
