import re

import requests
# Fetch the Baidu home page. A timeout is passed explicitly because
# requests otherwise waits indefinitely for an unresponsive server.
res = requests.get('http://www.baidu.com', timeout=10)
# Force UTF-8 so that res.text is decoded with that encoding.
res.encoding = 'utf-8'
print(res.text)
from bs4 import BeautifulSoup

# Small inline document used to demonstrate the BeautifulSoup API.
html = """
<html><head>head title</head><p>history</p></html>"""
# Name the parser explicitly: without it bs4 picks whichever parser is
# installed and emits a warning, so results can differ between machines.
soup = BeautifulSoup(html, 'html.parser')
print(soup.prettify())            # indented rendering of the parse tree
print(soup.select('p'))           # CSS selector: list of every <p> tag
print(soup.select('p')[0])        # first tag matched by the selector
print(soup.select('p')[0].text)   # text content of that tag
print(soup.p)                     # shortcut for the first <p> in the tree
# .attrs is the dict of HTML attributes; a bare .attr would instead be
# treated as a lookup for a child tag named <attr>.
print(soup.p.attrs)
print(soup.find_all('p'))         # list of every <p> tag
print(soup.find_all(id='dwww'))   # tags whose id attribute equals "dwww"
# ---------------- Using BeautifulSoup together with re ----------------
# Collect every <td class="job"> cell from the parsed document.
names = soup.find_all('td', class_="job")
# The regular expression captures any 2 to 5 characters that sit directly
# before a closing </a>, i.e. short link texts. re.findall only accepts a
# string, so each matched tag is serialised with str() first.
job_texts = [
    text
    for cell in names
    for text in re.findall(r">(.{2,5})</a>", str(cell))
]
print(job_texts)
# ----------------------------------------------------------------------
# Print the text content of every <p> element in the parsed document.
links = soup.select('p')
for link in links:
    print(link.text)