zoukankan html css js c++ java

初见网络爬虫

 1 #beautifulSoup
 2 from urllib.request import urlopen
 3 from bs4 import BeautifulSoup
 # Fetch the demo page and print its first <h1> element.
 # The response is used as a context manager so the underlying connection
 # is closed as soon as the markup has been parsed.
 with urlopen("http://www.pythonscraping.com/pages/page1.html") as html:
     bsObj = BeautifulSoup(html, 'html.parser')
 print(bsObj.h1)
 7 
 8 #处理异常
 9 html = urlopen("http://www.pythonscraping.com/pages/page1.html")
10 
11 #可能会发生两种异常
12 #1.网页在服务器上不存在
13 #2.服务器不存在
14 
15 #可以用以下方式处理这种异常
16 
# Guard the request so that neither failure mode crashes the script.
# HTTPError must be listed before URLError because it is a subclass of it.
from urllib.error import HTTPError, URLError

try:
    html = urlopen("http://www.pythonscraping.com/pages/page1.html")
except HTTPError as e:
    # The server was reached but answered with an error status
    # (for example, the page does not exist on the server).
    print(e)
    # Return a null value, abort the program, or fall back to another plan.
except URLError:
    # The server itself could not be reached. With the default opener,
    # urlopen raises in this situation instead of returning None, so the
    # failure has to be caught here rather than tested with "html is None".
    print("URL is not found")
else:
    # Program continues. Note: if the except branches above already return
    # or abort, this else clause is not needed and will never run.
    pass
30 
32 from urllib.request import urlopen
33 from urllib.error import HTTPError
34 from bs4 import BeautifulSoup
def getTitle(url):
    """Return the first <h1> tag inside the <body> of the page at *url*.

    Args:
        url: Address of the page to download and parse.

    Returns:
        The value of ``body.h1`` from the parsed document, or None when the
        page cannot be fetched (HTTP error status, unreachable server, missing
        local file) or when the parsed document has no body to look inside.
    """
    # Imported locally so the function does not depend on the file-level
    # import list, which only brings in HTTPError.
    from urllib.error import URLError

    try:
        html = urlopen(url)
    except URLError:
        # URLError is the base class of HTTPError, so this covers both
        # "page not found on the server" and "server not found at all".
        return None
    try:
        # Close the response as soon as the markup has been consumed.
        with html:
            soup = BeautifulSoup(html, 'html.parser')
        title = soup.body.h1
    except AttributeError:
        # Raised when soup.body is None, i.e. there is no <body> to search.
        return None
    return title
# Look up the page heading and report whether one was found.
title = getTitle("http://www.pythonscraping.com/pages/page1.html")
# Use an identity test for None: "==" can be overridden by the object
# being compared, while "is None" cannot.
if title is None:
    print("Title could not be found")
else:
    print(title)

查看全文

相关阅读:
hibernate对应的annocation版本
 Struts 2 OGNL
Struts2的Stack Context和ValueStack
Struts2中的OGNL详解
 struts2中根对象以及ognl .
在Struts 2中实现IoC
Struts2的属性驱动与模型驱动的区别
 Struts2的模型驱动
 Java中线程的锁和数据库中的事务隔离级别
 为什么socket编程要用到多线程

原文地址：https://www.cnblogs.com/geeker-xjl/p/11078057.html