zoukankan html css js c++ java

关于这个该死的报错：TypeError

在利用Selenium爬取页面信息的时候突然报错，第一条信息爬取的时候还好好的，第二条就不行了。

请参考网上的爬取代码：

 1 # coding=utf-8
 2 """
 3 Created on 2015-12-10 @author: Eastmount
 4 利用Selenium爬取百度百科5A级景区的内容介绍的代码
 5 """
 6 
 7 import time
 8 import re
 9 import os
10 import sys
11 import codecs
12 import shutil
13 from selenium import webdriver
14 from selenium.webdriver.common.keys import Keys
15 import selenium.webdriver.support.ui as ui
16 from selenium.webdriver.common.action_chains import ActionChains
17 
18 # Open PhantomJS
19 # driver = webdriver.PhantomJS(executable_path="D:phantomjs-2.1.1-windowssbinphantomjs.exe")
20 driver = webdriver.PhantomJS(executable_path="D:phantomjs-1.9.8-windowsphantomjs.exe")
21 # driver = webdriver.Firefox()
22 wait = ui.WebDriverWait(driver, 10)  # 显示等待时间(实例，最大等待时间)
23 
24 
25 # Get the Content of 5A tourist spots
26 def getInfobox(entityName, fileName):
27     try:
28         # create paths and txt files
29         print(u'文件名称: ', fileName)
30         info = codecs.open(fileName, 'w', 'utf-8')
31 
32         # locate input  notice: 1.visit url by unicode 2.write files
33         # Error: Message: Element not found in the cache
34         #       Perhaps the page has changed since it was looked up
35         # 解决方法: 使用Selenium和Phantomjs
36 
37         print(u'实体名称: ', entityName.rstrip('
'))
38         driver.get("http://baike.baidu.com/")
39         elem_inp = driver.find_element_by_xpath("//form[@id='searchForm']/input")
40         # elem_inp = driver.find_elements_by_xpath("//div[@class='lemma-summary']/div")
41         elem_inp.send_keys(entityName)  
42         elem_inp.send_keys(Keys.RETURN)
43         info.write(entityName.rstrip('
') + '
')  # codecs不支持'
'换行
44 
45         # load content 摘要
46         elem_value = driver.find_elements_by_xpath("//div[@class='lemma-summary']/div")
47         for value in elem_value:
48             print(value.text)
49             info.writelines(value.text + '
')
50 
51         # 爬取文本信息
52         # 爬取所有段落<div class='para'>的内容 class='para-title'为标题 [省略]
53 
54         time.sleep(2)
55     # except Exception as e:  # 'utf8' codec can't decode byte
56     #     print("Error: ", e)
57     finally:
58         print('
')
59         info.close()
60 
61 
62 # Main function
63 def main():
64     # By function get information
65     path = "BaiduSpider\"
66     if os.path.isdir(path):
67         shutil.rmtree(path, True)
68     os.makedirs(path)
69     source = open("Tourist_spots_5A.txt", 'r')
70     num = 1
71     for entityName in source:
72         # entityName = unicode(entityName, "utf-8")
73         if u'故宫' in entityName:  # else add a '?'
74             entityName = '北京故宫'
75         # else: Name = entityName.rstrip('
')
76         name = "%04d" % num
77         fileName = path + str(name) + ".txt"
78         getInfobox(entityName, fileName)
79         num = num + 1
80     print('End Read Files!')
81     source.close()
82     driver.close()
83 
84 
85 if __name__ == '__main__':
86     main()

执行报错信息为：

Traceback (most recent call last):
File "D:/pycharm/untitled_DB/wordcloud/selenium爬取百度百科/Selenium_baidu.py", line 85, in <module>
main()
File "D:/pycharm/untitled_DB/wordcloud/selenium爬取百度百科/Selenium_baidu.py", line 77, in main
getInfobox(entityName, fileName)
File "D:/pycharm/untitled_DB/wordcloud/selenium爬取百度百科/Selenium_baidu.py", line 41, in getInfobox
elem_inp.send_keys(Keys.RETURN)
File "C:UsersAdministratorAppDataLocalProgramsPythonPython36libsite-packagesseleniumwebdriver
emotewebelement.py", line 479, in send_keys


'value': keys_to_typing(value)})
File "C:UsersAdministratorAppDataLocalProgramsPythonPython36libsite-packagesseleniumwebdriver
emotewebelement.py", line 628, in _execute
return self._parent.execute(command, params)
File "C:UsersAdministratorAppDataLocalProgramsPythonPython36libsite-packagesseleniumwebdriver
emotewebdriver.py", line 312, in execute
self.error_handler.check_response(response)
File "C:UsersAdministratorAppDataLocalProgramsPythonPython36libsite-packagesseleniumwebdriver
emoteerrorhandler.py", line 208, in check_response
raise exception_class(value)
selenium.common.exceptions.WebDriverException: Message: TypeError - 'undefined' is not a function (evaluating '_getTagName(currWindow).toLowerCase()')

找了1天都没找到原因，真的死烦，找到原因是71行代码写死，然而要是不加判断也会出现这样的报错，比较郁闷，后来查了半天资料，在Stackoverflow的评论中找到思路，很有可能是read文件的时候，读取到的内容格式有问题，于是查看了一下格式发现，果不其然，多了一个"/n"，修改代码：

if u'故宫' in entityName:  # else add a '?'
　　entityName = '北京故宫'
else: 
　　entityName = entityName.rstrip('
')
name = "%04d" % num
fileName = path + str(name) + ".txt"
getInfobox(entityName, fileName)
num = num + 1

在执行，ok，请忽略渣渣排版

查看全文

相关阅读:
四则运算
 androidstdio导入工程报错
 日程代码任务1
软件团队模式选择
 初识软件工程
 java数组中最大的子数组之和
 解决键盘布局错误(日文系统)
固态硬盘的更替
 ZendDebugger的配置
 apache命令行启动

原文地址：https://www.cnblogs.com/xiapu5150/p/8528774.html