闲来无事,用Python写的小实例。
何为搜索引擎关键字建议呢?看图吧,红框中的内容就是搜索引擎自动提示的关键字搜索建议。
直接用浏览器的开发者工具抓取地址和返回的数据。
包括百度、谷歌和360。
#coding=utf8
# ---------------------------------------------------------------------------
# Listing 1 of 3: Baidu keyword suggestions (Python 2 script).
# ---------------------------------------------------------------------------
import cookielib
import urllib
import urllib2
import json
import re


def getSuggestion(word):
    """Return Baidu's keyword suggestions for ``word``.

    The reply is decoded as GBK and is not parsed as a whole: only the
    array that follows the ``s:`` key is extracted and parsed as JSON
    (presumably because the surrounding object literal is not valid JSON;
    verify against a live response).

    Args:
        word: Keyword as a byte string (what ``raw_input`` returns). It is
            percent-encoded byte-for-byte before being put into the URL.
            NOTE(review): which text encoding the service expects for
            ``wd`` is not visible here; confirm before relying on
            non-ASCII keywords.

    Returns:
        The decoded list of suggestions, or an empty list when the reply
        contains no ``s:`` array.

    Raises:
        urllib2.URLError: if the HTTP request fails.
        ValueError: if the extracted array is not valid JSON.
    """
    headers = {'User-Agent': 'Mozilla/4.0'}
    # Percent-encode the keyword so spaces, '&', '#' and non-ASCII bytes
    # cannot break or truncate the query string.
    url = ("http://suggestion.baidu.com/su?wd=" + urllib.quote(word, safe='')
           + "&p=3&cb=window.bdsug.sug&from=superpage&t=1392097137657")
    req = urllib2.Request(url, None, headers)
    res_data = urllib2.urlopen(req)
    try:
        res = res_data.read()
    finally:
        res_data.close()

    # Decode once, up front, so the pattern runs on text rather than on raw
    # multi-byte GBK sequences.
    text = res.decode('gbk')
    # Capture only the array itself. Stripping 's:' and '});' with
    # str.replace would also delete those sequences inside suggestion text
    # (e.g. turning "https:" into "http").
    m = re.search(r'\bs:\s*(\[.*\])', text, re.S)
    if m is None:
        return []
    return json.loads(m.group(1))


while True:
    wd = raw_input('Input a keyword:')
    if not wd:
        # An empty line ends the session.
        break
    sugs = getSuggestion(wd)
    if not sugs:
        print('None')
        continue
    for sug in sugs:
        # Single-argument print(...) behaves exactly like the Python 2
        # print statement.
        print(sug)


#coding=utf8
# ---------------------------------------------------------------------------
# Listing 2 of 3: Google keyword suggestions (Python 2 script).
# ---------------------------------------------------------------------------
import cookielib
import urllib
import urllib2
import json
import re


def _unwrap_jsonp(text, callback):
    """Return the payload of a ``callback(...)`` JSONP reply.

    Only the outer wrapper is removed, so parentheses that occur inside
    the payload are left untouched. Text that does not start with
    ``callback(`` is returned stripped but otherwise unchanged.
    """
    body = text.strip()
    prefix = callback + '('
    if not body.startswith(prefix):
        return body
    body = body[len(prefix):]
    end = body.rfind(')')
    return body if end == -1 else body[:end]


def getSuggestion(word):
    """Return Google's keyword suggestions for ``word``.

    Args:
        word: Keyword as a byte string (what ``raw_input`` returns). It is
            percent-encoded byte-for-byte before being put into the URL.

    Returns:
        The second element of the decoded JSON array. The caller below
        prints the first item of each entry in it.

    Raises:
        urllib2.URLError: if the HTTP request fails.
        ValueError: if the unwrapped reply is not valid JSON.
    """
    # Percent-encode the keyword so spaces, '&', '#' and non-ASCII bytes
    # cannot break or truncate the query string.
    url = ("https://www.google.com.hk/complete/search?client=hp&q="
           + urllib.quote(word, safe=''))
    res_data = urllib2.urlopen(url)
    try:
        res = res_data.read()
    finally:
        res_data.close()
    # Strip just the callback wrapper. Removing every ')' from the reply
    # would also delete parentheses that belong to the suggestions.
    s = _unwrap_jsonp(res, 'window.google.ac.h')
    sugs = json.loads(s)
    return sugs[1]


while True:
    wd = raw_input('Input a keyword:')
    if not wd:
        # An empty line ends the session.
        break
    sugs = getSuggestion(wd)
    if not sugs:
        print('None')
        continue
    for sug in sugs:
        print(sug[0])


#coding=utf8
# ---------------------------------------------------------------------------
# Listing 3 of 3: 360 (so.360.cn) keyword suggestions (Python 2 script).
# ---------------------------------------------------------------------------
import cookielib
import urllib
import urllib2
import json
import re


def _unwrap_jsonp(text, callback):
    """Return the payload of a ``callback(...)`` JSONP reply.

    Only the outer wrapper is removed, so parentheses that occur inside
    the payload are left untouched. Text that does not start with
    ``callback(`` is returned stripped but otherwise unchanged.
    """
    body = text.strip()
    prefix = callback + '('
    if not body.startswith(prefix):
        return body
    body = body[len(prefix):]
    end = body.rfind(')')
    return body if end == -1 else body[:end]


def getSuggestion(word):
    """Return 360 search's keyword suggestions for ``word``.

    Args:
        word: Keyword as a byte string (what ``raw_input`` returns). It is
            percent-encoded byte-for-byte; the URL declares
            ``encodein=utf-8``, so the bytes should be UTF-8.

    Returns:
        The value stored under ``'result'`` in the decoded reply. The
        caller below prints the ``'word'`` field of each entry in it.

    Raises:
        urllib2.URLError: if the HTTP request fails.
        ValueError: if the unwrapped reply is not valid JSON.
        KeyError: if the reply has no ``'result'`` key.
    """
    headers = {'User-Agent': 'Mozilla/4.0'}
    # Percent-encode the keyword so spaces, '&', '#' and non-ASCII bytes
    # cannot break or truncate the query string.
    url = ("http://sug.so.360.cn/suggest?callback=suggest_so&encodein=utf-8"
           "&encodeout=utf-8&format=json&fields=word,obdata&word="
           + urllib.quote(word, safe=''))
    req = urllib2.Request(url, None, headers)
    res_data = urllib2.urlopen(req)
    try:
        res = res_data.read().decode('utf8')
    finally:
        res_data.close()

    # Strip just the callback wrapper. A blanket replace of ');' would also
    # delete that sequence if it appeared inside a suggestion.
    s = _unwrap_jsonp(res, 'suggest_so')
    sugs = json.loads(s)
    return sugs['result']


while True:
    wd = raw_input('Input a keyword:')
    if not wd:
        # An empty line ends the session.
        break
    sugs = getSuggestion(wd)
    if not sugs:
        print('None')
        continue
    for sug in sugs:
        print(sug['word'])
直接抓的网页数据,原理非常简单。 均未深入做编码处理,所以中文支持不好。