#! /usr/bin/env python
# _*_ coding:utf-8 _*_
"""Collect the distinct origins that Bing lists for a ``site:`` query.

Run as a script with two arguments: the site to search for and the number
of result pages to walk, e.g. ``baidu.com 10``.
"""
import sys
from urllib.parse import quote, urlparse

import requests
from bs4 import BeautifulSoup


def bing_search(site, pages):
    """Scrape cn.bing.com ``site:<site>`` results and return unique origins.

    Every ``<h2>`` element of each result page is inspected; the ``href`` of
    its first anchor is reduced to ``scheme://netloc``. Each origin is
    printed the first time it is seen.

    Args:
        site: Value placed after ``site:`` in the Bing query, e.g.
            ``"baidu.com"``. It is percent-encoded before being put in the
            URL.
        pages: Number of result pages to fetch; anything ``int()`` accepts.

    Returns:
        A list of ``"scheme://netloc"`` strings in first-seen order, without
        duplicates. Empty when ``pages`` is less than 1.

    Raises:
        ValueError: If ``pages`` cannot be converted to an integer.
        requests.RequestException: Network failures and timeouts are not
            caught here and propagate to the caller.
    """
    page_count = int(pages)
    if page_count < 1:
        return []

    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0',
               'Accept': '*/*',
               'Accept-Language': 'en-US,en;q=0.5',
               'Accept-Encoding': 'gzip,deflate',
               'referer': "http://cn.bing.com/search?q=email+site%3abaidu.com&qs=n&sp=-1&pq=emailsite%3abaidu.com&first=2&FORM=PERE1"
               }
    query = quote(site, safe='')

    seen = set()      # O(1) membership test for de-duplication
    subdomains = []   # preserves the order in which origins were found

    # One session for the whole run; the context manager closes its
    # connections even if a request raises.
    with requests.Session() as conn:
        # Visit the home page before searching (presumably so the session
        # picks up Bing's cookies - TODO confirm it is still required).
        conn.get('http://cn.bing.com', headers=headers, timeout=8)

        for page in range(1, page_count + 1):
            url = ("https://cn.bing.com/search?q=site%3a" + query
                   + "&go=Search&qs=ds&first=" + str((page - 1) * 10)
                   + "&FORM=PERE")
            html = conn.get(url, headers=headers, timeout=8)
            soup = BeautifulSoup(html.content, 'html.parser')

            for heading in soup.find_all('h2'):
                anchor = heading.a
                if anchor is None:
                    # Not every <h2> on the page wraps a link.
                    continue
                link = anchor.get('href')
                if not link:
                    continue
                parts = urlparse(link)
                if not parts.scheme or not parts.netloc:
                    # Relative or malformed link: no origin to record.
                    continue
                domain = parts.scheme + "://" + parts.netloc
                if domain in seen:
                    continue
                seen.add(domain)
                subdomains.append(domain)
                print(domain)
    return subdomains


if __name__ == '__main__':
    # site=baidu.com
    if len(sys.argv) != 3:
        print("usage: %s baidu.com 10" % sys.argv[0])
        sys.exit(-1)
    site = sys.argv[1]
    page = sys.argv[2]
    Subdomain = bing_search(site, page)
使用方法:
python subdomain.py <domain> <pages>（例如：python subdomain.py baidu.com 10）