"""Scrape supplier quote rows for one product on hngp.gov.cn (Henan government
procurement mall) and stop at the first row quoted by a supplier other than
'韦玮'.

NOTE(review): the JSESSIONID cookie and the item/region identifiers below were
captured from a live browser session and will expire — confirm they are still
valid before running.
"""
from urllib import parse, request

from bs4 import BeautifulSoup  # third-party; required by the HTML parsing below

# Form fields posted to the Tapestry ",form.sdirect" endpoint.  The values were
# captured from a browser; most are opaque item/region codes the server expects.
login_data = parse.urlencode([
    ('formids', 'If,sl,jbcsPage,ghsPage,jgqsPage,picPage,spxqPage,Xzsp,Gwc,Xmxx,Dzdd,Ddys,selgys'),
    ('submitmode', ''),
    ('submitname', ''),
    ('If', 'F'),
    ('xhbh', 'ff8080815c04a864015c596c4c177699'),
    ('area', '00390019'),
    ('ppmc', '联想'),
    ('czy', ''),
    ('scjg', 4126.0),
    ('zdjg', 4126.0),
    ('xyghbh', 'ff80808151561b4701517a3e43825e4f'),
    ('xmxh', ''),
    ('lastcgsl', ''),
    ('cgje', 0),
    ('lastcgje', 0),
    ('cgsl', 0),
    ('isnwwbz', 'ww'),
    ('lbbs', ''),
    ('gysdqzdbj', '4126.0'),
    ('ghsmc', '点击选择供应商'),
    ('sl', 0),
    ('ghsPage', '供货商'),
])

# Build the POST request.  Origin/Cookie/User-Agent/Referer are copied from the
# recorded browser session so the server accepts the submission.
req = request.Request('http://www.hngp.gov.cn/wsscnew/egp/public/gg_spzsxx/SpxhMainTab,form.sdirect')
req.add_header('Origin', 'http://www.hngp.gov.cn')
req.add_header('Cookie', 'JSESSIONID=E6738337F2A4BAE45C6127C732DA7D54')
req.add_header('User-Agent', 'Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25')
req.add_header('Referer', 'http://www.hngp.gov.cn/wsscnew/egp/public/gg_spzsxx/SpxhMainTab.html?xhbh=ff8080815c04a864015c596c4c177699&xmxh=null&area=00390019&xyghbh=ff80808151561b4701517a3e43825e4f&lastcgsl=0&cgje=0.0&lastcgje=0.0&cgsl=0&isnwwbz=ww&czy=null&lbbs=null')

# Submit the form and read the raw response body; the context manager closes
# the connection (the original left it open).
with request.urlopen(req, data=login_data.encode('utf-8')) as response:
    html = response.read()

soup = BeautifulSoup(html, 'html.parser', from_encoding='utf-8')

# Walk every table row.  BUG FIX: the original re-read tr_list[1] on every
# iteration instead of the loop variable, so it never advanced through rows.
for tr in soup.find_all('tr'):
    td_list = tr.find_all('td')
    if len(td_list) < 6:
        # Header/spacer rows have fewer cells than a quote row; skip them
        # (the original would raise IndexError here).
        continue
    price = td_list[4].get_text()  # quoted price (kept for later use/printing)
    name = td_list[5].get_text()   # supplier name
    if name != '韦玮':
        break  # loop is the last statement, so this matches the original exit()