直接上代码:(商品名称、单价、图片链接)
import pymysql import requests import re def getHTMLText(url): kv = {'cookie':'thw=cn; hng=CN%7Czh-CN%7CCNY%7C156; enc=ooWAQ8HPiBkBlDgWaQ2BoQXFD4cHXejeOP0Nq7xvbCuGN5yubT%2ByBjrb2j417KSrQkoR9YQxMFoqYufejy7Hlw%3D%3D; _m_h5_tk=9cc0be22588c97655e9e0ed031f29703_1589472803622; _m_h5_tk_enc=8fd3fcd9077f0f17bcb2dc4f9d593617; cookie2=1a0da2cc9535ebe4f7bd2787bebb9da1; t=0a472589a79eda4e33e9b072e3446525; _tb_token_=e136e0330e37e; alitrackid=www.taobao.com; _samesite_flag_=true; cna=cAVGFNpuDkUCAXkcRV0prgNa; sgcookie=EGxNSorLw1t5Dg21WTFJw; unb=3361002229; uc3=nk2=qA%2Fo8e0UjX1l%2BUs%3D&lg2=UtASsssmOIJ0bQ%3D%3D&id2=UNN78Eg15kheYA%3D%3D&vt3=F8dBxGZobO%2BfXgtBG40%3D; csg=228d2d4a; lgc=%5Cu5E05%5Cu6C14%5Cu5CF0happy; cookie17=UNN78Eg15kheYA%3D%3D; dnk=%5Cu5E05%5Cu6C14%5Cu5CF0happy; skt=aa63ca1a4a6e356c; existShop=MTU4OTYzNTEyOQ%3D%3D; uc4=nk4=0%40qjS8tzpCQQHfNZapqmDNrmd4%2F2Dhnw%3D%3D&id4=0%40UgQz06zOiEqwpAtViK7HqZlIKslx; tracknick=%5Cu5E05%5Cu6C14%5Cu5CF0happy; _cc_=U%2BGCWk%2F7og%3D%3D; _l_g_=Ug%3D%3D; sg=y99; _nk_=%5Cu5E05%5Cu6C14%5Cu5CF0happy; cookie1=UNJSu1S2nK7AhsBSrVKq4Nd7T4K1fH40ygcHPrTYWeA%3D; lastalitrackid=login.taobao.com; tfstk=ccVGB7cFtRk12ge_PFG_ovbuN0aGaiXZfSPUT5GPww8ivnNE7sYkLLSdMwmTjSpf.; uc1=cookie16=URm48syIJ1yk0MX2J7mAAEhTuw%3D%3D&cookie21=VT5L2FSpccLuJBreK%2BBd&cookie15=UIHiLt3xD8xYTw%3D%3D&existShop=false&pas=0&cookie14=UoTUM2YYf7HXaw%3D%3D; mt=ci=21_1; v=0; JSESSIONID=EC27C014A7BFB337D51D380F31E05C14; l=eBTRDRMIq3U2_UibBOfwourza77OSIRAguPzaNbMiOCPO_fp5GiGWZb3Hg89C3GVh67HR3J_JUNTBeYBqIv4n5U62j-la_kmn; isg=BA4O1D8B3Mb76GvkyGwHSEkCX-TQj9KJQPZkSjhXepHMm671oB8imbRZ08f3g8qh', 'user-agent':'Mozilla/5.0'} try: r = requests.get(url, headers=kv,timeout=30) r.raise_for_status() r.encoding = r.apparent_encoding #print(r.text) return r.text except: return "" def parsePage(ilt, html): try: plt = re.findall(r'"view_price":"[d.]*"', html) tlt = re.findall(r'"raw_title":".*?"', html) pic=re.findall(r'"pic_url":".*?"',html) k=0 kind="甘果类零食" for i in range(len(plt)): price = eval(plt[i].split(':')[1]) title = eval(tlt[i].split(':')[1]) img="https:"+eval(pic[i].split(':')[1]) oldprice=price.replace('1','4') ilt.append([k,title,price,oldprice,100,img,kind]) except: print("") def printGoodsList(ilt): db = pymysql.connect("localhost", "root", "511924", "summerperiod", charset='utf8') cursor = db.cursor() sql_cixian = "INSERT INTO food values (%s,%s,%s,%s,%s,%s,%s)" cursor.executemany(sql_cixian, ilt) db.commit() db.close() tplt = "{:4} {:8} {:16} {:16}" print(tplt.format("序号", "价格", "商品名称","商品图片")) count = 0 for g in ilt: count = count + 1 print(tplt.format(count, g[0], g[1],g[2])) def main(): goods = '甘果类零食' depth = 2 start_url = 'https://s.taobao.com/search?q=' + goods infoList = [] for i in range(depth): try: url = start_url + '&s=' + str(44 * i) html = getHTMLText(url) parsePage(infoList, html) except: continue printGoodsList(infoList) main()
更改后缀和链接就可以爬取你想要的商品。(这里是以食品为例)