zoukankan      html  css  js  c++  java
  • 淘宝比价

    #抓取淘宝数据
    import re
    import requests
    from bs4 import BeautifulSoup
    import  string
    import os
    import sqlite3
    class Getdata:
        """Fetch Taobao search pages and extract price/title pairs from them.

        Every method is a stateless helper meant to be called on the class
        itself, e.g. ``Getdata.getHTMLText(url, header)``.
        """

        @staticmethod
        def getHTMLText(url, header, timeout=30):
            """Download ``url`` and return the response body as text.

            Args:
                url: Address to request.
                header: Dict of HTTP headers sent with the request.
                timeout: Seconds to wait for the server before giving up, so a
                    stalled connection cannot hang the crawl forever.

            Returns:
                The decoded page text, or ``""`` when the request fails or the
                server answers with an error status.
            """
            try:
                r = requests.get(url, headers=header, timeout=timeout)
                r.raise_for_status()
                # Use the charset detected from the body itself.
                r.encoding = r.apparent_encoding
                return r.text
            except requests.RequestException:
                return ""

        @staticmethod
        def parsePage(ilt, html):
            """Append every ``[price, title]`` pair found in ``html`` to ``ilt``.

            Prices and titles are taken from the ``"view_price":"..."`` and
            ``"raw_title":"..."`` fields embedded in the page and paired up in
            order of appearance. Both values are stored as strings.

            Args:
                ilt: List that receives the ``[price, title]`` entries (mutated
                    in place).
                html: Page text to scan.

            Returns:
                None. Prints "爬取失败" when ``html`` cannot be scanned or when
                the page holds fewer titles than prices.
            """
            import json  # local import: only used to decode JSON string escapes

            try:
                prices = re.findall(r'"view_price":"([\d.]*)"', html)
                # Allow backslash-escaped characters (including \") in titles.
                raw_titles = re.findall(r'"raw_title":"((?:[^"\\]|\\.)*)"', html)
            except TypeError:
                # html was not a string.
                print("爬取失败")
                return

            for price, raw_title in zip(prices, raw_titles):
                # Decode escapes such as \u0026 without eval()-ing scraped
                # text; fall back to the raw capture if it is not valid JSON.
                try:
                    title = json.loads('"' + raw_title + '"')
                except ValueError:
                    title = raw_title
                ilt.append([price, title])

            if len(raw_titles) < len(prices):
                # Some prices had no matching title.
                print("爬取失败")

        @staticmethod
        def GetCount(html):
            """Return the page count announced by ``"totalPage":N`` in ``html``.

            When the marker occurs several times the last occurrence wins;
            when it is absent the result is ``0``.
            """
            matches = re.findall(r'"totalPage":(\d+)', html)
            if not matches:
                return 0
            return int(matches[-1])

        @staticmethod
        def printGoodsList(ilt):
            """Print ``ilt`` as a tab-separated table numbered from 1.

            Args:
                ilt: Sequence of ``[price, title]`` entries.
            """
            tplt = "{:4}\t{:8}\t{:16}"
            print(tplt.format("序号", "商品价格", "商品名称"))
            for count, g in enumerate(ilt, start=1):
                print(tplt.format(count, g[0], g[1]))
    
    class DatabaseMannege:
        """Create and fill the SQLite table that stores scraped goods.

        Both helpers are called on the class itself and open (and close) their
        own connection to the database file.
        """

        @staticmethod
        def CreateDataBase(db_path="taobao.db"):
            """Create the ``GoodMsg`` table in the SQLite file at ``db_path``.

            Args:
                db_path: Path of the SQLite database file.

            Returns:
                None. Failures are reported with a printed message instead of
                an exception: one message when the database cannot be opened,
                another when the table cannot be created (for example because
                it already exists).
            """
            try:
                db = sqlite3.connect(db_path)
            except sqlite3.Error:
                print("创建数据库失败")
                # Without a connection there is nothing left to do.
                return
            try:
                db.execute('create table GoodMsg(id varchar(10),price varchar(10),name varchar(40))')
                db.commit()
            except sqlite3.Error:
                print("创建表失败或表已经存在")
            finally:
                db.close()

        @staticmethod
        def InsertDatabase(data, db_path="taobao.db"):
            """Insert each ``[price, name]`` entry of ``data`` into ``GoodMsg``.

            Rows are committed one at a time, so a failing row does not undo
            the rows inserted before it.

            Args:
                data: Iterable of two-item sequences ``(price, name)``.
                db_path: Path of the SQLite database file.

            Returns:
                None. Progress and per-row failures are printed.
            """
            db = sqlite3.connect(db_path)
            try:
                cur = db.cursor()
                for item in data:
                    try:
                        print("开始插入")
                        # Bound parameters keep quotes in scraped titles from
                        # breaking (or injecting into) the statement.
                        cur.execute(
                            "insert into GoodMsg(price,name)values(?,?)",
                            (str(item[0]), str(item[1])),
                        )
                        db.commit()
                        print("插入成功")
                    except (sqlite3.Error, IndexError, TypeError):
                        print('插入失败')
            finally:
                db.close()
    class Main:
        """Interactive entry point: ask for a product and crawl its results."""

        @staticmethod
        def main():
            """Prompt for a search term, crawl every result page and store it.

            Each page's goods are written to the database once, as soon as the
            page has been parsed; the complete numbered list is printed after
            the crawl. A page that fails is reported and skipped.
            """
            from urllib.parse import quote  # local import keeps this block self-contained

            print("请输入查询商品")
            goods = input()
            infoList = []
            # Percent-encode the term so characters such as '&', '#' or spaces
            # cannot corrupt the query string.
            start_url = "https://s.taobao.com/search?q=" + quote(goods, safe="")
            # NOTE(review): hard-coded browser session cookie; it presumably
            # expires and has to be refreshed by hand — confirm before relying on it.
            header = {"cookie":"thw=cn; cna=ktJ/FI8k0gQCAbaLv4XUGVvh; tg=0; enc=%2FDi9xgv2fnznKtXV88N9fUTdV6UcRLyw3G6h3pjdwcpbHwkSTh%2FO1B1zsb29cDTL5N8TU0t4TdkRNxzvKIn4Ig%3D%3D; hng=CN%7Czh-CN%7CCNY%7C156; tracknick=1052071694www; t=0a525deca2dff81647d91643519e7e37; UM_distinctid=16b9bd49a2a5ef-031997ebe67ce2-37c143e-144000-16b9bd49a2b92e; miid=1364685100501550517; _cc_=W5iHLLyFfA%3D%3D; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0%26__ll%3D-1%26_ato%3D0; _m_h5_tk=98af7fdaf32be92fe72127eda6e0044e_1571041861081; _m_h5_tk_enc=ca1bdc50118e6ce4e5fd587ccc946e6c; mt=ci%3D-1_0; v=0; cookie2=1aac9317cb43d8f5dfab37bd0222fcf9; _tb_token_=578e3e4e7eedb; JSESSIONID=021AC0B7547DE41EE0944D2ECB89C106; alitrackid=www.taobao.com; lastalitrackid=www.taobao.com; l=dBjS2MZrqT2zAZFsBOCgSZ1_aY79jIRAguWbYNq9i_5BK6L_qNbOkg25WFp6cjWfthYB4NSLztv9-etkiKy06Pt-g3fPNxDc.; isg=BHR0oiuylQB4VAH5skFM2Q9IRTLsTpjNHWdJ-w7VA_-CeRTDNlnkx4w7_fEEgdCP",
                            "user-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36"}

            html = Getdata.getHTMLText(start_url, header)
            try:
                depth = Getdata.GetCount(html)
            except NameError:
                # GetCount may leave its result unbound when the page carries
                # no "totalPage" marker; treat that as "no pages".
                depth = 0
            DatabaseMannege.CreateDataBase()
            for i in range(depth):
                try:
                    # Result pages are addressed by item offset, 44 per page.
                    url = start_url + "&s=" + str(44 * i)
                    html = Getdata.getHTMLText(url, header)
                    # Collect this page separately so that only its new goods
                    # are inserted, not everything gathered so far.
                    page_items = []
                    Getdata.parsePage(page_items, html)
                    DatabaseMannege.InsertDatabase(page_items)
                    infoList.extend(page_items)
                except Exception as exc:
                    print("第{}页处理失败: {}".format(i + 1, exc))
                    continue
            Getdata.printGoodsList(infoList)
    # Run the crawler only when executed as a script, not when imported.
    if __name__ == "__main__":
        Main.main()
  • 相关阅读:
    2015年11月27日 野外生存(一)刀
    2015年11月26日 Java基础系列(五)异常Exception
    高斯混合模型(理论+opencv实现)
    K-means算法(理论+opencv实现)
    支持向量机(理论+opencv实现)
    《图像处理实例》 之 答题卡检测
    关于VS+ImageWatch在线调试问题
    关于W8.1不能安装VS2015(包括2017等)
    《图像处理实例》 之 车牌定位
    《opencv学习》 之 OTSU算法实现二值化
  • 原文地址:https://www.cnblogs.com/jestin/p/12911334.html
Copyright © 2011-2022 走看看