zoukankan      html  css  js  c++  java
  • 时间

    import requests
    from bs4 import  BeautifulSoup
    import lxml
    import re
    import time
    import random
    import pymysql.cursors
    from selenium import webdriver
    import pandas
    import numpy
    # Open the MySQL connection used by the whole script.  DictCursor makes
    # every fetched row a dict keyed by column name.
    # NOTE(review): host/user/password are hard-coded here; consider loading
    # them from configuration or the environment instead.
    connection = pymysql.connect(
        host='localhost',
        user='root',
        password='123',
        db='abc',
        charset='utf8mb4',
        cursorclass=pymysql.cursors.DictCursor,
    )

    # Read every row of the 竞店 table (presumably the list of competitor
    # shops) into ``shop_id``; the rows are iterated later in the script.
    sql = "select * from 竞店"
    with connection.cursor() as cursor:
        cursor.execute(sql)
        shop_id = cursor.fetchall()
    # Commit after the read, as the original script does.
    connection.commit()
    # HTTP request headers captured from a browser session against the shop
    # site.  The dict is consumed by the request helpers defined further down.
    # NOTE(review): "Ancoding" is presumably a typo for "Accept-Encoding".  It
    # is deliberately left unchanged: advertising "sdch"/"br" could make the
    # server return bodies the HTTP client cannot decode.
    # NOTE(review): the Cookie value is a captured session cookie committed in
    # source; it will expire and should not live in the code.
    payload = {
        "Ancoding": "gzip, deflate, sdch, br",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Connection": "keep-alive",
        "Cookie": "hng=; uss=UIMY14A%2B04Bbq%2BqRxS6C9OzJWudsw14Q1kb5mDDqxW%2BQ3YG%2BUcpgrDRWnRQ%3D; uc3=sg2=AC4AfXCJ7XkLw0gCUD1tD9ZxhXFdweN2A6VfybWadxI%3D&nk2=&id2=&lg2=; t=3c0787f77a28e0854ef28fc360b2c555; cookie2=1c912d33e44bdb2008763748702a61f4; _tb_token_=78577371d8136; l=AiQkmjyCyPnG7qTN1Iu5fBqvdCgWvUgn; isg=AvDwL_qYXdDeegACSXGXiIOKwb7f2NSDXgsSOepBvMsepZFPkkmkE0aNixo_; pnm_cku822=; cna=T7gREcWMLDsCAavWmjBJPJpS; Hm_lvt_c478afee593a872fd45cb9a0d7a9da3b=1495496950; Hm_lpvt_c478afee593a872fd45cb9a0d7a9da3b=1495496950",
        "Host": "tanggulake.tmall.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest",
    }

    # Pool of alternative User-Agent strings, one browser per entry.
    # The second and third entries used to be fused into a single string by a
    # misplaced quote/comma (implicit string-literal concatenation); they are
    # separate entries again.
    ues_age = [
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
        "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.9.168 Version/11.50",
        "Mozilla/5.0 (Windows; U; Windows NT 6.1; ) AppleWebKit/534.12 (KHTML, like Gecko) Maxthon/3.0 Safari/534.12",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E)",
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1 QQBrowser/6.9.11079.201",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)",
    ]
    def pig(url):
        """Return the total number of pages of a paginated listing.

        Fetches the first page (``url`` with ``"1"`` appended) and reads the
        text of the third element matched by the CSS selector below, which is
        expected to contain a "/"-separated pair such as ``"1/12"``.

        Args:
            url: Listing URL prefix; the page number is appended to it.

        Returns:
            The second "/"-separated field of that text, as a string.

        Raises:
            IndexError: If fewer than three elements match the selector or the
                matched text contains no "/".
        """
        # ``payload`` is a dict of HTTP headers (Cookie, User-Agent, ...), so it
        # belongs in ``headers=``; passing it as ``params=`` appended the cookie
        # and the other values to the URL query string instead.  "Host" is
        # dropped so the HTTP client derives it from ``url``: forcing the
        # hard-coded host would mis-route requests aimed at any other host.
        headers = {name: value for name, value in payload.items() if name != "Host"}
        response = requests.get(url + "1", headers=headers)
        soup = BeautifulSoup(response.text, "lxml")
        spans = soup.select("div >  div > div > div > span:nth-of-type(1)")
        return spans[2].text.split("/")[1]
    def xingxi(x):
        """Scrape every page of a paginated listing into one DataFrame.

        For each page from 1 to ``pig(x)`` the page is downloaded and three
        independent lists are extended:

        * item ids: the digits of the ``data-id`` attribute of every element
          matching ``div > div > div > dl``;
        * image links: ``https://`` + the part of each ``<img src>`` between
          ``//`` and the first ``.jpg``;
        * title/price pairs: the 1st and 3rd non-empty text fragments of every
          ``<dl>`` element.

        Args:
            x: Listing URL prefix; the page number is appended to it.

        Returns:
            A DataFrame with the columns "标题", "价格", "图片链接" and "id",
            built by concatenating the three lists side by side.  Rows are
            aligned by position only, so the lists may differ in length.

        Raises:
            IndexError: If a ``<dl>`` element has fewer than three text
                fragments.
        """
        # ``payload`` holds HTTP headers, so send it as headers rather than as
        # query-string parameters.  "Host" is left to the HTTP client so that
        # it always matches the URL actually requested.
        headers = {name: value for name, value in payload.items() if name != "Host"}

        ids = []
        pigg = []
        dates1 = []
        for pig_id in range(1, int(pig(x)) + 1):
            response = requests.get(x + str(pig_id), headers=headers)
            # Random 1-4 second pause between page downloads.
            time.sleep(random.randrange(1, 5))
            soup = BeautifulSoup(response.text, "lxml")

            # Item ids: keep only the digits of data-id.  The pattern must be
            # r"\D" (non-digit); a plain "D" only removed the letter D.
            for item in soup.select("div > div > div > dl"):
                ids.append(re.sub(r"\D", "", item.get("data-id") or ""))

            # Image links: skip <img> tags without a matching src instead of
            # crashing on ``None.group``.
            for img in soup.select("img"):
                found = re.match(r".*//(.*?\.jpg)", img.get("src") or "")
                if found:
                    pigg.append(r"https://" + found.group(1))

            # Title and price: stripped_strings already omits empty strings.
            for entry in soup.select("dl"):
                texts = list(entry.stripped_strings)
                dates1.append([texts[0], texts[2]])

        # Build the frames once, after all pages have been collected.
        shuju1 = pandas.DataFrame(dates1).rename(columns={0: "标题", 1: "价格"})
        shuju2 = pandas.DataFrame(pigg).rename(columns={0: "图片链接"})
        shuju3 = pandas.DataFrame(ids).rename(columns={0: "id"})
        return pandas.concat([shuju1, shuju2, shuju3], axis=1)
    def _load_page_source(url, settle=False):
        """Load ``url`` in a fresh PhantomJS browser and return its page source.

        When ``settle`` is true, waits a random 1-4 seconds after navigation
        before reading the source.  The browser is always shut down, whether
        loading succeeds or fails, so no PhantomJS process is left behind.
        """
        driver = webdriver.PhantomJS(service_args=['--ignore-ssl-errors=true', '--load-images=false'])
        try:
            driver.get(url)
            if settle:
                time.sleep(random.randrange(1, 5))
            return driver.page_source
        finally:
            driver.quit()


    def how_much(ids, shop_id):
        """Return the date encoded in an item page's ``dbst`` value.

        Loads the item page in PhantomJS (retrying once with a new browser if
        the first attempt raises), takes the first ``<script>`` element whose
        markup is longer than 1000 characters, and extracts the digits that
        follow ``dbst:`` in it.

        Args:
            ids: Item id appended to the item URL; converted with ``str`` so
                integer ids are accepted too.
            shop_id: Unused; kept so existing callers keep working.

        Returns:
            The date as a ``"%Y-%m-%d"`` string in local time.

        Raises:
            IndexError: If no large ``<script>`` exists or it has no ``dbst:``
                followed by digits.
        """
        url = "http://item.taobao.com/item.htm?id=" + str(ids)
        try:
            date = _load_page_source(url, settle=True)
        except Exception:
            # Best-effort single retry with a brand-new browser.  Unlike a bare
            # ``except:`` this lets KeyboardInterrupt/SystemExit through.
            date = _load_page_source(url)
        # Random 8-12 second pause after each item.
        time.sleep(random.randrange(8, 13))

        soup = BeautifulSoup(date, "lxml")
        a = [str(tag) for tag in soup.select("script") if len(str(tag)) > 1000]
        # The pattern must capture digits (\d); "(.d*)" captured only a single
        # arbitrary character.  Only the first 10 digits are used, presumably
        # to turn a millisecond timestamp into seconds.
        new_time = re.findall(r".*dbst:(\d+)", a[0].replace(" ", ""))[0][0:10]
        return time.strftime("%Y-%m-%d", time.localtime(int(new_time)))
    # For every shop row loaded above, look up the date of each item stored in
    # that shop's table and write it to the 上架时间 column.
    for dress in shop_id:
        # The table name comes from the "店铺名称" column.  Identifiers cannot
        # be bound as query parameters, so quote it with backticks (doubling
        # any embedded backtick) instead of concatenating it raw.
        table = "`" + dress["店铺名称"].replace("`", "``") + "`"

        # Read all ids first and release the cursor before the slow scraping
        # loop starts.
        with connection.cursor() as cursors:
            sql = "select id from " + table
            cursors.execute(sql)
            fff = [row["id"] for row in cursors.fetchall()]

        for i in fff:
            # str() so the item URL can be built even when ids are integers.
            dates = how_much(str(i), dress["店铺名称"])
            # Values are bound as parameters so the driver escapes them,
            # instead of being spliced into the statement with % formatting.
            sql = "UPDATE " + table + " set 上架时间 = %s where id = %s"
            with connection.cursor() as cursorss:
                # mogrify returns the exact statement execute() will send.
                print(cursorss.mogrify(sql, (dates, i)))
                cursorss.execute(sql, (dates, i))
            # Commit per item so finished work survives a later failure.
            connection.commit()
  • 相关阅读:
    11 [异常]-try...except
    3-4 网络编程 练习题
    31 选课系统
    3-3 面向对象 本章总结
    3-2 面向对象中级练习题
    3-1 面向对象练习题
    11 [面向对象]-面向对象的软件开发
    10 [面向对象]-元类
    App集成极光推送开发流程[关键步骤]
    App集成极光推送步骤
  • 原文地址:https://www.cnblogs.com/gao-xiang/p/6999148.html
Copyright © 2011-2022 走看看