zoukankan      html  css  js  c++  java
  • python

    import requests
    from bs4 import BeautifulSoup
    import sqlite3
    
    # Crawl pages 1-100 of the Lianjia second-hand listing index and store one
    # row per listing in the ``rsf`` table of test.db.
    # The ``rsf`` table is not created here; it must already exist.
    LIST_URL = "https://cs.lianjia.com/ershoufang/pg%s/"
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/64.0.3282.140 Safari/537.36',
    }
    # Placeholders let sqlite3 do the quoting, so scraped text that contains
    # a single quote can neither break nor alter the statement.
    INSERT_SQL = (
        "insert into rsf(nid,dz,fx,dx,cx,zx,dt,jg,title,url)"
        "values(?,?,?,?,?,?,?,?,?,?)"
    )


    def _split_house_info(parts):
        """Map the '|'-separated houseInfo fields onto (dz, fx, dx, cx, zx, dt).

        Six fields are used as they are; five fields get the string 'None'
        as the sixth value. Any other count yields six empty strings and the
        raw fields are printed so the odd listing can be inspected.
        """
        if len(parts) == 6:
            return tuple(parts)
        if len(parts) == 5:
            return tuple(parts) + ('None',)
        print("unexpected houseInfo:", parts)
        return ('',) * 6


    conn = sqlite3.connect("test.db")
    c = conn.cursor()

    try:
        # One session for the whole crawl so the connection can be reused.
        req = requests.session()

        for num in range(1, 101):
            # NOTE(review): verify=False turns off TLS certificate checking;
            # kept from the original script, confirm it is really needed.
            # The timeout keeps a stalled server from hanging the crawl forever.
            response = req.get(LIST_URL % num, headers=HEADERS, verify=False, timeout=30)

            f1 = BeautifulSoup(response.text, 'lxml')
            f2 = f1.find(class_='sellListContent')
            if f2 is None:
                # No listing container on this page; skip it instead of
                # failing with AttributeError on None.
                print("page %s: no listing container found, skipped" % num)
                continue

            rows = []
            for i in f2.find_all(class_='clear LOGCLICKDATA'):
                data_id = i.find(class_="noresultRecommend").get('data-housecode')
                href = i.find(class_="noresultRecommend img ").get('href')
                title = i.find(class_="title").get_text()
                adress = i.find(class_="houseInfo").get_text().split("|")
                jage = i.find(class_="totalPrice").get_text()

                dz, fx, dx, cx, zx, dt = _split_house_info(adress)
                # Tuple order must match the column list in INSERT_SQL.
                rows.append((int(data_id), dz, fx, dx, cx, zx, dt, jage, title, href))

            # Write the page in one batch and commit once per page.
            c.executemany(INSERT_SQL, rows)
            conn.commit()
    finally:
        # Close the database even when a request or a parse step fails.
        conn.close()
    
    
        # import json
        # file_path = 'info%s.txt'%num
        # json_data = json.dumps(info_list).encode('utf8')
        # with open(file_path,'wb') as f:
        #     f.write(json_data)

    sqlite3 读取数据

    import sqlite3
    import re

    # Print the listings in the ``rsf`` table whose price lies strictly
    # between 30 and 50.
    conn = sqlite3.connect("test.db")
    c = conn.cursor()

    # Leading decimal number of the stored price text. The original
    # ``split('')`` always raised ValueError (empty separator), so the numeric
    # prefix is extracted explicitly instead.
    price_re = re.compile(r'\d+(?:\.\d+)?')

    try:
        # Query every stored listing.
        cursor = c.execute("SELECT * from rsf")
        k = 1
        for row in cursor:
            # NOTE(review): row[7] is presumably the price column (jg) and
            # row[-2] the title; this depends on the table's column order,
            # confirm against the schema.
            found = price_re.match(str(row[7]).strip())
            if found is None:
                # No numeric price in this row; nothing to compare.
                continue
            num = float(found.group())
            if 30.0 < num < 50.0:
                print(k, row[1], row[3], num, row[-2])
                k += 1
    finally:
        conn.close()
  • 相关阅读:
    自制的 MPlayer Skin
    mplayer filter 参数及效果
    可拖动的层DIV的完整源代码【转】
    Hibernate的检索方式(一)【转】
    HQL经典语句
    常适用的特效网页代码
    C#优化字符串操作【转】
    Hibernate的检索方式(二)【转】
    内联inline的使用方法【转】
    Hibernate的检索方式(三)【转】
  • 原文地址:https://www.cnblogs.com/Anec/p/9940989.html
Copyright © 2011-2022 走看看