zoukankan      html  css  js  c++  java
  • 查排名

    #!/usr/bin/env python
    # -*- coding:utf-8 -*-
    from selenium.common.exceptions import TimeoutException
    import os, time, random
    from multiprocessing import Pool
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.common.keys import Keys
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.common.action_chains import ActionChains
    from selenium.webdriver.chrome.options import Options
    from pyquery import PyQuery as pq
    import pymysql
    import time
    import asdl
    import random
    import requests
    import urllib.request
    from urllib import parse

    def get_platform(url):
        """Return the host part of an absolute URL.

        The host is everything between the first '://' and the next '/'.
        When the URL has no path (for example 'http://example.com') the whole
        remainder after '://' is returned.

        Args:
            url: Absolute URL string containing '://'.

        Returns:
            The text between '://' and the first following '/', or up to the
            end of the string when there is no such '/'.

        Raises:
            ValueError: If the URL does not contain '://'.
        """
        _, scheme_sep, remainder = url.partition('://')
        if not scheme_sep:
            # Keep the exception type callers already had to handle.
            raise ValueError("no '://' in url: %r" % (url,))
        # partition() never raises: a URL without a path yields the full host.
        return remainder.partition('/')[0]

    def get_url(url):
        """Fetch a page and return the redirect target it announces, if any.

        The response body is searched case-insensitively for 'url='. When the
        marker is found, the text after it up to the next double quote (or to
        the end of the body when there is no double quote) is taken as the
        target. Otherwise the URL passed in is kept. Surrounding single quotes
        are stripped from the result in both cases.

        Args:
            url: Address to fetch with urllib.request.urlopen.

        Returns:
            The extracted target URL, or the input URL when the body contains
            no 'url=' marker.

        Raises:
            urllib.error.URLError: If the page cannot be fetched.
        """
        marker = 'url='
        # The context manager closes the response even if read() fails.
        with urllib.request.urlopen(url) as resp:
            # Only an ASCII marker is searched for, so undecodable bytes are
            # replaced instead of aborting on non-UTF-8 pages.
            page = resp.read().decode('utf-8', errors='replace')
        start = page.lower().find(marker)
        if start != -1:
            tail = page[start + len(marker):]
            end = tail.find('"')
            # find() returns -1 when no closing quote exists; slicing with -1
            # would silently drop the last character, so take the whole tail.
            url = tail if end == -1 else tail[:end]
        return url.strip("'")
    def saveData(sql, params=None):
        """Execute one write statement and commit it, retrying until it succeeds.

        Each attempt opens a fresh connection, runs the statement, commits and
        closes the connection again. When execution or commit fails, the
        transaction is rolled back, the failure is printed, and the statement
        is retried after 30 seconds.

        Args:
            sql: SQL statement to execute.
            params: Optional values for '%s' placeholders in ``sql``; they are
                passed to ``cursor.execute`` so the driver escapes them. With
                the default None the statement is executed verbatim.

        Returns:
            None, once the statement has been committed.
        """
        while True:
            # NOTE(review): connection credentials are hard-coded in source;
            # they should be loaded from configuration or the environment.
            db = pymysql.connect(host="47.94.36.26",user="seo",passwd='djAcfKNHxF',db='seo',charset='utf8')
            try:
                cursor = db.cursor(cursor=pymysql.cursors.DictCursor)
                try:
                    cursor.execute(sql, params)
                    db.commit()
                    print('ok')
                    return
                except Exception:
                    # Exception (not a bare except) lets KeyboardInterrupt and
                    # SystemExit break out of the retry loop.
                    db.rollback()
                    print('error:'+sql)
                    print('提交失败,请您和开发人员联系,谢谢合作!')
            finally:
                # One connection per attempt: always release it, otherwise
                # every retry leaks a server connection.
                db.close()
            # NOTE(review): non-transient errors (e.g. a syntax error in sql)
            # are retried forever as well.
            time.sleep(30)
    def chrom(data):
        """Search mobile Baidu for one keyword and store the matching results.

        The keyword row is deleted from ``ganen_keys_cover`` first, then the
        Baidu result page for ``data['words']`` is fetched. Every non-ad result
        whose content contains ``data['rule']`` is resolved to its target URL
        and inserted into ``ganen_keys_results`` together with its ranking.

        Args:
            data: Mapping with the keys 'id', 'words', 'uid', 'rule' and
                'author' (a row of ``ganen_keys_cover``).

        Returns:
            None. Results are written to the database via ``saveData``.
        """
        # Values interpolated into SQL are escaped first: titles and URLs are
        # scraped from the web and may contain quotes or hostile input.
        escape = pymysql.converters.escape_string

        data_id = data['id']
        # Delete the keyword from the queue table.
        sql = "delete from ganen_keys_cover where id = '%s'"%(escape(str(data_id)))
        saveData(sql)
        words = data['words']
        uid = data['uid']
        rule = data['rule']
        author = data['author']
        # Build the search URL.
        search_url = 'https://m.baidu.com/s?' + parse.urlencode({'word': words})
        # Baidu Tieba bug (original author's note; presumably the reason the
        # request is sent with these browser-like headers - TODO confirm).
        headers = {
            'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36',
            'Cookie':'gr_user_id=1f9ea7ea-462a-4a6f-9d55-156631fc6d45; bid=vPYpmmD30-k; ll="118282"; ue="codin; __utmz=30149280.1499577720.27.14.utmcsr=douban.com|utmccn=(referral)|utmcmd=referral|utmcct=/doulist/240962/; __utmv=30149280.3049; _vwo_uuid_v2=F04099A9dd; viewed="27607246_26356432"; ap=1; ps=y; push_noty_num=0; push_doumail_num=0; dbcl2="30496987:gZxPfTZW4y0"; ck=13ey; _pk_ref.100001.8cb4=%5B%22%22%2C%22%22%2C1515153574%2C%22https%3A%2F%2Fbook.douban.com%2Fmine%22%5D; __utma=30149280.833870293.1473539740.1514800523.1515153574.50; __utmc=30149280; _pk_id.100001.8cb4=255d8377ad92c57e.1473520329.20.1515153606.1514628010.'
        }
        # NOTE(review): no timeout is set, so a stalled connection blocks this
        # worker indefinitely.
        res = requests.get(search_url, headers = headers)
        # Fetch and parse the result page.
        doc = pq(res.text)
        results = doc(doc.html()).find('#results').children('div')
        # Analyse the results; n is the ranking among non-ad entries.
        # NOTE(review): the original indentation was lost; ads are assumed to
        # be skipped entirely rather than merely left uncounted - confirm.
        n = 0
        for i in results:
            doc_i = doc(i)
            if doc_i.find("span:contains('广告')"):
                continue
            n += 1
            if not doc_i.find(":contains('"+rule+"')").text():
                continue
            # Title of the result.
            title = doc_i.find("h3").text()
            try:
                print('标题:'+title)
                target_url = get_url(doc_i.find("a").attr('href'))
                platform = get_platform(target_url)
                ctime = int(time.time())
                values = (words, n, title, target_url, rule, platform, uid, author, ctime)
                sql = "insert into ganen_keys_results(words,ranking,title,url,rule,platform,uid,author,create_time) "
                sql += " values('%s','%s','%s','%s','%s','%s','%s','%s','%s')" % tuple(escape(str(v)) for v in values)
                saveData(sql)
            except Exception:
                # Best effort per result: report it and continue with the next.
                print('error:'+title)

    if __name__=='__main__':
        # Script entry point: load every queued keyword of one author and
        # process them in parallel with a pool of 20 worker processes.
        # NOTE(review): connection credentials are hard-coded in source; they
        # should be loaded from configuration or the environment.
        db = pymysql.connect(host="47.94.36.26",user="seo",passwd='djAcfKNHxF',db='seo',charset='utf8')
        try:
            cursor = db.cursor(cursor=pymysql.cursors.DictCursor)
            author = '张欢'
            print('当前的用户为:'+author)
            # Let the driver quote the value instead of formatting it in.
            cursor.execute("select * from ganen_keys_cover where author = %s", (author,))
            cover = cursor.fetchall()
        finally:
            # Close before the pool starts so workers do not inherit an open
            # connection and the parent does not hold one while waiting.
            db.close()
        stime = int(time.time())
        if cover:
            p = Pool(20)
            pending = [p.apply_async(chrom,args=(i,)) for i in cover]
            p.close()
            p.join()
            # apply_async discards worker exceptions unless the result is
            # fetched; report them instead of failing silently.
            for job in pending:
                try:
                    job.get()
                except Exception as exc:
                    print('error:'+repr(exc))
        else:
            print('无数据')
        etime = int(time.time())
        ctime = etime - stime
        print('运行时间:'+str(ctime))

  • 相关阅读:
    ORACLE PL/SQL 实例精解之第七章 迭代控制之二
    ORACLE PL/SQL 实例精解之第六章 迭代控制之一
    ORACLE PL/SQL 实例精解之第五章 条件控制:CASE语句
    ORACLE PL/SQL 实例精解之第四章 条件控制:if 语句
    sql中用JOIN USING 简化JOIN ON
    ORACLE PL/SQL 实例精解之第三章 PL/SQL中的SQL
    ORACLE PL/SQL 实例精解之第二章 通用编程语言基础
    删除文件时提示“找不到该项目”,怎么解决? 转摘自:http://jingyan.baidu.com/article/e4d08ffdf5ab470fd2f60df4.html
    C#获取文件夹/文件的大小以及占用空间 转摘自:http://www.cnblogs.com/chenpeng-dota/articles/2176470.html
    git update-index --assume-unchanged on directory 转摘自:http://stackoverflow.com/questions/12288212/git-update-index-assume-unchanged-on-directory
  • 原文地址:https://www.cnblogs.com/simadongyang/p/10252074.html
Copyright © 2011-2022 走看看