查排名 - 走看看

zoukankan html css js c++ java

查排名

#!/usr/bin/env python
# -*- coding:utf-8 -*-
from selenium.common.exceptions import TimeoutException
import os, time, random
from multiprocessing import Pool
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
from pyquery import PyQuery as pq
import pymysql
import time
import asdl
import random
import requests
import urllib.request
from urllib import parse

def get_platform(url):
    """Return the host portion of an absolute URL.

    The host is whatever sits between the ``://`` separator and the first
    ``/``, ``?`` or ``#`` that follows it (so any port or user-info that is
    part of that span is returned unchanged).

    Args:
        url: Absolute URL string such as ``"https://m.baidu.com/s?word=x"``.

    Returns:
        The host string, e.g. ``"m.baidu.com"``.

    Raises:
        ValueError: If ``url`` contains no ``://`` separator.
    """
    scheme_end = url.index('://')
    host = url[scheme_end + 3:]
    # partition() leaves the text untouched when the delimiter is absent, so a
    # URL without a path ("https://example.com") no longer raises ValueError,
    # and a query or fragment directly after the host is not mistaken for it.
    for delimiter in '/?#':
        host = host.partition(delimiter)[0]
    return host

def get_url(url):
    """Fetch ``url`` and return the redirect target embedded in its body.

    The response body is decoded as UTF-8 and searched case-insensitively for
    the marker ``url=``. When the marker is present, the text following it up
    to the next double quote is taken as the target; when it is absent the
    input ``url`` is used as-is. Surrounding single quotes are stripped from
    the result.

    Args:
        url: Address to open with ``urllib.request.urlopen``.

    Returns:
        The extracted target URL, or the original ``url`` when the body holds
        no ``url=`` marker.

    Raises:
        urllib.error.URLError: If the address cannot be opened.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the response; it was previously left open.
    with urllib.request.urlopen(url) as resp:
        data = resp.read().decode('utf-8')
    marker = data.lower().find('url=')
    if marker != -1:
        target = data[marker + 4:]
        quote_end = target.find('"')
        # find() yields -1 when there is no closing double quote; slicing with
        # -1 would silently drop the final character of the target.
        if quote_end != -1:
            target = target[:quote_end]
        url = target
    return url.strip("'")
def saveData(sql, args=None):
    """Execute one write statement against the ``seo`` database and commit it.

    A fresh connection is opened for every attempt. When the statement fails
    the transaction is rolled back, the error is printed, and the statement is
    retried after 30 seconds; the function only returns once a commit has
    succeeded. Failures while connecting are not retried and propagate.

    Args:
        sql: SQL statement text.
        args: Optional parameters handed to ``cursor.execute`` so callers can
            use ``%s`` placeholders instead of formatting values into ``sql``.
            Defaults to ``None``, which executes ``sql`` verbatim.
    """
    while True:
        # NOTE(review): host and credentials are hard-coded in source; they
        # should be moved to configuration and rotated.
        db = pymysql.connect(host="47.94.36.26",user="seo",passwd='djAcfKNHxF',db='seo',charset='utf8')
        try:
            cursor = db.cursor(cursor=pymysql.cursors.DictCursor)
            try:
                cursor.execute(sql, args)
                db.commit()
                print('ok')
                return
            # Exception rather than a bare except, so Ctrl-C and SystemExit
            # can still break out of the retry loop.
            except Exception:
                db.rollback()
                print('error:'+sql)
                print('提交失败,请您和开发人员联系,谢谢合作！')
            finally:
                cursor.close()
        finally:
            # Each attempt used to leak its connection.
            db.close()
        # Wait only after the connection has been released.
        time.sleep(30)
def _sql_quote(value):
    """Render ``value`` as an escaped, single-quoted SQL string literal."""
    return "'" + pymysql.converters.escape_string(str(value)) + "'"


def chrom(data):
    """Look up one keyword on mobile Baidu and store the matching results.

    The keyword row is first deleted from ``ganen_keys_cover``. The Baidu
    mobile result page for ``data['words']`` is then fetched, and every
    non-advertisement result whose content contains ``data['rule']`` is
    written to ``ganen_keys_results`` together with its rank among the
    non-advertisement results.

    Args:
        data: Mapping with the keys ``id``, ``words``, ``uid``, ``rule`` and
            ``author``.
    """
    # Remove the keyword from the pending table.
    sql = "delete from ganen_keys_cover where id = %s" % _sql_quote(data['id'])
    saveData(sql)
    words = data['words']
    uid = data['uid']
    rule = data['rule']
    author = data['author']
    # Build the search URL.
    search_url = 'https://m.baidu.com/s?' + parse.urlencode({'word': words})
    # Browser-like request headers (original note: "Baidu Tieba bug").
    headers = {
        'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36',
        'Cookie':'gr_user_id=1f9ea7ea-462a-4a6f-9d55-156631fc6d45; bid=vPYpmmD30-k; ll="118282"; ue="codin; __utmz=30149280.1499577720.27.14.utmcsr=douban.com|utmccn=(referral)|utmcmd=referral|utmcct=/doulist/240962/; __utmv=30149280.3049; _vwo_uuid_v2=F04099A9dd; viewed="27607246_26356432"; ap=1; ps=y; push_noty_num=0; push_doumail_num=0; dbcl2="30496987:gZxPfTZW4y0"; ck=13ey; _pk_ref.100001.8cb4=%5B%22%22%2C%22%22%2C1515153574%2C%22https%3A%2F%2Fbook.douban.com%2Fmine%22%5D; __utma=30149280.833870293.1473539740.1514800523.1515153574.50; __utmc=30149280; _pk_id.100001.8cb4=255d8377ad92c57e.1473520329.20.1515153606.1514628010.'
    }
    # Without a timeout a stalled connection would block this worker forever.
    res = requests.get(search_url, headers=headers, timeout=30)
    # Parse the page and select the result containers.
    doc = pq(res.text)
    results = doc(doc.html()).find('#results').children('div')
    # Walk the results; n is the rank among non-advertisement entries.
    n = 0
    for i in results:
        doc_i = doc(i)
        if doc_i.find("span:contains('广告')"):
            continue
        n += 1
        if not doc_i.find(":contains('"+rule+"')").text():
            continue
        # Title of the matching result.
        title = doc_i.find("h3").text()
        try:
            print('标题:'+title)
            url = get_url(doc_i.find("a").attr('href'))
            platform = get_platform(url)
            ctime = int(time.time())
            # Scraped text may contain quotes; every value is escaped so the
            # statement stays valid (a broken statement would make saveData
            # retry it indefinitely).
            values = ",".join(
                _sql_quote(v)
                for v in (words, n, title, url, rule, platform, uid, author, ctime)
            )
            sql = "insert into ganen_keys_results(words,ranking,title,url,rule,platform,uid,author,create_time) "
            sql += " values(%s)" % values
            saveData(sql)
        except Exception as exc:
            # Best effort: one failing result must not abort the others.
            print('error:'+title)
            print(repr(exc))

if __name__=='__main__':
    # Script entry point: load the pending keywords of one author, process
    # them in a pool of 20 worker processes, then print the elapsed seconds.
    # NOTE(review): host and credentials are hard-coded in source; they should
    # be moved to configuration and rotated.
    db = pymysql.connect(host="47.94.36.26",user="seo",passwd='djAcfKNHxF',db='seo',charset='utf8')
    try:
        cursor = db.cursor(cursor=pymysql.cursors.DictCursor)
        author = '张欢'
        print('当前的用户为:'+author)
        # Placeholder instead of string formatting; the driver quotes the value.
        cursor.execute("select * from ganen_keys_cover where author = %s", (author,))
        cover = cursor.fetchall()
    finally:
        # Close before the pool is created so workers do not inherit a live
        # connection; it was previously never closed.
        db.close()
    stime = int(time.time())
    if cover:
        p = Pool(20)
        pending = [p.apply_async(chrom,args=(i,)) for i in cover]
        p.close()
        p.join()
        # apply_async hides worker exceptions until get() is called.
        for job in pending:
            try:
                job.get()
            except Exception as exc:
                print('error:'+repr(exc))
    else:
        print('无数据')
    etime = int(time.time())
    ctime = etime - stime
    print('运行时间:'+str(ctime))

查看全文

相关阅读:
（转载）SAPI 包含sphelper.h编译错误解决方案
 C++11标准的智能指针、野指针、内存泄露的理解（日后还会补充，先浅谈自己的理解）
504. Base 7（LeetCode）
242. Valid Anagram（LeetCode）
169. Majority Element（LeetCode）
100. Same Tree（LeetCode）
171. Excel Sheet Column Number（LeetCode）
168. Excel Sheet Column Title（LeetCode）
122.Best Time to Buy and Sell Stock II(LeetCode)
404. Sum of Left Leaves（LeetCode）

原文地址：https://www.cnblogs.com/simadongyang/p/10252074.html