# coding:utf-8 import random, re import json, time import uuid from bs4 import BeautifulSoup import threading import requests import MySQLdb from lxml import etree from selenium import webdriver from selenium.webdriver.common.desired_capabilities import DesiredCapabilities from selenium.webdriver.common.proxy import ProxyType import redis r = redis.Redis(host="10.10.20.110", port=6379) import random import multiprocessing import os import urllib3 urllib3.disable_warnings() from HTMLParser import HTMLParser import sys reload(sys) sys.setdefaultencoding('utf-8') session = requests.session() import hashlib def md5(str): m = hashlib.md5() m.update(str) return m.hexdigest() def replace(newline): newline = str(newline) newline = newline.replace(' ','').replace(' ','').replace(' ','').replace(' ','').replace(' ','').replace(' ','').replace(' ','').replace('amp;','') re_comment = re.compile('<!--[^>]*-->') newlines = re_comment.sub('', newline) newlines = newlines.replace('<!--','').replace('-->','') return newlines def insert_data(dbName,data_dict): try: data_values = "(" + "%s," * (len(data_dict)) + ")" data_values = data_values.replace(',)', ')') dbField = data_dict.keys() dataTuple = tuple(data_dict.values()) dbField = str(tuple(dbField)).replace("'",'') conn = MySQLdb.connect(host="110.110.110.177", user="xuchunlin", passwd="123", db="epai_spider", charset="utf8") cursor = conn.cursor() sql = """ insert into %s %s values %s """ % (dbName,dbField,data_values) params = dataTuple cursor.execute(sql, params) conn.commit() cursor.close() conn.close() print "===== 插入成功 =====" return 1 except Exception as e: print "******** 插入失败 ********" print e return 0 def insert_data1(dbName,data_dict): try: data_values = "(" + "%s," * (len(data_dict)) + ")" data_values = data_values.replace(',)', ')') dbField = data_dict.keys() dataTuple = tuple(data_dict.values()) dbField = str(tuple(dbField)).replace("'",'') conn = MySQLdb.connect(host="110.110.110.177", user="xuchunlin", passwd="123", db="liuyao_spider", charset="utf8") cursor = conn.cursor() sql = """ insert into %s %s values %s """ % (dbName,dbField,data_values) params = dataTuple cursor.execute(sql, params) conn.commit() cursor.close() conn.close() print "===== 插入成功 =====" return 1 except Exception as e: print "******** 插入失败 ********" print e return 0 def insert_data_many(dbName,list_data_dict): try: # 得到列表的第一个字典集合 data_dict = list_data_dict[0] # 得到(s%,s%,s%,s%) data_values = "(" + "%s," * (len(data_dict)) + ")" data_values = data_values.replace(',)', ')') dbField = data_dict.keys() dataTuple = tuple(data_dict.values()) dbField = str(tuple(dbField)).replace("'",'') conn = MySQLdb.connect(host="110.110.110.177", user="xuchunlin", passwd="123", db="epai_spider", charset="utf8") cursor = conn.cursor() sql = """ insert into %s %s values %s """ % (dbName,dbField,data_values) params = [] for item in list_data_dict: params.append(tuple(item.values())) params = tuple(params) # print sql # print dbField # print params # print data_values cursor.executemany(sql, params) conn.commit() cursor.close() conn.close() print "===== 插入成功 =====" return 1 except Exception as e: print "******** 插入失败 ********" print e return 0 def select_data(sql): conn = MySQLdb.connect(host="110.110.110.177", user="xuchunlin", passwd="123", db="epai_spider", charset="utf8") cursor = conn.cursor(cursorclass=MySQLdb.cursors.DictCursor) cursor.execute(sql) result = cursor.fetchall() resultList = [] for i in result: resultList.append(i) cursor.close() return resultList def select_data1(sql): conn = MySQLdb.connect(host="110.110.110.177", user="xuchunlin", passwd="123", db="liuyao_spider", charset="utf8") cursor = conn.cursor(cursorclass=MySQLdb.cursors.DictCursor) cursor.execute(sql) result = cursor.fetchall() resultList = [] for i in result: resultList.append(i) cursor.close() return resultList def update_data(sql,data): conn = MySQLdb.connect(host="110.110.110.177", user="xuchunlin", passwd="123", db="epai_spider", charset="utf8") cursor = conn.cursor(cursorclass=MySQLdb.cursors.DictCursor) try: cursor.execute(sql,data) conn.commit() print "更新成功" except Exception as e: print e conn.rollback() conn.close() def update_data1(sql,data): conn = MySQLdb.connect(host="110.110.110.177", user="xuchunlin", passwd="123", db="liuyao_spider", charset="utf8") cursor = conn.cursor(cursorclass=MySQLdb.cursors.DictCursor) try: cursor.execute(sql,data) conn.commit() print "更新成功" except Exception as e: print e conn.rollback() conn.close()