import time import requests from concurrent.futures import ThreadPoolExecutor import pymysql import threading import json def get_info(item): try: id,url=item content=json.dumps(requests.get(url,timeout=5).text,ensure_ascii=False) update_sql='update talent_info_3 set content=%s where id= %s'%(content,id) # lock.acquire() cur.execute(update_sql) conn.commit() # lock.release() # time.sleep(2) except Exception as e: print(e,url) if __name__ == '__main__': conn=pymysql.connect(host='127.0.0.1', user='root', password="xxxxxx", database='work', port=3306 ) cur=conn.cursor() # lock = threading.Lock() sql='select id,mainurl from talent_info_3 where content is null and mainurl is not null and school="马萨诸塞大学,阿默斯特"' res1=cur.execute(sql) print(res1) res=cur.fetchall() with ThreadPoolExecutor(max_workers=3) as p: futures=[p.submit(get_info,item) for item in res] result=[task.result() for task in futures] print(result)
异常原因:
使用了多线程,多线程使用了同一个数据库连接,但每个execute前没有加上互斥锁
方法:
方法一:每个execute前加上互斥锁
import threading
lock=threading.lock()
lock.acquire()
cursor.execute(command)
lock.release()
方法二:
每个线程拥有自己的数据库连接,即在线程调用函数中加上数据库连接代码
方法三:
所有线程共用一个连接池,需要考虑线程总数和连接池连接数上限的问题