首先,从 GitHub 上获取别人已经整理好的词库 JSON 数据:
https://github.com/kajweb/dict
数据格式大致如下
接着就可以直接使用 Python 处理这些数据,并将其插入数据库了:
"""Import a line-delimited JSON vocabulary file into a MySQL table.

Each non-empty line of the input file is parsed as one JSON object; the
head word, the Chinese translation(s) and the part of speech of the first
translation entry are extracted and inserted as one row.
"""
import json
import os
import sys

import demjson  # kept from the original script; parsing below only needs json
import pymysql
from jsonpath import jsonpath

# Input file inside the "json" folder. Built with os.path.join so the script
# does not depend on a backslash separator (and on the invalid "\c" escape).
filename = os.path.join("json", "cet4_2.json")


def dbconnect():
    """Open and return a connection to the local ``vocab`` database.

    Exits the process with an error message if the connection cannot be
    established.
    """
    try:
        return pymysql.connect(
            host='localhost',
            user='root',
            password='123456',
            database='vocab',
        )
    except pymysql.MySQLError as e:
        sys.exit(f"Can't connect to database: {e}")


def insertDb(word, trans, pos, db=None):
    """Insert one (word, trans, pos) row and commit it.

    Args:
        word: Head word text.
        trans: Translation text.
        pos: Part of speech, or None.
        db: Optional open connection to reuse. When omitted, a connection is
            opened for this call and closed again afterwards, so calling the
            function the old way no longer leaks a connection per row.

    Database errors are printed and swallowed so that one bad row does not
    abort a whole import run.
    """
    own_connection = db is None
    if own_connection:
        db = dbconnect()
    try:
        with db.cursor() as cursor:
            # NOTE(review): the table is named "toefl" although the input
            # file is a cet4 list - confirm this is the intended target.
            cursor.execute(
                " INSERT INTO toefl(word, trans, pos) VALUES(%s, %s, %s)",
                (word, trans, pos),
            )
        db.commit()
    except pymysql.MySQLError as e:
        print(str(e))
    finally:
        if own_connection:
            db.close()


def main():
    """Read the JSON file line by line and insert every entry."""
    with open(filename, 'r', encoding='utf-8') as file:
        # One shared connection for the whole run instead of one per word.
        db = dbconnect()
        try:
            for line in file:
                words = line.strip()
                if not words:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                word_json = json.loads(words)
                # jsonpath() returns False when nothing matches, hence "or []".
                word = ''.join(jsonpath(word_json, "$..headWord") or [])
                trans = ''.join(jsonpath(word_json, "$..tranCn") or [])
                # Part of speech of the first translation entry; the parsed
                # object is reused rather than decoding the line a second time.
                first_trans = word_json['content']['word']['content']['trans'][0]
                pos = first_trans.get('pos')
                # print(word, trans, pos)
                insertDb(word, trans, pos, db)
        finally:
            db.close()


if __name__ == "__main__":
    main()
插入成功后,即可在数据库对应的表中查询到导入的单词数据。
参考资料:
https://github.com/kajweb/dict
https://www.jb51.net/article/177500.htm