其他:
1 处理jsonl文件
#!UsersLocalProgramsPython37
# -*- coding: utf-8 -*-
import json


def save_data_jsonl(data_dict_list, path="./new_id_source.jsonl"):
    """Append records to a JSON Lines file.

    Each item of ``data_dict_list`` is serialized with ``json.dumps``
    (non-ASCII characters are written as-is rather than escaped) and
    written on its own line. The file is opened in append mode, so any
    existing content is kept.

    Args:
        data_dict_list: iterable of JSON-serializable objects
            (typically dicts).
        path: destination file; it is created if it does not exist.
    """
    with open(path, mode="a", encoding="utf-8") as fp:
        # One record per line: the newline terminator is what allows the
        # file to be parsed back record by record.
        fp.writelines(
            json.dumps(record, ensure_ascii=False) + "\n"
            for record in data_dict_list
        )


def read_jsonl(path="./baidu.jsonl"):
    """Read a JSON Lines file and return the decoded records as a list.

    Args:
        path: file to read, one JSON document per line.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    with open(path, mode="r", encoding="utf-8") as fp:
        # Blank lines (for example a stray newline at the end of the
        # file) carry no record and would make json.loads fail.
        return [json.loads(line) for line in fp if line.strip()]
2 格式化时间日期
#!UsersLocalProgramsPython37
# -*- coding: utf-8 -*-
import datetime
import re
def format_date(date):
    """Normalize a Chinese-style date string to ``YYYY-MM-DD``.

    Supported inputs:

    * "前天..." (the day before yesterday) and "昨天..." (yesterday);
    * relative offsets containing "前" and a number, e.g. "9天前",
      "20小时前", "5分钟前", counted back from the current local time;
    * absolute dates such as "2021年7月21日", "2021-07-21" or "7月21日";
      when the year is missing the current year is used.

    The input and the normalized result are printed, then the normalized
    string is returned.

    Args:
        date: the date text to normalize.

    Returns:
        The date formatted as ``YYYY-MM-DD``.

    Raises:
        ValueError: if the text matches none of the supported layouts.
    """
    now = datetime.datetime.now()
    # "前天" must be tested before the generic "前" check: it contains the
    # same character but carries no count of its own, and any digits in
    # the text (e.g. "前天17:50") are a clock time, not an offset.
    if "前天" in date:
        date_format = (now - datetime.timedelta(days=2)).strftime("%Y-%m-%d")
    elif "昨天" in date:
        date_format = (now - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
    elif "前" in date:
        date_format = (now - _relative_offset(date)).strftime("%Y-%m-%d")
    else:
        date_format = _absolute_date(date)
    print(date, date_format)
    return date_format


def _relative_offset(text):
    """Return the timedelta described by a "<number><unit>前" phrase."""
    found = re.search(r"\d+", text)
    if found is None:
        raise ValueError("no number found in relative date: {!r}".format(text))
    amount = int(found.group())
    units = (
        ("天", "days"),
        ("小时", "hours"),
        ("分钟", "minutes"),
        ("秒", "seconds"),
        ("周", "weeks"),
    )
    for marker, keyword in units:
        if marker in text:
            return datetime.timedelta(**{keyword: amount})
    # Unrecognized unit: count hours, which is what this function has
    # always done for anything that is not expressed in days.
    return datetime.timedelta(hours=amount)


def _absolute_date(text):
    """Convert "Y年M月D日" / "M月D日" / "Y-M-D" text to ``YYYY-MM-DD``."""
    normalized = text.replace("年", "-").replace("月", "-").replace("日", "")
    parts = [part.strip() for part in normalized.split("-")]
    if len(parts) == 3:
        year, month, day = parts
    elif len(parts) == 2:
        year = datetime.datetime.now().year
        month, day = parts
    else:
        raise ValueError("unrecognized date format: {!r}".format(text))
    # Pad through int() so that already padded fields ("07") are not
    # padded a second time.
    return "{}-{:02d}-{:02d}".format(year, int(month), int(day))
if __name__ == "__main__":
    # Demo run: one sample per supported input layout. Guarded so that
    # importing this module does not execute (and print) the samples.
    for sample in ("7月21日", "2021年7月21日", "9天前", "20小时前", "昨天17:50"):
        format_date(sample)