1.正则表达式
import re
# Sample text shared by the regular-expression examples.
string = 'Hello123World456Hello'

# re.match only succeeds when the pattern matches at the start of the string.
print(re.match('Hello', string).span())
# (0, 5)
print(re.match('World', string))
# None

# re.search scans the whole string and returns the first match.
print(re.search('Hello', string).span())
# (0, 5)
print(re.search('World', string).span())
# (8, 13)

# Capture groups: a run of letters followed by a run of digits.
# The digit class must be written \d; a bare "d" matches the letter d.
result = re.search(r'([A-Za-z]+)(\d+)', string)
print(result.group(0))  # Hello123
print(result.group(1))  # Hello
print(result.group(2))  # 123

# findall returns every non-overlapping match.
pattern = re.compile(r'\d+')
result = pattern.findall(string)
print(result)
# ['123', '456']

# With several groups, findall returns one tuple per match.
pattern = re.compile(r'([A-Za-z]+)(\d+)')
result = pattern.findall(string)
print(result)
# [('Hello', '123'), ('World', '456')]

# Replace every matched substring (here: delete all letter runs).
result = re.sub(r'[A-Za-z]+', '', string)
print(result)
# 123456
# Replacement callback: multiply each matched number by 2.
def double(matched):
    """Return the number captured in the match's ``value`` group, doubled.

    Intended as the ``repl`` callable for ``re.sub``.

    Args:
        matched: A match object whose pattern defines a named group
            ``value`` containing decimal digits.

    Returns:
        The doubled number as a string (``re.sub`` requires a string).

    Raises:
        ValueError: If the ``value`` group is not a valid integer literal.
    """
    value = int(matched.group('value'))
    return str(value * 2)
# The named group must capture digits (\d+); a bare "d+" would capture the
# letter "d" in "World" and make int() fail inside the callback.
print(re.sub(r'(?P<value>\d+)', double, string))
# Hello246World912Hello

# Split the string wherever the pattern matches.
result = re.split(r'[A-Za-z]+', string)
print(result)
# ['', '123', '456', '']
2.日期时间
time
import time
# Current time as a float: seconds since the epoch.
t = time.time()
print(t)  # e.g. 1594974068.2558458

# Expand the timestamp into a local-time struct_time.
lt = time.localtime(t)
print(lt)
# e.g. time.struct_time(tm_year=2020, tm_mon=7, tm_mday=17, tm_hour=16, tm_min=22, tm_sec=2, tm_wday=4, tm_yday=199, tm_isdst=0)

# Human-readable renderings of a struct_time.
at = time.asctime(lt)
print(at)
# e.g. Fri Jul 17 16:23:03 2020
for layout in ("%Y-%m-%d %H:%M:%S", "%a %b %d %H:%M:%S %Y"):
    print(time.strftime(layout, time.localtime()))
# e.g. 2020-07-17 16:26:02
#      Fri Jul 17 16:26:02 2020

# Parse a formatted string back into a timestamp.
a = "Fri Jul 17 16:26:02 2020"
parsed = time.strptime(a, "%a %b %d %H:%M:%S %Y")
print(time.mktime(parsed))
# e.g. 1594974362.0 (mktime interprets the fields as local time)
datetime
from datetime import datetime, timedelta, date
# Obtain datetime objects: the current local time, or one built from fields.
print(datetime.now())
# e.g. 2020-07-17 16:35:17.112810
sample = datetime(year=2020, month=7, day=17, hour=16, minute=35)
print(sample)
# 2020-07-17 16:35:00

# Convert between datetime and timestamp (naive values use local time).
print(sample.timestamp())
# e.g. 1594974900.0
print(datetime.fromtimestamp(1594974900.0))
# e.g. 2020-07-17 16:35:00

# Convert between datetime and formatted strings.
parsed = datetime.strptime('2020-7-17 16:35:59', '%Y-%m-%d %H:%M:%S')
print(parsed)
# str -> datetime: 2020-07-17 16:35:59
print(datetime.now().strftime('%a, %b %d %H:%M'))
# datetime -> str, e.g. Fri, Jul 17 16:38

# datetime arithmetic with timedelta.
now = datetime.now()
print(now)
# e.g. 2020-07-17 16:40:40.539739
now = now + timedelta(days=2, hours=12) - timedelta(days=1)
print(now)
# 1 day and 12 hours later than the value printed above

# Formatted output using strftime directives in format() / str.format().
d = date(2020, 7, 17)
print(format(d, '%A, %B %d, %Y'))
# Friday, July 17, 2020
print('Today is {:%d %b %Y}'.format(d))
# Today is 17 Jul 2020
3.序列化
pickle
import pickle
# Serialize an object to bytes and restore it.
d1 = dict(name='Tom', age=20)
s = pickle.dumps(d1)  # serialize
d2 = pickle.loads(s)  # deserialize
print(d2)
# {'name': 'Tom', 'age': 20}

# Save the object to a file (pickle data is binary, hence 'wb').
with open('dump.txt', 'wb') as f:
    pickle.dump(d1, f)

# Load the object back from the file.
# NOTE: only unpickle data from a trusted source; unpickling can run
# arbitrary code.
with open('dump.txt', 'rb') as f:
    d3 = pickle.load(f)
print(d3)
# {'name': 'Tom', 'age': 20}
json
import json
# dict -> JSON text.
d = {'name': 'Tom', 'age': 20}
json_str = json.dumps(d)
print(json_str)
# {"name": "Tom", "age": 20}

# JSON text -> dict.
json_str = '{"age": 20, "name": "Tom"}'
d = json.loads(json_str)
print(d)
# {'age': 20, 'name': 'Tom'}
# 对象实例的序列化和反序列化
class Student(object):
    """Plain record with a name and an age, used by the JSON examples."""

    def __init__(self, name, age):
        """Store ``name`` and ``age`` as instance attributes."""
        self.name = name
        self.age = age
def student2dict(std):
    """Convert an object with ``name`` and ``age`` attributes to a dict.

    Suitable as the ``default`` callable of ``json.dumps`` so that such
    objects can be serialized.

    Args:
        std: Object exposing ``name`` and ``age`` attributes.

    Returns:
        A dict with the keys ``'name'`` and ``'age'``.
    """
    return {
        'name': std.name,
        'age': std.age,
    }
def dict2student(d):
    """Build a ``Student`` from a dict with ``'name'`` and ``'age'`` keys.

    Suitable as the ``object_hook`` callable of ``json.loads``.

    Raises:
        KeyError: If either key is missing from ``d``.
    """
    return Student(d['name'], d['age'])
# Instance -> JSON text: json.dumps calls student2dict for the object
# it cannot serialize on its own.
s = Student(name='Tom', age=20)
json_str = json.dumps(s, default=student2dict)
print(json_str)
# {"name": "Tom", "age": 20}

# JSON text -> instance: json.loads passes each decoded object (dict)
# through dict2student.
json_str = '{"age": 20, "name": "Tom"}'
s = json.loads(json_str, object_hook=dict2student)
print(s.name, s.age)
# Tom 20
4.哈希函数
#1 使用hashlib
import hashlib
# MD5 of a whole message supplied in one go.
md5 = hashlib.md5('how to use md5 in python hashlib?'.encode('utf-8'))
print(md5.hexdigest())
# d26a53750bc40b38b65a520292f69306

# Feeding the same bytes through several update() calls yields the same digest.
md5 = hashlib.md5()
for piece in ('how to use md5 in ', 'python hashlib?'):
    md5.update(piece.encode('utf-8'))
print(md5.hexdigest())

# SHA-1 works the same way.
sha1 = hashlib.sha1()
for piece in ('how to use sha1 in ', 'python hashlib?'):
    sha1.update(piece.encode('utf-8'))
print(sha1.hexdigest())
# 2c76b57293ce30acef38d98f6046927161b46a44
#2 使用hmac实现带key的哈希
import hmac
# Keyed hash: the digest depends on both the secret key and the message.
key, message = b'secret', b'Hello, world!'
h = hmac.new(key, msg=message, digestmod='MD5')
h.hexdigest()
# 'fa4ee7d173f2d97ee79022d1a7355bcf'
5.二进制编码
base64
# base64是一种用64个字符来表示二进制数据的方法。
import base64
# Binary -> base64 (the \x escapes are required for the non-printable bytes).
base64.b64encode(b'binary\x00string')  # b'YmluYXJ5AHN0cmluZw=='
# base64 -> binary
base64.b64decode(b'YmluYXJ5AHN0cmluZw==')  # b'binary\x00string'
# For URLs, the "+" and "/" characters are replaced by "-" and "_".
base64.b64encode(b'i\xb7\x1d\xfb\xef\xff')  # b'abcd++//'
base64.urlsafe_b64encode(b'i\xb7\x1d\xfb\xef\xff')  # b'abcd--__'
base64.urlsafe_b64decode('abcd--__')  # b'i\xb7\x1d\xfb\xef\xff'
struct
# struct模块用来处理的是python数据和表示成python bytes对象的C结构体(struct)之间的转换,
# 应用场景一般是处理文件和网络传输中的二进制数据。
'''
struct s_data {
unsigned short id;
unsigned int length;
char[5] data;
}
'''
from struct import Struct
# Field values for the record: unsigned short id, unsigned int length,
# 5-byte data.
p_id = 0
p_length = 5
p_data = b'hello'
# '>' big-endian, '<' little-endian, '!' network order (= big-endian).
# H = unsigned short (2 bytes), I = unsigned int (4 bytes), 5s = 5 raw bytes.
c_struct = Struct('>HI5s')

# Python values -> packed binary record (11 bytes).
packed = c_struct.pack(p_id, p_length, p_data)
print(packed)  # b'\x00\x00\x00\x00\x00\x05hello'

# Packed binary record -> Python values. The buffer must be exactly
# c_struct.size bytes long, so the \x escapes are essential.
unpacked = c_struct.unpack(b'\x00\x00\x00\x00\x00\x05hello')
print(unpacked)  # (0, 5, b'hello')
BytesIO
# BytesIO StringIO 将IO操作放到内存中提高运行效率
# BytesIO
#在内存中开辟一个二进制模式的buffer,可以像文件对象一样操作它
from io import BytesIO
# In-memory binary buffer used like a file object; leaving the with-block
# closes it and releases the buffer.
with BytesIO() as bio:
    print(bio.readable(), bio.writable(), bio.seekable())
    # True True True
    bio.write(b'hello\nPython')
    bio.seek(0)  # rewind so the following read starts at the beginning
    print(bio.readline())
    # b'hello\n'
    print(bio.getvalue())  # whole content, regardless of the position
    # b'hello\nPython'
StringIO
# StringIO
from io import StringIO
# In-memory text buffer used like a file object; leaving the with-block
# closes it and releases the buffer.
with StringIO() as sio:
    print(sio.readable(), sio.writable(), sio.seekable())
    # True True True
    sio.write("hello\nPython")
    sio.seek(0)  # rewind so the following read starts at the beginning
    print(sio.readline())
    # prints "hello" followed by the line's own newline
    print(sio.getvalue())  # whole content, regardless of the position
6.日志
#1 基本使用
import logging
# Severity order: DEBUG < INFO < WARNING < ERROR.
# Send records of level DEBUG and above to log1.txt in the given layout.
LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s : %(message)s'
logging.basicConfig(filename="log1.txt", level=logging.DEBUG, format=LOG_FORMAT)

# Logger named after the current module.
logger = logging.getLogger(__name__)

logger.info("Start wirte log")
logger.warning("Something maybe wrong")
logger.debug("Try to fix bug")
logger.info("Finish")
#2 多进程轮转,用于多进程写同一日志文件
import os, datetime
import logging
import logging.handlers
from cloghandler import ConcurrentRotatingFileHandler
def console_out(errorInfo):
    """Append one INFO record to today's log file under ``./logs``.

    The file is named ``YYYYMMDD.txt`` after the current date and is written
    through a ``ConcurrentRotatingFileHandler`` so that several processes can
    log to the same file. The handler is attached to the root logger only
    for the duration of the call.

    Side effect: the root logger's level is set to INFO and stays that way.

    Args:
        errorInfo: The message to log at INFO level.
    """
    path = './logs'
    # exist_ok avoids the check-then-create race when several processes
    # start logging at the same time.
    os.makedirs(path, exist_ok=True)

    today = datetime.date.today().strftime('%Y%m%d') + '.txt'
    logFile = os.path.join(path, today)

    # Append mode, with 20 * 1024 * 1024 and 10 as the remaining positional
    # arguments (presumably maxBytes and backupCount; confirm against the
    # installed cloghandler version).
    handler = ConcurrentRotatingFileHandler(logFile, "a", 20 * 1024 * 1024, 10)
    fmt = '%(asctime)s - %(levelname)s - %(message)s'
    handler.setFormatter(logging.Formatter(fmt))

    logger = logging.getLogger()
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)
    try:
        logger.info(errorInfo)
    finally:
        # Detach and close even if logging fails, so repeated calls neither
        # stack handlers on the root logger nor leak open files.
        logger.removeHandler(handler)
        handler.close()
7.表格
# pip install openpyxl
import openpyxl
#1 新建表格并写入
# Create a workbook and insert a new sheet at position 0.
wb = openpyxl.Workbook()
ws = wb.create_sheet(index=0)
# Fill rows 1 to 4 (openpyxl rows and columns are 1-based).
for i in range(1, 5):
    # row i, column 1
    ws.cell(i, 1).value = "NAME"
    # row i, column 2
    ws.cell(i, 2).value = "AGE"
    # row i, column 3
    ws.cell(i, 3).value = "BIRTH"
# Write the workbook to disk once all rows are filled.
wb.save("test.xlsx")
#2 加载表格并读取
# Load the workbook and take its active sheet.
wb = openpyxl.load_workbook('test.xlsx')
ws = wb.active
# Iterate over all rows; each row is a sequence of cells.
for row in ws.rows:
    name = row[0].value
    age = row[1].value
    birth = row[2].value
    print(name, age, birth)