课程目标
1、提取
2、匹配
3、替换
1、提取
import re
ret = re.findall([正则表达式],[被匹配的字符串])
# 返回的类型是列表;没有匹配到任何内容时返回空列表
2、匹配
import re
ret = re.match([正则表达式],[被匹配的字符串])
if(ret):
    return '匹配成功'
else:
    return '匹配失败'
# 如果匹配成功,返回<class 're.Match'>对象
# 如果匹配不成功,返回None
# 注意:re.match 只从字符串的开头开始匹配
3、替换
import re
ret = re.sub([正则表达式],[替换成的字符串],[被替换的字符串])
# 返回替换后的新字符串,原字符串不会被修改
import re

# Captures the text inside the <div class="email"> element.
# re.S lets "." cross line breaks, because the element body may span several
# lines in the HTML file.  The source text is matched as-is: stripping spaces
# from the document first would also remove the space in `<div class=`, and
# the pattern could then never match.
EMAIL_DIV_PATTERN = re.compile(r'<div class="email">(.*?)</div>', re.S)

# Password rule: first character is a letter, followed by 5-15 letters,
# digits or underscores (6-16 characters in total).
PWD_PATTERN = re.compile(r'^[a-zA-Z][a-zA-Z0-9_]{5,15}$')


def extract_email(html):
    """Return the text of the first ``<div class="email">`` element.

    Args:
        html: The HTML document as a string.

    Returns:
        The element's inner text with surrounding whitespace removed, or
        ``None`` when the document contains no such element.
    """
    found = EMAIL_DIV_PATTERN.findall(html)
    if not found:
        # findall returns an empty list on no match; indexing it would raise.
        return None
    return found[0].strip()


def main():
    """Run the extraction demo on index.html, then the password checks."""
    with open('index.html', 'r', encoding='utf-8') as f:
        html = f.read()
    print(extract_email(html))

    # Each call prints an re.Match object on success and None on failure.
    for pwd in ('123', 'a123123', 'aaa'):
        print(PWD_PATTERN.match(pwd))


if __name__ == '__main__':
    main()
index.html
<!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <title>Title</title> </head> <body> <div> <div class="email"> Email:1@qq.com </div> <div class="mobile"> Mobile:18688888888 </div> </div> </body> </html>