使用多个界定符分隔字符串
import re

# Sample record whose fields are separated by a mix of ';', ',' and whitespace.
line = 'asdf fjdk; afed, fjek,asdf, foo'

# The character class lists every accepted delimiter (semicolon, comma or any
# whitespace); the trailing \s* swallows whitespace that follows a delimiter.
fields = re.split(r'[;,\s]\s*', line)
print(fields)

# Wrapping the delimiter in a capture group makes re.split() also return the
# matched delimiters, interleaved with the fields, in the result list.
fields_with_delims = re.split(r'(;|,|\s)\s*', line)
print(fields_with_delims)
匹配开头或结尾
import re

url = 'http://www.python.org'

# str.startswith() accepts several candidates only as a tuple; a list or set
# must be converted with tuple() first.
has_known_prefix = url.startswith(('http', 'https', 'ftp'))
print(has_known_prefix)

# A regular expression anchored at the start (re.match) works as well.
scheme_match = re.match(r'http:|https:|ftp:', url)
print(scheme_match)
使用Shell中的通配符匹配
# fnmatchcase is imported alongside fnmatch (as in the original snippet) but is
# not exercised below.
from fnmatch import fnmatch, fnmatchcase

# (filename, shell-style pattern) pairs: '*' matches any run of characters,
# '?' a single character and '[0-9]' one digit.
wildcard_checks = [
    ('foo.txt', '*.txt'),
    ('foo.txt', '?oo.txt'),
    ('Dat45', 'Dat[0-9]*'),
]

# Actually evaluate each pattern; printing the two strings alone shows nothing
# about whether they match.
wildcard_results = [fnmatch(filename, pattern)
                    for filename, pattern in wildcard_checks]
for matched in wildcard_results:
    print(matched)

names = ['Dat1.csv', 'Dat2.csv', 'config.ini', 'foo.py']

# Keep only the file names that match the wildcard pattern.
dat_csv_names = [name for name in names if fnmatch(name, 'Dat*.csv')]
print(dat_csv_names)
忽略大小写匹配和替换
import re

text = 'UPPER PYTHON, lower python, Mixed Python'

# With re.IGNORECASE every spelling of the word is found.
any_case_matches = re.findall(r'python', text, re.IGNORECASE)
print(any_case_matches)

# Without the flag only the exact lowercase spelling matches.
exact_case_matches = re.findall(r'python', text)
print(exact_case_matches)

# Case-insensitive replacement; count caps the number of substitutions (100 is
# far more than the three occurrences in this text).
replaced_text = re.sub(r'python', 'java', text, count=100, flags=re.IGNORECASE)
print(replaced_text)
贪婪和非贪婪匹配
(.*)匹配任意字符,贪婪匹配。(.*?)非贪婪匹配
import re

text = 'Computer says "no." Phone says "yes."'

# Greedy: (.*) grabs as much as possible, so the match runs from the first
# quote all the way to the last one.
str_pat = re.compile(r'"(.*)"')
greedy_matches = str_pat.findall(text)
print(greedy_matches)

# Non-greedy: (.*?) stops at the nearest closing quote, giving one match per
# quoted phrase.
str_pat = re.compile(r'"(.*?)"')
lazy_matches = str_pat.findall(text)
print(lazy_matches)
多行匹配
import re

# Matches a C-style /* ... */ comment. '*' is a regex metacharacter, so it is
# escaped; the lazy group captures the text between the markers.
comment = re.compile(r'/\*(.*?)\*/')
text1 = '/* this is a comment */'
text2 = '/* this is a\nmultiline comment */\n'

single_line_matches = comment.findall(text1)
print(single_line_matches)

# By default '.' does not match a newline, so a comment spanning several lines
# is not found.
default_multiline_matches = comment.findall(text2)
print(default_multiline_matches)

# (?:.|\n) is a non-capturing group: it is used only for matching and cannot
# be retrieved separately or referenced by number. It lets the body contain
# newlines as well as ordinary characters.
comment = re.compile(r'/\*((?:.|\n)*?)\*/')
noncapturing_matches = comment.findall(text2)
print(noncapturing_matches)

# re.DOTALL makes '.' match every character, including the newline.
comment = re.compile(r'/\*(.*?)\*/', re.DOTALL)
dotall_matches = comment.findall(text2)
print(dotall_matches)
删除字符串中不需要的字符
import re

s = ' hello world '

# strip() with no argument removes leading and trailing whitespace.
stripped = s.strip()
print(stripped)                 # hello world

# strip(' ') removes only leading and trailing space characters.
space_stripped = s.strip(' ')
print(space_stripped)           # hello world

# replace() removes every space, including the ones inside the text.
without_spaces = s.replace(" ", "")
print(without_spaces)           # helloworld

# Collapse each run of whitespace into a single space; leading and trailing
# runs are reduced to one space rather than removed.
whitespace_collapsed = re.sub(r'\s+', ' ', s)
print(whitespace_collapsed)     # ' hello world ' (edge spaces are kept)
字符串对齐
text = 'Hello World'

# str methods: pad to a width of 20 with an explicit fill character.
print(text.rjust(20, "*"))
print(text.center(20, '*'))

# Python 3: format() with a format spec of the form [fill][align][width].
# '>' right-aligns, '<' left-aligns and '^' centers.
print(format(text, '>20'))
print(format(text, '<20'))
print(format(text, '^20'))
print(format(text, '*>20'))
print(format(text, '=<20'))
print(format(text, '*^20'))

# The same specs work inside str.format() replacement fields.
print('{:>10s} {:>10s}'.format('hello', 'world'))

# format() is not limited to strings; numbers accept alignment and precision.
x = 1.2345
print(format(x, '>10'))
print(format(x, '^10.2f'))

# Python 2 style: %-formatting ('-' left-aligns, the default is right-aligned).
print('%-20s' % text)
print('%20s' % text)
字符串拼接
parts = ['Is', 'Chicago', 'Not', 'Chicago?']
print(' '.join(parts))           # the fastest way to combine many strings
print('hello' + ' ' + 'world')   # fine when joining just a few strings
print('hello' ' world')          # adjacent string literals are concatenated too

# Anti-pattern, kept on purpose as the counter-example: never build a string
# with += in a loop.
s = ''
for p in parts:
    s += p

# Non-str items must be converted first; a generator expression avoids
# building a temporary list.
parts = ['now', 'is', 10, ':', '45']
print(' '.join(str(d) for d in parts))

a, b, c = ['f', 'z', 'k']
print(a + ':' + b + ':' + c)     # ugly
print(':'.join([a, b, c]))       # still ugly
print(a, b, c, sep=':')          # better


def sample():
    """Yield the text fragments one at a time.

    When output is assembled from many small strings, producing the
    fragments with a generator lets the caller decide how to combine them.
    """
    yield 'Is'
    yield 'Chicago'
    yield 'Not'
    yield 'Chicago?'


print(' '.join(sample()))
字符串插入变量
class Info(object):
    """Simple record whose instance attributes feed the template below."""

    def __init__(self, name, n):
        self.name = name
        self.n = n


s = '{name} has {n} messages.'
name = 'fzk'
n = 10

print(s.format(name=name, n=n))
# vars() with no argument returns the current namespace, so the template
# fields are looked up among the variables defined here.
print(s.format_map(vars()))
# vars(obj) returns the instance's attribute dict.
print(s.format_map(vars(Info(name, n))))


# A missing variable makes format()/format_map() raise an error; a dict
# subclass with __missing__ avoids that.
class safesub(dict):
    """Mapping that leaves unknown placeholders untouched instead of failing."""

    def __missing__(self, key):
        # Re-emit the placeholder text so the field survives in the output.
        return '{' + key + '}'


del n
print(s.format_map(safesub(vars())))