Python3笔记之正则表达式re

一、平台
Windows 7、Python 3.7
二、目标
了解 Python 3 正则表达式模块 re 的常见用法
三、示例和结果如下
import re

print("-----------------匹配单个字符和数字---------------------")
# Quick reference: patterns that match exactly one character.
#   .              any character except a newline
#   [0123456789]   any single character listed inside the brackets
#   [kahn]         any one of k, a, h, n
#   [a-z]          any lowercase ASCII letter
#   [A-Z]          any uppercase ASCII letter
#   [0-9]          any ASCII digit
#   [0-9a-zA-Z]    any ASCII digit or letter
#   [0-9a-zA-Z_]   any ASCII digit, letter or underscore
#   [^kahn]        any character other than k, a, h, n
#   [^0-9]         any character that is not an ASCII digit
#   \d             a digit; like [0-9] (for str patterns it also accepts
#                  other Unicode digits unless re.ASCII is given)
#   \D             a non-digit; the complement of \d
#   \w             a word character; like [0-9a-zA-Z_] under re.ASCII
#   \W             a non-word character; the complement of \w
#   \s             a whitespace character; [ \t\n\r\f\v] under re.ASCII
#                  (note that the plain space is included)
#   \S             a non-whitespace character; the complement of \s
#
# Patterns containing a backslash are written as raw strings (r"...") so the
# backslash reaches the regex engine unchanged and Python does not warn about
# an invalid string escape sequence.
print(re.search(".", "hello world"))
# <re.Match object; span=(0, 1), match='h'>
print(re.search("[0123456789]", "hello2 world"))
# <re.Match object; span=(5, 6), match='2'>
print(re.search("[kahn]", "hello2 world"))
# <re.Match object; span=(0, 1), match='h'>
print(re.search("[^kahn]", "hello2 world"))
# <re.Match object; span=(1, 2), match='e'>
print(re.search("[^0-9]", "hello2 world"))
# <re.Match object; span=(0, 1), match='h'>
print(re.findall("[^0-9]", "hello2 world"))
# ['h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd']
print(re.findall(r"\d", "hello2 world"))
# ['2']
print(re.findall(r"\D", "hello2 world"))
# ['h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd']
print(re.findall(r"\s", "hello2 world"))
# [' ']

print("--------------------锚字符(边界字符)--------------------------")
# Quick reference: anchors (zero-width assertions about position).
#   ^    start of the string; with re.M also the start of every line.
#        Inside brackets, as in [^...], it means negation instead.
#   $    end of the string (or just before a trailing newline); with re.M
#        also the end of every line.
#   \A   start of the string only; unlike ^ it never matches at the start
#        of later lines, even with re.M.
#   \Z   end of the string only; it never matches before newlines in the
#        middle of the text, even with re.M.
#   \b   a word boundary: the position between a word character and a
#        non-word character (or the start/end of the string).
#   \B   any position that is not a word boundary.
#
# Patterns containing a backslash are raw strings: in a normal string
# literal "\b" is a backspace character and "\A" is an invalid escape.
print(re.search("^kahn", "kahn hello2 world"))
# <re.Match object; span=(0, 4), match='kahn'>
print(re.search("^kahn", "hello2 world kahn"))
# None
print(re.search("kahn$", "kahn hello2 world"))
# None
print(re.search("kahn$", "hello2 world kahn"))
# <re.Match object; span=(13, 17), match='kahn'>
print(re.findall("^kahn", "kahn hello world \nkahn the world", re.M))
# ['kahn', 'kahn']
print(re.findall(r"\Akahn", "kahn hello world \n kahn the world", re.M))
# ['kahn']
print(re.search(r"er\b", "never "))
# <re.Match object; span=(3, 5), match='er'>

print("--------------------匹配多个字符--------------------------")
# Quick reference: grouping, quantifiers and alternation.
# (x, y, z below stand for ordinary characters, not metacharacters.)
#   (xyz)    match "xyz" as one unit and capture it as a group
#   x?       zero or one x
#   x*       zero or more x
#   x+       one or more x
#   x{n}     exactly n x (n is a non-negative integer)
#   x{n,}    at least n x
#   x{n,m}   between n and m x, inclusive (n must be <= m)
#   x|y      x or y
#
# Each entry is (pattern, text); the trailing comment is what
# re.findall(pattern, text) returns. Patterns that can match the empty
# string also produce '' at every position where nothing else matches,
# including the very end of the text. When a pattern contains groups,
# findall returns the groups rather than the whole match.
_quantifier_cases = (
    (r"(kahn)", "kahn hello world, kahnthe world，hahakahnhehe"),
    # ['kahn', 'kahn', 'kahn']
    (r"a?", "kahna, apple"),
    # ['', 'a', '', '', 'a', '', '', 'a', '', '', '', '', '']
    (r"a*", "kahna, apple"),
    # ['', 'a', '', '', 'a', '', '', 'a', '', '', '', '', '']
    (r"x?", "xxxxx"),
    # ['x', 'x', 'x', 'x', 'x', '']
    (r"x*", "xxxxx"),
    # ['xxxxx', '']
    (r"x*", "xxaxxx"),
    # ['xx', '', 'xxx', '']
    (r".*", "xxaxxx"),
    # ['xxaxxx', '']
    (r"x+", "xxaxxx"),
    # ['xx', 'xxx']
    (r"a+", "xxaxxx"),
    # ['a']
    (r"a{3}", "kaaaaahn, hello world aaaaa"),
    # ['aaa', 'aaa']
    (r"a{3,}", "kaaaaahn, hello world aaaaa"),
    # ['aaaaa', 'aaaaa']
    (r"a{3,}", "kahn, hello world aa"),
    # []
    (r"a{1,3}", "kahn, hello world aa"),
    # ['a', 'aa']
    (r"(k|K)ahn", "kahn, hello world, Kahn hehekkk"),
    # ['k', 'K']
    (r"((k|K)ahn)", "kahn, hello world, Kahn hehekahN"),
    # [('kahn', 'k'), ('Kahn', 'K')]
)
for _case_pattern, _case_text in _quantifier_cases:
    print(re.findall(_case_pattern, _case_text))

print("--------------------正则表达式的字符串切割--------------------------")
# Compare plain whitespace splitting with splitting on a regex.
s1 = "kahn,   hello world"

# str.split() with no argument splits on runs of whitespace.
plain_tokens = s1.split()
print(plain_tokens)
# ['kahn,', 'hello', 'world']

# The pattern " +" treats one or more consecutive spaces as one separator.
space_run = re.compile(r" +")
print(space_run.split(s1))
# ['kahn,', 'hello', 'world']

print("--------------------正则表达式的迭代器--------------------------")
# re.finditer(pattern, string) returns an iterator that yields one Match
# object per non-overlapping match, in left-to-right order.
s2 = "今天是2019年5月4日，五四青年节kahn，五一放假的第四天。kahn放假比上班还累，昨天跑去SJ，又去WY广场吃晚饭，"
d = re.finditer(r"(kahn)", s2)
# A for loop consumes the iterator and stops cleanly when it is exhausted.
# Print each yielded match (x), not the iterator object (d) itself.
for x in d:
    print(x)
# <re.Match object; span=(18, 22), match='kahn'>
# <re.Match object; span=(32, 36), match='kahn'>

print("--------------------字符串的替换和修改--------------------------")
# re.sub(pattern, repl, string, count=0)   -> the new string
# re.subn(pattern, repl, string, count=0)  -> (new string, number of
#                                              substitutions made)
#   pattern: the regular expression to search for
#   repl:    the replacement text
#   string:  the text to search in
#   count:   maximum number of substitutions; 0 (the default) replaces
#            every match
s6 = "工作真累，比上学习还累，要是时光能倒流绝对要好好学习，学习天天向上"
sensitive_word = r"学习"
mask_text = "【敏感字符被替换】"

# Replace every occurrence.
print(re.sub(sensitive_word, mask_text, s6))
# 工作真累，比上【敏感字符被替换】还累，要是时光能倒流绝对要好好【敏感字符被替换】，【敏感字符被替换】天天向上

# Replace only the first two occurrences.
print(re.sub(sensitive_word, mask_text, s6, count=2))
# 工作真累，比上【敏感字符被替换】还累，要是时光能倒流绝对要好好【敏感字符被替换】，学习天天向上

# Replace every occurrence and also report how many were replaced.
print(re.subn(sensitive_word, mask_text, s6))
# ('工作真累，比上【敏感字符被替换】还累，要是时光能倒流绝对要好好【敏感字符被替换】，【敏感字符被替换】天天向上', 3)

print("--------------------分组--------------------------")
# Parentheses capture parts of a match so they can be read back separately.
# (?P<name>...) additionally gives the captured group a name; here the
# names are xArgs1 and xArgs2.
dianhua = "021-37213690"
dianhuaRes = re.match(r"(\d{3})-(\d{8})", dianhua)
# Group 0 is the whole match; groups 1 and 2 are the two captures.
for group_index in range(3):
    print(dianhuaRes.group(group_index))
# 021-37213690
# 021
# 37213690
print(dianhuaRes.groups())
# ('021', '37213690')

# Same pattern with named groups; group() accepts several names at once
# and then returns a tuple, which is unpacked into print().
dianhuaRes2 = re.match(r"(?P<xArgs1>\d{3})-(?P<xArgs2>\d{8})", dianhua)
print(*dianhuaRes2.group("xArgs1", "xArgs2"))
# 021 37213690

print("--------------------编译--------------------------")
# Using a regular expression involves two steps:
#   1. the pattern is compiled (an invalid pattern raises an error here);
#   2. the compiled pattern is matched against the text.
# re.compile() performs step 1 explicitly and returns a reusable pattern
# object whose methods mirror the module-level functions.
xRE = r"kahn"
string111 = "kahn hello world, kahn heheda"
re_GuiZe = re.compile(xRE)

# Module-level call: compiles (or looks up) the pattern, then matches.
module_level_result = re.match(xRE, string111)
# Pre-compiled pattern object: matches directly.
compiled_result = re_GuiZe.match(string111)

print(module_level_result)
# <re.Match object; span=(0, 4), match='kahn'>
print(compiled_result)
# <re.Match object; span=(0, 4), match='kahn'>
kahn 2019年5月4日16:31:56
Python3笔记之正则表达式re

猜你喜欢