# coding=utf-8
# 导入re模块
import re

# 使用match方法进行匹配操作
result = re.match(正则表达式, 要匹配的字符串)
# 如果上一步匹配到数据的话,可以使用group方法来提取数据
result.group()
# coding=utf-8
import re

# re.match() only matches at the beginning of the subject string;
# "hello" is a prefix of "hello world", so a match object is returned.
result = re.match("hello", "hello world")
# group() returns the matched text, here 'hello'.
result.group()
匹配单个字符
字符
功能
.
匹配任意1个字符(除了\n)
[ ]
匹配[ ]中列举的字符
\d
匹配数字,即0-9
\D
匹配非数字,即不是数字
\s
匹配空白,即 空格,tab键
\S
匹配非空白
\w
匹配单词字符,即a-z、A-Z、0-9、_
\W
匹配非单词字符
# coding=utf-8
import re

# "." matches any single character except a newline.
ret = re.match(".", "S")
print(ret.group())

ret = re.match("o.e", "ooe")
print(ret.group())

ret = re.match("ww.o", "wwwo")
print(ret.group())
[ ] 列举
# coding=utf-8
import re

# If "hello" starts with a lowercase letter, the pattern needs a lowercase h.
ret = re.match("h", "hello Python")
print(ret.group())

# If "Hello" starts with an uppercase letter, the pattern needs an uppercase H.
ret = re.match("H", "Hello Python")
print(ret.group())

# A character class accepts either case.
ret = re.match("[hH]", "hello Python")
print(ret.group())
ret = re.match("[hH]", "Hello Python")
print(ret.group())
ret = re.match("[hH]ello Python", "Hello Python")
print(ret.group())

# Matching a digit 0-9, first spelling: list every digit.
ret = re.match("[0123456789]Hello Python", "7Hello Python")
print(ret.group())

# Matching a digit 0-9, second spelling: use a range.
ret = re.match("[0-9]Hello Python", "7Hello Python")
print(ret.group())

# Two ranges in one class: 0-3 and 5-9 (4 is excluded).
ret = re.match("[0-35-9]Hello Python", "7Hello Python")
print(ret.group())

# The class above cannot match the digit 4, so ret is None here;
# calling ret.group() would raise AttributeError, hence no print.
ret = re.match("[0-35-9]Hello Python", "4Hello Python")
单个数字匹配 \d
# coding=utf-8
import re

# Plain matching: one literal pattern per input.
ret = re.match("嫦娥1号", "嫦娥1号发射成功")
print(ret.group())
ret = re.match("嫦娥2号", "嫦娥2号发射成功")
print(ret.group())
ret = re.match("嫦娥3号", "嫦娥3号发射成功")
print(ret.group())

# Using \d: a single pattern covers every digit.
# Raw strings keep the backslash intact without an invalid-escape warning.
ret = re.match(r"嫦娥\d号", "嫦娥1号发射成功")
print(ret.group())
ret = re.match(r"嫦娥\d号", "嫦娥2号发射成功")
print(ret.group())
ret = re.match(r"嫦娥\d号", "嫦娥3号发射成功")
print(ret.group())
匹配多个字符
字符
功能
*
匹配前一个字符出现0次或者无限次,即可有可无
+
匹配前一个字符出现1次或者无限次,即至少有1次
?
匹配前一个字符出现1次或者0次,即要么有1次,要么没有
{m}
匹配前一个字符出现m次
{m,n}
匹配前一个字符出现从m到n次
# coding=utf-8
# Requirement: match a string whose first character is an uppercase letter,
# followed by zero or more lowercase letters.
import re

ret = re.match("[A-Z][a-z]*", "M")
print(ret.group())

# Matching stops at the second uppercase letter, so only "Mnn" is matched.
ret = re.match("[A-Z][a-z]*", "MnnM")
print(ret.group())

ret = re.match("[A-Z][a-z]*", "Aabcdef")
print(ret.group())
# coding=utf-8
# Check whether each candidate is a valid variable name.
import re

names = ["name1", "_name", "2_name", "__name__"]

for name in names:
    # One or more letters/underscores first, then any word characters;
    # a leading digit therefore fails to match.
    ret = re.match(r"[a-zA-Z_]+[\w]*", name)
    if ret:
        print("变量名 %s 符合要求" % ret.group())
    else:
        print("变量名 %s 非法" % name)
# coding=utf-8
# Requirement: match a number between 0 and 99.
import re

ret = re.match("[1-9]?[0-9]", "7")
print(ret.group())

ret = re.match(r"[1-9]?\d", "33")
print(ret.group())

# The optional [1-9] cannot match "0", so only the single digit "0" is matched.
ret = re.match(r"[1-9]?\d", "09")
print(ret.group())
# coding=utf-8
# Requirement: match a password of 8 to 20 characters consisting of
# upper/lowercase English letters, digits and underscores.
#   {m}   : exactly m occurrences
#   {m,n} : between m and n occurrences
#   {m,}  : at least m occurrences
import re

# {6} takes exactly the first six characters of the subject.
ret = re.match("[a-zA-Z0-9_]{6}", "12a3g45678")
print(ret.group())

ret = re.match("[a-zA-Z0-9_]{8,20}", "1ad12f23s34455ff66")
print(ret.group())
匹配开头结尾
字符
功能
^
匹配字符串开头
$
匹配字符串结尾
# coding=utf-8
import re

# NOTE(review): the original sample addresses were obfuscated to
# "[email protected]" during extraction, which made all three inputs identical.
# The values below are representative stand-ins (a valid address, a valid
# address with trailing junk, and a non-163 address) - confirm against the
# original material.
email_list = ["xiaoWang@163.com", "xiaoWang@163.comheihei", ".com.xiaowang@qq.com"]

# Without "$" the pattern only has to match a prefix of the subject, so an
# address followed by trailing junk is still accepted.
for email in email_list:
    ret = re.match(r"[\w]{4,20}@163\.com", email)
    if ret:
        print("%s 是符合规定的邮件地址,匹配后的结果是:%s" % (email, ret.group()))
    else:
        print("%s 不符合要求" % email)

# With "$" the match must extend to the end of the subject string.
for email in email_list:
    ret = re.match(r"[\w]{4,20}@163\.com$", email)
    if ret:
        print("%s 是符合规定的邮件地址,匹配后的结果是:%s" % (email, ret.group()))
    else:
        print("%s 不符合要求" % email)
匹配分组
字符
功能
|
匹配左右任意一个表达式
(ab)
将括号中字符作为一个分组
\num
引用分组num匹配到的字符串
(?P<name>)
分组起别名
(?P=name)
引用别名为name分组匹配到的字符串
# coding=utf-8
# Match a number between 0 and 100.
import re

ret = re.match(r"[1-9]?\d", "8")
print(ret.group())  # 8

ret = re.match(r"[1-9]?\d", "78")
print(ret.group())  # 78

# Incorrect case: "08" is accepted because only the leading "0" is matched.
ret = re.match(r"[1-9]?\d", "08")
print(ret.group())  # 0

# Corrected: "$" forces the match to reach the end of the string.
ret = re.match(r"[1-9]?\d$", "08")
if ret:
    print(ret.group())
else:
    print("不在0-100之间")

# Add "|" so that 100 is accepted as well.
# NOTE: only the first alternative is anchored with "$"; the "100"
# alternative would also match a prefix of a longer string such as "1000".
ret = re.match(r"[1-9]?\d$|100", "8")
print(ret.group())  # 8

ret = re.match(r"[1-9]?\d$|100", "78")
print(ret.group())  # 78

# "08" is not a number in 0-100, so ret is None and group() must not be called.
ret = re.match(r"[1-9]?\d$|100", "08")

ret = re.match(r"[1-9]?\d$|100", "100")
print(ret.group())  # 100
# Match 11-digit mobile numbers that do not end in 4 or 7.
import re

tels = ["13100001234", "18912344321", "10086", "18800007777"]

for tel in tels:
    # "1", nine more digits, then a final digit from 0-3, 5-6 or 8-9.
    ret = re.match(r"1\d{9}[0-35-68-9]$", tel)
    if ret:
        print(ret.group())
    else:
        print("%s 不是想要的手机号" % tel)
# coding=utf-8
# Match <html>hh</html>.
import re

# This works for a well-formed string.
ret = re.match(r"<[a-zA-Z]*>\w*</[a-zA-Z]*>", "<html>hh</html>")
print(ret.group())

# For a malformed string the same pattern wrongly matches as well,
# because the closing tag name is not tied to the opening one.
ret = re.match(r"<[a-zA-Z]*>\w*</[a-zA-Z]*>", "<html>hh</htmlbalabala>")
print(ret.group())

# The right idea: whatever appears in the first <> must appear again in the
# closing </>. Reference the captured group with \1; the pattern must be a
# raw string (r"...") so that \1 reaches the regex engine as a backreference.
ret = re.match(r"<([a-zA-Z]*)>\w*</\1>", "<html>hh</html>")
print(ret.group())

# The two tag names differ here, so nothing is matched.
test_label = "<html>hh</htmlbalabala>"
ret = re.match(r"<([a-zA-Z]*)>\w*</\1>", test_label)
if ret:
    print(ret.group())
else:
    print("%s 这是一对不正确的标签" % test_label)
# coding=utf-8
# Match <html><h1>www.itcast.cn</h1></html>.
import re

labels = [
    "<html><h1>www.itcast.cn</h1></html>",
    "<html><h1>www.itcast.cn</h2></html>",
]

for label in labels:
    # \2 must repeat the inner tag name and \1 the outer tag name.
    ret = re.match(r"<(\w*)><(\w*)>.*</\2></\1>", label)
    if ret:
        print("%s 是符合要求的标签" % ret.group())
    else:
        print("%s 不符合要求" % label)
# coding=utf-8
# Match <html><h1>www.baidu.com</h1></html>.
import re

# (?P<name>...) names a group; (?P=name) refers back to what it matched.
ret = re.match(
    r"<(?P<name1>\w*)><(?P<name2>\w*)>.*</(?P=name2)></(?P=name1)>",
    "<html><h1>www.baidu.com</h1></html>",
)
ret.group()
re专属方法
# search
# Requirement: extract the number of times an article has been read.
# coding=utf-8
import re

# Unlike match(), search() scans the whole string for the first match.
ret = re.search(r"\d+", "阅读次数为 9999")
ret.group()
'9999'
# findall
# Requirement: collect the read counts of the python, c and c++ articles.
# coding=utf-8
import re

# findall() returns every non-overlapping match as a list of strings.
ret = re.findall(r"\d+", "python = 9999, c = 7890, c++ = 12345")
print(ret)
# 返回一个匹配列表
['9999', '7890', '12345']
# sub: replace the matched text.
# coding=utf-8
import re

# re.sub(pattern, replacement, string)
ret = re.sub(r"\d+", "998", "python = 997")
print(ret)
python = 998
# sub: replace the matched text, computing the replacement with a function.
# coding=utf-8
import re


def add(temp):
    """Return the number matched by *temp*, incremented by one, as a string.

    *temp* is the match object that re.sub() passes to a replacement
    callable; the returned string is substituted for the matched text.
    """
    str_num = temp.group()
    num = int(str_num) + 1
    return str(num)


ret = re.sub(r"\d+", add, "python = 997")
print(ret)

ret = re.sub(r"\d+", add, "python = 99")
print(ret)
python = 998
python = 100
# split: cut the string at every match and return the pieces as a list.
# coding=utf-8
import re

# Split on either ":" or a space.
ret = re.split(r":| ", "info:xiaoZhang 33 shandong")
print(ret)