二、Python 正则表达式

贪婪匹配

惰性匹配

import re
# Sample input shared by the greedy and lazy examples below.
# It is named `text` rather than `str` so the built-in `str` type is not
# shadowed for the rest of the module.
text = "X1Y22Y333Y4444"


# Greedy matching: `.*` consumes as many characters as it can and only gives
# characters back when the rest of the pattern would otherwise fail.
# Start at "X", take any characters except newline (zero or more), and end at
# the LAST "Y" that can still be reached.
greedy_from_x = re.findall(r"X.*Y", text)
print(greedy_from_x)
# ['X1Y22Y333Y']

# Start at the first "Y" and extend to the last "Y" in the string.
greedy_from_y = re.findall(r"Y.*Y", text)
print(greedy_from_y)
# ['Y22Y333Y']


# -----------------------------------------------------------------


# Lazy (non-greedy) matching: `.*?` consumes as few characters as possible,
# so every match ends at the FIRST "Y" found after its starting character.
# Start at "X" and stop at the nearest following "Y".
lazy_from_x = re.findall(r"X.*?Y", text)
print(lazy_from_x)
# ['X1Y']

# Start at a "Y" and stop at the nearest following "Y". findall() returns
# non-overlapping matches, and after "Y22Y" is consumed the remainder
# ("333Y4444") contains only one "Y", so there is just a single result.
lazy_from_y = re.findall(r"Y.*?Y", text)
print(lazy_from_y)
# ['Y22Y']


-----------------------------------------------------------------



str2 = "Hello World 123"

# `\w+` matches one or more word characters (letters, digits, underscore).
# The pattern is written as a raw string so the backslash reaches the regex
# engine unchanged; in a normal string literal "\w" is an invalid escape
# sequence that Python warns about.
# re.compile() builds a reusable pattern object: exp.findall(s) returns the
# same result as re.findall(pattern, s) and is convenient when the same
# pattern is applied many times.
exp = re.compile(r"\w+")
print(exp.findall(str2))
# ['Hello', 'World', '123']


-----------------------------------------------------------------


# Pull one piece of text (here, the name) out of a longer sentence.
# The capture group keeps whatever sits between "is " and "!": any characters
# except newline, repeated zero or more times. findall() returns only the
# captured group, not the whole match.
exp = re.compile("is (.*)!")
str3 = "My name is Martin!"
print(exp.findall(str3))
# ['Martin']

------------------

# Several sentences in one string: collect every name.
# The group again sits between "is " and "!", but the trailing "?" makes the
# quantifier lazy, so each capture stops at the nearest "!" instead of running
# on to the last one in the string.
exp = re.compile("is (.*?)!")
str4 = "My name is Martin! My name is Sam! My name is Tom!"
print(exp.findall(str4))
# ['Martin', 'Sam', 'Tom']

------------------

# Extract (name, age) pairs from multi-line text.
str5 = """
My name is Martin, and I am 25 years old.
My name is Sam, and I am 27 years old.
My name is Tom, and I am 23 years old.
"""
# Group 1 (name): the text after "is " up to the nearest comma (lazy match of
# any characters except newline).
# Then skip, lazily, from the comma up to "am ".
# Group 2 (age): the run of digits that FOLLOWS "am " (zero or more digits).
# Because "." does not match a newline here, each match stays on one line.
# The pattern is a raw string so "\d" is passed to the regex engine as-is
# instead of being treated as an (invalid) string escape sequence.
exp = re.compile(r"is (.*?),.*?am (\d*)")
# With two groups, findall() yields one (name, age) tuple per match.
for name, age in exp.findall(str5):
    print(name, age)

# Output:
# Martin 25
# Sam 27
# Tom 23

------------------

# Extract the text content of HTML elements, leaving the tags out.
# "<p .*?>" lazily matches an opening <p ...> tag together with its
# attributes; "(.*?)<" then captures the text up to the next "<".
exp = re.compile("<p .*?>(.*?)<")

str6 = """<p id='header' class='bg-primary'>Header</p>
<p id='main' class='bg-100'>Hello</p>
<p id='article' class='bg-120'>Hi.....</p>
"""

print(exp.findall(str6))
# ['Header', 'Hello', 'Hi.....']


------------------

# Continuing the previous example: here the element text spans several lines.
# By default "." does not match a newline; compiling with re.DOTALL (the long
# name of re.S) lets "." match newlines too, so the capture can cross lines.

str7 = """
<p id='header' class='bg-primary'>
    Header
</p>
<p id='main' class='bg-100'>
    Hello
</p>
<p id='article' class='bg-120'>
    Hi.....
</p>
"""
exp = re.compile("<p .*?>(.*?)<", flags=re.DOTALL)
# Each capture still carries the surrounding newlines and indentation, so
# strip it before printing.
for i in exp.findall(str7):
    print(i.strip())

# Output:
# Header
# Hello
# Hi.....
二、Python 正则表达式

贪婪匹配

惰性匹配

猜你喜欢