re模块 之分组
>>> import re >>> re.findall("ab|c","sdfab|csdf") ['ab', 'c'] >>> re.findall("ab|cd","sdfab|cdsdf") ['ab', 'cd'] >>> re.findall ("abc+","abccccc") ['abccccc'] >>> re.findall ("abc*","abccccc") ['abccccc'] >>> re.findall("(abc)+","abcabcabcabc") # 分组 ['abc'] >>> re.findall ("(abc)*","abcabcabcabc") ['abc', ''] >>> re.findall ("abc+","abcabcabcabcabc") ['abc', 'abc', 'abc', 'abc', 'abc']
search 分组用法(只返回第一个匹配到的 Match 对象;没有匹配时返回 None)
>>> re.search("(?P<name>\w+)","abcccc") <re.Match object; span=(0, 6), match='abcccc'> >>> re.search("(?P<name>\w+)","abcccc").group() 'abcccc'
根据组名称 提取内容
>>> re.search("(?P<name>[a-z]+)\d+","john18musicxiaoming20movie").group("name") 'john' >>> re.search("(?P<name>[a-z]+)(?P<age>\d+)","john18musicxiaoming20movie").group("age") '18' >>>
re模块的其他常用方法
>>> re.match("\d+","44safs45asdf321saf789").group() # match 只从字符串开头开始匹配,并且只返回一个匹配对象 '44' >>> re.split(" ","hello world") # 按空格为分界线取 ['hello', 'world'] >>> re.split("[ |]","hello world|aaa") ['hello', 'world', 'aaa'] >>> re.split("[ab]","safdbds") # 按 a b 为分界线取 ['s', 'fd', 'ds']
替换(re.sub;第四个参数 count 表示最多替换的次数)
>>> re.sub("\d+","A","a1sd4fg45h") 'aAsdAfgAh' >>> re.sub("\d","A","a1sd4fg45h") 'aAsdAfgAAh' >>> re.sub("\d+","A","a1sd4fg45h",2) 'aAsdAfg45h'
>>> a = re.compile ("\d+") # 定义规则 >>> a.findall("safsd12a45") # 直接调用 (可以调用多次) ['12', '45']
>>> re.finditer ("\d+","asfa45saf56") # 返回一个迭代器,按需逐个产生匹配对象(Match),而不是一次性生成列表 <callable_iterator object at 0x00000220B1E50E80>
用 next() 逐个取出迭代器中的匹配对象
>>> b = re.finditer ("\d+","asfa45saf56") >>> next(b) <re.Match object; span=(4, 6), match='45'> >>> next(b) <re.Match object; span=(9, 11), match='56'>
扫描二维码关注公众号,回复:
7038718 查看本文章
分组与 findall 的返回值(捕获分组与非捕获分组)
>>> re.findall("www\.(baidu|123)\.com","www.baidu.com") ['baidu'] >>> re.findall("www\.(?:baidu|123)\.com","www.baidu.com") # (?:...) 是非捕获分组:不产生分组,因此 findall 返回整个匹配的字符串 ['www.baidu.com']