Python 正则表达式语法

正则表达式由字符类、量词、分组、转义等语法元素组成。

字符类

匹配一类字符中的任意一个。

Python

import re

# Character classes: each entry pairs a bracket expression with a sample
# string; the trailing comment shows what findall returns for that pair.
char_class_demos = [
    (r"[abc]", "abcde"),           # any one of a, b, c -> ['a', 'b', 'c']
    (r"[a-z]", "Hello123"),        # any lowercase letter a-z -> ['e', 'l', 'l', 'o']
    (r"[0-9]", "abc123"),          # any digit 0-9 -> ['1', '2', '3']
    (r"[^abc]", "abcdef"),         # negation: anything except a, b, c -> ['d', 'e', 'f']
    (r"[a-zA-Z0-9]", "Hello123"),  # combined ranges -> ['H', 'e', 'l', 'l', 'o', '1', '2', '3']
]

for char_class, sample in char_class_demos:
    print(re.findall(char_class, sample))

预定义字符类

Python

import re

# Shorthand character classes. The bracketed forms are the ASCII equivalents;
# with str patterns Python also accepts other Unicode digits / word
# characters unless the re.ASCII flag is given.

# \d  a digit, like [0-9]
print(re.compile(r"\d").findall("abc123"))  # ['1', '2', '3']

# \D  anything that is not a digit, like [^0-9]
print(re.compile(r"\D").findall("abc123"))  # ['a', 'b', 'c']

# \w  a word character (letter, digit or underscore), like [a-zA-Z0-9_]
print(re.compile(r"\w+").findall("hello_world 123"))  # ['hello_world', '123']

# \W  anything that is not a word character
print(re.compile(r"\W").findall("hello world"))  # [' ']

# \s  whitespace (space, tab, newline, ...)
print(re.compile(r"\s+").findall("hello\tworld\n"))  # ['\t', '\n']

# \S  anything that is not whitespace
print(re.compile(r"\S+").findall("hello world"))  # ['hello', 'world']

# .   any single character except a newline
print(re.compile(r".").findall("abc"))  # ['a', 'b', 'c']

预定义字符类列表：

语法	含义
\d	数字
\D	非数字
\w	字母数字下划线
\W	非字母数字下划线
\s	空白字符
\S	非空白字符
.	除换行外的任意字符

量词

指定匹配次数。

Python

import re

# Quantifiers control how many times the preceding item may repeat.
# The first three are tried on the same text so the results can be compared.
b_run_text = "ac abc abbc"
for b_quantified in (
    r"ab*c",  # *  zero or more times -> ['ac', 'abc', 'abbc']
    r"ab+c",  # +  one or more times  -> ['abc', 'abbc']
    r"ab?c",  # ?  zero or one time   -> ['ac', 'abc']
):
    print(re.findall(b_quantified, b_run_text))

# Counted repetition with braces.
a_run_text = "a aa aaa aaaa"
print(re.findall(r"a{3}", a_run_text))    # {n}   exactly n times       -> ['aaa', 'aaa']
print(re.findall(r"a{2,3}", a_run_text))  # {n,m} between n and m times -> ['aa', 'aaa', 'aaa']
print(re.findall(r"a{2,}", "a aa aaa"))   # {n,}  at least n times      -> ['aa', 'aaa']

量词列表：

量词	含义
*	0 次或多次
+	1 次或多次
?	0 次或 1 次
{n}	精确 n 次
{n,m}	n 到 m 次
{n,}	至少 n 次

定位符

匹配位置而非字符。

Python

import re

# ^ anchors the match at the start of the string
print(re.findall(r"^hello", "hello world"))  # ['hello']
print(re.findall(r"^hello", "say hello"))  # []

# $ anchors the match at the end of the string
print(re.findall(r"world$", "hello world"))  # ['world']
print(re.findall(r"world$", "world hello"))  # []

# \b word boundary: only the stand-alone "hello" qualifies
print(re.findall(r"\bhello\b", "hello world hellothere"))  # ['hello']

# \B non-word-boundary: the position just before "hello" must NOT be a word
# boundary. The start of "hellothere" is a boundary, so nothing matches there;
# inside "sayhello" the "hello" is preceded by a word character, so it matches.
print(re.findall(r"\Bhello", "hellothere"))  # []
print(re.findall(r"\Bhello", "sayhello"))  # ['hello']

定位符列表：

语法	含义
^	开头
$	结尾
\b	单词边界
\B	非单词边界

分组

Python

import re

# (...) capturing group: the quantifier applies to the whole "ab" unit,
# and group(0) is the entire matched text
capturing = re.compile(r"(ab)+")
match = capturing.search("ababab")
print(match.group(0))  # ababab

# (?:...) non-capturing group: groups for the quantifier without
# creating a numbered group
non_capturing = re.compile(r"(?:ab)+")
match = non_capturing.search("ababab")
print(match.group(0))  # ababab

# |  alternation: match either side
print([hit.group() for hit in re.finditer(r"cat|dog", "cat and dog")])  # ['cat', 'dog']

转义

Python

import re

# A backslash in front of a metacharacter makes it match literally.
for escaped, sample in (
    (r"\.", "a.b.c"),  # literal dot       -> ['.', '.']
    (r"\*", "a*b*c"),  # literal asterisk  -> ['*', '*']
    (r"\\", "a\\b"),   # literal backslash -> ['\\']
):
    print(re.findall(escaped, sample))

# Characters that commonly need escaping: . * + ? ^ $ \ | ( ) [ ] { }

特殊序列

Python

import re

# \A  start of the string (similar to ^)
print(re.compile(r"\Ahello").findall("hello world"))  # ['hello']

# \Z  end of the string (similar to $)
print(re.compile(r"world\Z").findall("hello world"))  # ['world']

# \1, \2, ... are backreferences: \1 must repeat whatever group 1 matched
text = "hello hello"
repeated_word = re.compile(r"(\w+)\s+\1")
match = repeated_word.search(text)
print(match.group(0))  # hello hello

综合示例

Python

import re

# Validation has to cover the whole input. re.match only anchors the start,
# so e.g. "13812345678999" would still pass the phone check; re.fullmatch
# additionally requires the pattern to reach the end of the string.

# Email (simplified): word characters, "@", word characters, ".", word characters
email_pattern = r"\w+@\w+\.\w+"
print(re.fullmatch(email_pattern, "user@example.com"))

# Mobile number: 11 digits, first digit 1, second digit 3-9
phone_pattern = r"1[3-9]\d{9}"
print(re.fullmatch(phone_pattern, "13812345678"))

# Date laid out as 4 digits - 2 digits - 2 digits (values are not range-checked)
date_pattern = r"\d{4}-\d{2}-\d{2}"
print(re.fullmatch(date_pattern, "2024-05-19"))

# Dotted quad with 1-3 digits per part (does not enforce the 0-255 range)
ip_pattern = r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}"
print(re.fullmatch(ip_pattern, "192.168.1.1"))

要点总结

[abc] 字符类匹配集合内任意字符
[a-z] 范围，[^abc] 否定
\d 数字，\w 字母数字下划线，\s 空白字符
* 0 次或多次，+ 至少 1 次，? 0 次或 1 次
{n} 精确次数，{n,m} 范围次数
^ 开头，$ 结尾，\b 单词边界
(...) 分组，(?:...) 非捕获，| 或
特殊字符需用 \ 转义
组合语法元素构建复杂匹配模式

📝 发现内容有误？点击此处直接编辑