实际应用案例
数据验证
电子邮件验证
import re
def validate_email(email):
    """Return True when ``email`` looks like a syntactically valid address.

    The check is purely pattern based: a local part made of letters, digits
    and ``._%+-``, an ``@``, a domain made of letters, digits, dots and
    hyphens, and a final dot followed by at least two letters.

    Args:
        email: The candidate address as a string.

    Returns:
        bool: True if the whole string matches the pattern, False otherwise.
    """
    pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    # fullmatch is used instead of match with a trailing ``$`` because ``$``
    # also matches just before a final newline, which would let
    # 'user@example.com\n' pass validation.
    return re.fullmatch(pattern, email) is not None
# LabEx email validation example.
# Sample inputs: two well-formed addresses and one string without an '@'.
emails = [
    'user@example.com',
    'invalid.email',
    'another.user@domain.co.uk',
]
for email in emails:
    print(f"{email}: {validate_email(email)}")
电话号码验证
def validate_phone(phone):
    """Return True when ``phone`` looks like a plain numeric phone number.

    Accepted shape: an optional leading ``+``, an optional ``1``, then
    between 10 and 14 ASCII digits, with no separators or spaces.

    Args:
        phone: The candidate phone number as a string.

    Returns:
        bool: True if the whole string matches the pattern, False otherwise.
    """
    pattern = r'\+?1?\d{10,14}'
    # fullmatch rejects a trailing newline that ``$`` would tolerate, and
    # re.ASCII keeps ``\d`` from accepting non-ASCII Unicode digits.
    return re.fullmatch(pattern, phone, flags=re.ASCII) is not None
# Sample inputs: a number with a leading '+', a bare 10-digit number, and a
# non-numeric string.
phones = ['+15551234567', '1234567890', 'invalid']
for phone in phones:
    print(f"{phone}: {validate_phone(phone)}")
数据提取
提取URL
# Collect every http:// or https:// URL in the sentence; each URL is taken to
# run up to the next whitespace character.
text = "Visit our website at https://www.labex.io and http://example.com"
urls = [found.group(0) for found in re.finditer(r'https?://\S+', text)]
print(urls)
解析日志文件
# Split a log line of the form "<date> <time> [<LEVEL>] <message>" into its
# four fields.
log_entry = "2023-06-15 14:30:45 [ERROR] Database connection failed"
pattern = r'(\d{4}-\d{2}-\d{2}) (\d{2}:\d{2}:\d{2}) \[(\w+)\] (.+)'
match = re.match(pattern, log_entry)
if match:
    # One capture group per field, in the order they appear in the line.
    date, time, level, message = match.groups()
    print(f"日期: {date}, 时间: {time}, 级别: {level}, 消息: {message}")
文本处理
替换敏感信息
def mask_sensitive_data(text):
    """Replace every email address found in ``text`` with ``[MASKED EMAIL]``.

    Args:
        text: The input string that may contain email addresses.

    Returns:
        str: A copy of ``text`` in which each matched address has been
        replaced by the literal ``[MASKED EMAIL]``; text without a match is
        returned unchanged.
    """
    # Mask email addresses. The top-level-domain class is [A-Za-z]: writing
    # it as [A-Z|a-z] would also accept a literal '|', because '|' is not
    # alternation inside a character class.
    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    return re.sub(email_pattern, '[MASKED EMAIL]', text)
# Sample sentence containing one email address for the masking function.
示例文本 = "Contact support at support@example.com for assistance"
print(mask_sensitive_data(示例文本))
配置解析
解析配置文件
# Sample configuration text: one key=value pair per line, preceded and
# followed by a newline.
config = (
    "\n"
    "server_host=localhost\n"
    "server_port=8080\n"
    "debug_mode=true\n"
)
def parse_config(config_text):
config_dict = {}
pattern = r'^(\w+)=(.+)$'
for line in config_text.strip().split('\n'):
match = re.match(pattern, line)
if match:
key, value = match.groups()
config_dict[key] = value
return config_dict
# Parse the sample configuration text and display the resulting dictionary.
parsed_config = parse_config(config_text=config)
print(parsed_config)
性能分析
graph LR
A[正则表达式用例] --> B[数据验证]
A --> C[数据提取]
A --> D[文本处理]
A --> E[配置解析]
最佳实践
| 实践 | 描述 | 示例 |
| --- | --- | --- |
| 编译模式 | 重用编译后的模式 | `pattern = re.compile(r'\d+')` |
| 使用原始字符串 | 防止转义序列问题 | `r'\n'` 而不是 `'\\n'` |
| 处理错误 | 捕获潜在的正则表达式异常 | try-except 块 |
| 优化模式 | 使用特定、高效的模式 | 避免过于宽泛的模式 |
性能考虑
import timeit
# Compare the performance of a regular expression with a string method.
def regex_method():
    """Search a fixed sample string for a run of digits with ``re.search``.

    The match result is discarded and nothing is returned; the function
    exists only as a target for timing.
    """
    re.search(r'\d+', 'Hello 123 World')
def string_method():
    """Test a fixed sample string for the substring ``'123'`` with ``in``.

    The result is discarded and nothing is returned; the function exists
    only as a target for timing. Note that it looks for one literal
    substring, whereas a ``\\d+`` regex search would find any run of digits.
    """
    '123' in 'Hello 123 World'
# Measure execution time: call each function 10000 times and report the
# total elapsed time for each.
regex_time = timeit.Timer(regex_method).timeit(number=10000)
string_time = timeit.Timer(string_method).timeit(number=10000)
print(f"正则表达式方法时间: {regex_time}")
print(f"字符串方法时间: {string_time}")