Practical Regex Applications
Data Validation
Email Validation
import re
def validate_email(email):
    """Return True if *email* looks like a syntactically valid address.

    This is a pragmatic shape check (local part, ``@``, domain, and a
    top-level domain of at least two letters), not full RFC 5322 validation.

    Args:
        email: The string to check.

    Returns:
        bool: True when the entire string matches the pattern, else False.
    """
    pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    # fullmatch() anchors both ends strictly; match() with a trailing ``$``
    # would also accept a string that ends in a newline.
    return re.fullmatch(pattern, email) is not None
# Examples: two well-formed addresses and one string without an "@".
# NOTE: the address literals had been reduced to '[email protected]'
# placeholders (the residue of an e-mail obfuscation filter), so every sample
# printed False. They are replaced here with reserved example-domain addresses.
emails = [
    'user@example.com',
    'invalid.email',
    'first.last@mail.example.org',
]
for email in emails:
    print(f"{email}: {validate_email(email)}")
Password Strength Checker
def check_password_strength(password):
    """Report whether *password* satisfies every strength rule.

    Args:
        password: The candidate password string.

    Returns:
        bool: True only if each rule below finds a match in *password*.
    """
    rules = (
        r'.{8,}',       # a run of at least 8 characters
        r'[A-Z]',       # an uppercase ASCII letter
        r'[a-z]',       # a lowercase ASCII letter
        r'\d',          # a digit
        r'[!@#$%^&*]',  # one of the listed special characters
    )
    for rule in rules:
        # Stop at the first rule that fails.
        if re.search(rule, password) is None:
            return False
    return True
# Try the checker on a few sample passwords and print each verdict.
passwords = [
    'weak',
    'Strong1!',
    'LabEx2023',
]
for pwd in passwords:
    is_strong = check_password_strength(pwd)
    print(f"{pwd}: {is_strong}")
Log Parsing
import re
# Sample log lines laid out as "<date> <time> <LEVEL> <message>".
log_entries = [
    '2023-06-15 14:30:45 ERROR Database connection failed',
    '2023-06-15 15:45:22 INFO Server started successfully',
    '2023-06-16 09:12:33 WARNING Low disk space',
]

# Four capture groups: date, time, level word, and the remaining text.
log_pattern = r'(\d{4}-\d{2}-\d{2}) (\d{2}:\d{2}:\d{2}) (\w+) (.+)'

for entry in log_entries:
    match = re.match(log_pattern, entry)
    if match is None:
        # Entries that do not fit the layout produce no output.
        continue
    date, time, level, message = match.group(1, 2, 3, 4)
    print(f"Date: {date}, Time: {time}, Level: {level}, Message: {message}")
Parsing CSV-like Strings
def parse_csv_like_string(data):
    """Collect the contents of every double-quoted field in *data*.

    Args:
        data: Text that may contain ``"..."`` quoted segments.

    Returns:
        list[str]: The text between each pair of double quotes, in order of
        appearance. Unquoted fields are ignored.
    """
    quoted_field = r'"([^"]*)"'
    return [found.group(1) for found in re.finditer(quoted_field, data)]
# A header row followed by two records; only the quoted fields are extracted.
csv_data = (
    'Name,Age,City\n'
    '"John Doe",30,"New York"\n'
    '"Jane Smith",25,"San Francisco"'
)
parsed_data = parse_csv_like_string(csv_data)
print(parsed_data)
Web Scraping Preprocessing
def extract_urls(text):
    """Find http:// and https:// URLs in *text*.

    A match is the scheme, a host made of word characters, dots, hyphens or
    percent-escapes, and an optional path of slashes, word characters, dots
    and hyphens. Query strings and fragments are not captured.

    Args:
        text: The text to scan.

    Returns:
        list[str]: Every URL found, in order of appearance.
    """
    # The path class deliberately excludes whitespace: a URL ends at the first
    # space, so prose that follows it is not swallowed into the match.
    url_pattern = r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+[/\w.-]*'
    return re.findall(url_pattern, text)
# Multi-line sample: two URLs on their own lines plus a line with no URL.
sample_text = (
    "\n"
    "Check out these websites:\n"
    "https://www.labex.io\n"
    "http://example.com/page\n"
    "Invalid: not a url\n"
)
urls = extract_urls(sample_text)
print(urls)
Text Transformation
def standardize_phone_number(phone):
    """Reformat a 10-digit phone number as ``(XXX) XXX-XXXX``.

    Args:
        phone: The phone number in any punctuation style.

    Returns:
        str: The formatted number when *phone* contains exactly ten digits;
        otherwise *phone* is returned unchanged.
    """
    # Keep only the digits, whatever separators were used.
    digits = re.sub(r'\D', '', phone)
    if len(digits) != 10:
        return phone
    area, prefix, line = digits[:3], digits[3:6], digits[6:]
    return f"({area}) {prefix}-{line}"
# The same kind of number written three ways; print each with its formatted form.
phone_numbers = ['123-456-7890', '(987) 654-3210', '1234567890']
for number in phone_numbers:
    formatted = standardize_phone_number(number)
    print(f"{number} -> {formatted}")
Regex Application Workflow
graph TD
A[Raw Data Input] --> B[Regex Pattern]
B --> C{Pattern Matching}
C -->|Match Found| D[Extract/Transform Data]
C -->|No Match| E[Handle Exception]
D --> F[Processed Data]
| Technique | Recommendation |
|---|---|
| Compilation | Use re.compile() for repeated patterns |
| Specificity | Write precise patterns |
| Readability | Use verbose regex with re.VERBOSE flag |
| Error Handling | Always validate regex matches |
Complex Example: Log Analysis
def analyze_system_logs(log_file):
    """Collect the date and message of every ``ERROR:`` line in a log file.

    A line counts when it contains a ``YYYY-MM-DD`` date followed by a space
    and, later on the same line, ``ERROR: `` and at least one more character.

    Args:
        log_file: Path of the log file to read.

    Returns:
        list[tuple[str, str]]: ``(date, error_message)`` pairs in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Compiled once, outside the per-line loop.
    error_pattern = re.compile(r'(\d{4}-\d{2}-\d{2}) .*ERROR: (.+)')
    critical_errors = []
    # Explicit UTF-8 keeps the result independent of the platform default, and
    # errors='replace' stops one undecodable byte from aborting the scan.
    with open(log_file, 'r', encoding='utf-8', errors='replace') as file:
        for line in file:
            match = error_pattern.search(line)
            if match:
                date, error_message = match.groups()
                critical_errors.append((date, error_message))
    return critical_errors
# Hypothetical usage: the path is only an example and must point to a
# readable log file for the call to succeed.
logs = analyze_system_logs('/var/log/system.log')