Complex Regex Patterns
Nested Capture Groups
import re
def parse_complex_data(text):
    """Parse a ``'First Last [ID]'`` record using nested capture groups.

    The outer group captures the full name, the two groups nested inside it
    capture the first and last name, and a fourth group captures the digits
    between the square brackets.

    Args:
        text: String expected to start with two words separated by a single
            whitespace character, then one whitespace character and
            ``[digits]``.

    Returns:
        A dict with the keys ``'full_name'``, ``'first_name'``,
        ``'last_name'`` and ``'id'`` (all values are strings), or ``None``
        when ``text`` does not start with a matching record.
    """
    # Groups are numbered by the position of their opening parenthesis:
    # 1 = full name, 2 = first name, 3 = last name, 4 = id digits.
    pattern = r'((\w+)\s(\w+))\s\[(\d+)\]'
    match = re.match(pattern, text)
    if match is None:
        return None

    full_name, first_name, last_name, id_number = match.groups()
    return {
        'full_name': full_name,
        'first_name': first_name,
        'last_name': last_name,
        'id': id_number,
    }
# Demo: parse a sample record and print the resulting dict.
text = 'John Doe [12345]'
result = parse_complex_data(text)
# Expected output:
# {'full_name': 'John Doe', 'first_name': 'John', 'last_name': 'Doe', 'id': '12345'}
print(result)
Non-Capturing Groups
import re
def extract_domain_info(url):
    """Extract the host part of an HTTP(S) URL, without a leading ``www.``.

    Args:
        url: URL string that must start with ``http://`` or ``https://``.

    Returns:
        The text between the scheme (and optional ``www.`` prefix) and the
        first ``/``, ``?`` or ``#``, or ``None`` when ``url`` does not start
        with an HTTP(S) scheme followed by at least one host character.
    """
    # (?:www\.)? is a non-capturing group: it lets the prefix be optional
    # without shifting the number of the group that captures the host.
    # The host stops at '/', '?' or '#' so that a URL with no path
    # (e.g. 'https://example.com?q=1') does not drag the query string or
    # fragment into the result.
    pattern = r'https?://(?:www\.)?([^/?#]+)'
    match = re.match(pattern, url)
    if match is None:
        return None
    return match.group(1)
# Demo: the optional "www." prefix is skipped by the non-capturing group.
url = 'https://www.example.com/path'
domain = extract_domain_info(url)
print(domain)  # expected: example.com
Lookahead and Lookbehind
import re
def validate_password(password):
    """Check a password against a set of complexity rules.

    A password is valid when it is at least 8 characters long, contains no
    newline, and includes at least one uppercase ASCII letter, one lowercase
    ASCII letter, one digit and one of the characters ``!@#$%^&*``.

    Args:
        password: Candidate password string.

    Returns:
        ``True`` when every rule is satisfied, ``False`` otherwise.
    """
    # Each (?=...) is a zero-width positive lookahead: it asserts that the
    # required character class occurs somewhere ahead without consuming any
    # input, so the four rules are checked independently of their order.
    pattern = r'^(?=.*[A-Z])(?=.*[a-z])(?=.*\d)(?=.*[!@#$%^&*]).{8,}$'
    # fullmatch (rather than match) is needed because '$' also matches just
    # before a trailing newline, which would let 'Password1!\n' through.
    return re.fullmatch(pattern, password) is not None
# Demo: one password that is too short, one that satisfies every rule and
# one that lacks a special character.
passwords = [
    'Weak1',
    'StrongPass123!',
    'NoSpecialChar123',
]
# Expected output:
# Weak1: False
# StrongPass123!: True
# NoSpecialChar123: False
for pwd in passwords:
    print(f"{pwd}: {validate_password(pwd)}")
Regex Pattern Complexity Flow
graph TD
A[Regex Pattern] --> B{Complexity Level}
B -->|Simple| C[Basic Matching]
B -->|Intermediate| D[Capture Groups]
B -->|Advanced| E[Lookaheads/Lookbehinds]
E --> F[Complex Validation]
Advanced Regex Techniques
| Technique | Symbol | Description | Example |
|---|---|---|---|
| Non-Capturing Group | `(?:)` | Groups without capturing | `(?:www\.)?` |
| Positive Lookahead | `(?=)` | Matches if followed by | `(?=.*\d)` |
| Negative Lookahead | `(?!)` | Matches if not followed by | `(?!.*secret)` |
| Lookbehind | `(?<=)` | Matches if preceded by | `(?<=\$)\d+` |
Recursive Parsing
import re
def parse_nested_json(text):
    """Find brace-delimited objects that contain at most one nesting level.

    Python's ``re`` module has no recursive patterns, so the nesting is
    spelled out explicitly: an object body is a run of non-brace characters
    optionally interleaved with flat ``{...}`` groups. An object whose
    children themselves contain braces is not matched as a whole.

    Args:
        text: String to scan for ``{...}`` objects.

    Returns:
        A list with the inner text (outer braces stripped) of every
        non-overlapping match, in order of appearance; empty when nothing
        matches.
    """
    # Outer \{ ... \} delimits the object; the capture group holds its body.
    # (?:\{[^{}]*\}[^{}]*)* allows any number of flat nested objects, each
    # followed by more non-brace text.
    pattern = r'\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}'
    return re.findall(pattern, text)
# Demo: one flat object followed by an object holding a single nested object.
json_like = '{key1: value1} {nested: {inner: value}}'
result = parse_nested_json(json_like)
print(result)  # expected: ['key1: value1', 'nested: {inner: value}']
import re
import timeit
def optimize_regex(pattern):
    """Compile a regular expression so it can be reused across calls.

    Args:
        pattern: Regular expression source, as accepted by ``re.compile``.

    Returns:
        The compiled pattern object produced by ``re.compile``.

    Raises:
        re.error: If ``pattern`` is not a valid regular expression.
    """
    # Holding on to the compiled object lets callers skip the pattern lookup
    # that the module-level re functions perform on every call.
    return re.compile(pattern)
# Benchmark: time 10000 calls to re.compile() on the same pattern.
# NOTE: the re module caches compiled patterns, so after the first call
# this mostly measures the cache lookup rather than a full compilation.
pattern = r'(\w+)@(\w+)\.(\w+)'
compilation_time = timeit.timeit(
lambda: re.compile(pattern),
number=10000
)
# timeit.timeit returns the total elapsed time in seconds for all calls.
print(f"Compilation Time: {compilation_time}")
Key Takeaways
- Complex regex patterns require careful design
- Use non-capturing and lookahead groups strategically
- Compile regex patterns for performance
- LabEx recommends incremental learning of advanced techniques