Now that we have explored different methods for accessing nested JSON data, let's create a utility function that makes it easier to extract values from complex nested structures. This function will combine the safety of the get() method with the flexibility to handle different types of data.
Create a new file called json_extractor.py:
import json
from typing import Any, List, Dict, Union, Optional
def extract_value(data: Dict, path: List[Union[str, int]], default: Any = None) -> Any:
    """
    Safely extract a value from nested dictionaries and lists using a path list.

    Each path element is applied in order: a dictionary is searched for the
    element as a key, and a list is indexed when the element is an integer.

    Args:
        data: The dictionary (or list) to extract the value from
        path: A list of dictionary keys and/or integer list indices
            representing the path to the value
        default: The default value to return if the path doesn't exist

    Returns:
        The value at the specified path, or the default value if any step of
        the path cannot be followed. An empty path returns data itself.
    """
    current = data
    for key in path:
        if isinstance(current, dict):
            if key not in current:
                return default
            current = current[key]
        elif (
            isinstance(current, list)
            and isinstance(key, int)
            and not isinstance(key, bool)  ## bool is an int subclass; True is not an index
        ):
            ## Negative indices count from the end, as in normal list indexing
            if not -len(current) <= key < len(current):
                return default
            current = current[key]
        else:
            ## Cannot descend into a scalar, or index a list with a non-integer
            return default
    return current
## Run the examples only when this file is executed as a script
if __name__ == "__main__":
    ## Load JSON from file (JSON text is UTF-8, so don't rely on the platform default)
    with open('sample.json', 'r', encoding='utf-8') as file:
        data = json.load(file)

    ## Basic usage examples
    name = extract_value(data, ["person", "name"], "Unknown")
    age = extract_value(data, ["person", "age"], 0)
    print(f"Name: {name}, Age: {age}")

    ## Extracting values that don't exist
    occupation = extract_value(data, ["person", "occupation"], "Not specified")
    print(f"Occupation: {occupation}")

    ## Extracting deeply nested values
    email = extract_value(data, ["person", "contact", "email"], "No email")
    phone = extract_value(data, ["person", "contact", "phone"], "No phone")
    print(f"Email: {email}, Phone: {phone}")

    ## Extracting from arrays: look the list up once, then index it only if it
    ## really is a non-empty list
    hobbies = extract_value(data, ["person", "hobbies"], [])
    first_hobby = hobbies[0] if isinstance(hobbies, list) and hobbies else "No hobbies"
    print(f"First hobby: {first_hobby}")

    ## Extracting from arrays of objects
    projects = extract_value(data, ["person", "employment", "projects"], [])
    if isinstance(projects, list) and projects:
        first_project = projects[0]
        first_project_name = extract_value(first_project, ["name"], "Unknown project")
        first_project_status = extract_value(first_project, ["status"], "Unknown status")
        print(f"First project: {first_project_name}, Status: {first_project_status}")
    else:
        print("No projects found")
Run the script:
python3 json_extractor.py
You should see output similar to:
Name: John Doe, Age: 35
Occupation: Not specified
Email: john.doe@example.com, Phone: 555-123-4567
First hobby: reading
First project: Project Alpha, Status: completed
Let's enhance our extractor to support dot notation for paths, which makes it more intuitive to use. Create a file called enhanced_extractor.py:
import json
from typing import Any, Dict, List, Union
def get_nested_value(data: Dict, path_string: str, default: Any = None) -> Any:
    """
    Safely extract a value from a nested dictionary using a dot-separated path string.

    A path segment may end with one or more list indices in brackets, for
    example "projects[0]" or "matrix[1][2]". A segment that is only an index,
    such as "[0]", indexes the current value directly.

    Args:
        data: The dictionary to extract value from
        path_string: A dot-separated string representing the path to the value
        default: The default value to return if the path doesn't exist

    Returns:
        The value at the specified path, or the default value if a key is
        missing, an index is out of range or not an integer, or the path runs
        into a value of the wrong type
    """
    ## Start with the full dictionary
    current = data

    ## Follow the path one dot-separated segment at a time
    for key in path_string.split("."):
        ## Handle list indexing with [n] notation
        if key.endswith("]") and "[" in key:
            ## Split "name[i][j]" into the key before the first bracket and
            ## the text of each bracketed index ("i", "j")
            list_key, _, bracketed = key.partition("[")
            index_strings = bracketed[:-1].split("][")

            ## Get the list
            if list_key:  ## If there's a key before the bracket
                if not isinstance(current, dict) or list_key not in current:
                    return default
                current = current[list_key]

            ## Apply each index in turn
            for index_str in index_strings:
                try:
                    index = int(index_str)
                except ValueError:
                    ## Not a number, so the path cannot be followed
                    return default
                if not isinstance(current, list):
                    return default
                ## Negative indices count from the end but must stay in range
                if not -len(current) <= index < len(current):
                    return default
                current = current[index]
        else:
            ## Regular dictionary key
            if not isinstance(current, dict) or key not in current:
                return default
            current = current[key]

    return current
## Run the demo only when this file is executed directly. Other scripts import
## get_nested_value from this module, and importing must not read sample.json
## or print the demo output.
if __name__ == "__main__":
    ## Load JSON from file (JSON text is UTF-8, so don't rely on the platform default)
    with open('sample.json', 'r', encoding='utf-8') as file:
        data = json.load(file)

    ## Test the enhanced extractor
    print("Basic access:")
    print(f"Name: {get_nested_value(data, 'person.name', 'Unknown')}")
    print(f"Age: {get_nested_value(data, 'person.age', 0)}")
    print(f"Occupation: {get_nested_value(data, 'person.occupation', 'Not specified')}")

    print("\nNested access:")
    print(f"Email: {get_nested_value(data, 'person.contact.email', 'No email')}")
    print(f"City: {get_nested_value(data, 'person.address.city', 'Unknown city')}")

    print("\nArray access:")
    print(f"First hobby: {get_nested_value(data, 'person.hobbies[0]', 'No hobbies')}")
    print(f"Second hobby: {get_nested_value(data, 'person.hobbies[1]', 'No second hobby')}")
    print(f"Non-existent hobby: {get_nested_value(data, 'person.hobbies[10]', 'No such hobby')}")

    print("\nComplex access:")
    print(f"Company: {get_nested_value(data, 'person.employment.company', 'Unknown company')}")
    print(f"First project name: {get_nested_value(data, 'person.employment.projects[0].name', 'No project')}")
    print(f"Second project status: {get_nested_value(data, 'person.employment.projects[1].status', 'Unknown status')}")
    print(f"Non-existent project: {get_nested_value(data, 'person.employment.projects[2].name', 'No such project')}")
    print(f"Education: {get_nested_value(data, 'person.education.degree', 'No education info')}")
Run the script:
python3 enhanced_extractor.py
You should see output similar to:
Basic access:
Name: John Doe
Age: 35
Occupation: Not specified
Nested access:
Email: john.doe@example.com
City: Anytown
Array access:
First hobby: reading
Second hobby: hiking
Non-existent hobby: No such hobby
Complex access:
Company: Tech Solutions Inc.
First project name: Project Alpha
Second project status: in-progress
Non-existent project: No such project
Education: No education info
Practical Application
Now let's apply our enhanced JSON extractor to a more complex real-world scenario. Create a file called practical_example.py:
import json
import os
from typing import Any, Dict, List
## Import our enhanced extractor function
from enhanced_extractor import get_nested_value
## Create a more complex JSON structure for reporting: top-level company
## metadata, a list of departments (each with its own list of projects), and
## per-quarter financial figures
report_data = {
    "company": "Global Analytics Ltd.",
    "report_date": "2023-11-01",
    ## One entry per department
    "departments": [
        {
            "name": "Engineering",
            "manager": "Alice Johnson",
            "employee_count": 45,
            "projects": [
                {"id": "E001", "name": "API Gateway", "status": "completed", "budget": 125000},
                {"id": "E002", "name": "Mobile App", "status": "in-progress", "budget": 200000}
            ]
        },
        {
            "name": "Marketing",
            "manager": "Bob Smith",
            "employee_count": 28,
            "projects": [
                {"id": "M001", "name": "Q4 Campaign", "status": "planning", "budget": 75000}
            ]
        },
        {
            "name": "Customer Support",
            "manager": "Carol Williams",
            "employee_count": 32,
            ## Empty project list: exercises the "no projects" case
            "projects": []
        }
    ],
    ## Revenue, expenses and profit margin for two quarters
    "financial": {
        "current_quarter": {
            "revenue": 2500000,
            "expenses": 1800000,
            "profit_margin": 0.28
        },
        "previous_quarter": {
            "revenue": 2300000,
            "expenses": 1750000,
            "profit_margin": 0.24
        }
    }
}
## Write the report out as indented JSON so the example leaves a file to inspect
report_json = json.dumps(report_data, indent=2)
with open('report.json', 'w') as report_file:
    report_file.write(report_json)
print("Report data saved to report.json")
## Now let's extract useful information from this report
def _value_or_default(source: Any, path: str, default: Any) -> Any:
    """Return the value at path, using default when it is missing or null."""
    value = get_nested_value(source, path, default)
    return default if value is None else value


def generate_summary(data: Dict) -> str:
    """Generate a summary of the company report.

    Args:
        data: The report dictionary. Reads "company", "report_date", the
            revenue of "financial.current_quarter" and
            "financial.previous_quarter", and the "departments" list. A key
            that is missing or holds an explicit null falls back to a default.

    Returns:
        The summary as multi-line text: a financial overview, operational
        totals, and one section per department listing its projects.
    """
    company = _value_or_default(data, "company", "Unknown Company")
    report_date = _value_or_default(data, "report_date", "Unknown Date")

    ## Financial summary
    current_revenue = _value_or_default(data, "financial.current_quarter.revenue", 0)
    previous_revenue = _value_or_default(data, "financial.previous_quarter.revenue", 0)
    revenue_change = current_revenue - previous_revenue
    ## Avoid dividing by zero when there is no previous revenue to compare with
    revenue_change_percent = (revenue_change / previous_revenue * 100) if previous_revenue > 0 else 0

    ## Department summary; each department's projects are looked up once and reused
    departments = _value_or_default(data, "departments", [])
    projects_by_dept = [_value_or_default(dept, "projects", []) for dept in departments]
    total_employees = sum(_value_or_default(dept, "employee_count", 0) for dept in departments)

    ## Project counts
    total_projects = sum(len(projects) for projects in projects_by_dept)
    completed_projects = sum(
        1
        for projects in projects_by_dept
        for proj in projects
        if get_nested_value(proj, "status", "") == "completed"
    )

    ## Collect the pieces of text and join them once at the end
    parts = [
        f"Company Report Summary for {company} as of {report_date}\n",
        "=" * 50 + "\n\n",
        "Financial Overview:\n",
        f"- Current Quarter Revenue: ${current_revenue:,}\n",
        f"- Revenue Change: ${revenue_change:,} ({revenue_change_percent:.1f}%)\n\n",
        "Operational Overview:\n",
        f"- Total Departments: {len(departments)}\n",
        f"- Total Employees: {total_employees}\n",
        f"- Total Projects: {total_projects}\n",
        f"- Completed Projects: {completed_projects}\n\n",
        "Department Details:\n",
    ]

    for i, (dept, projects) in enumerate(zip(departments, projects_by_dept)):
        dept_name = _value_or_default(dept, "name", f"Department {i+1}")
        manager = _value_or_default(dept, "manager", "No manager")
        employees = _value_or_default(dept, "employee_count", 0)

        parts.append(f"- {dept_name} (Manager: {manager})\n")
        parts.append(f" * Employees: {employees}\n")
        parts.append(f" * Projects: {len(projects)}\n")

        if projects:
            for proj in projects:
                proj_name = _value_or_default(proj, "name", "Unnamed Project")
                proj_status = _value_or_default(proj, "status", "unknown")
                proj_budget = _value_or_default(proj, "budget", 0)
                parts.append(f" - {proj_name} (Status: {proj_status}, Budget: ${proj_budget:,})\n")
        else:
            parts.append(" - No active projects\n")

        ## Blank line between departments
        parts.append("\n")

    return "".join(parts)
## Build the summary text once, then show it on screen and keep a copy on disk
summary = generate_summary(report_data)
print("\nGenerated Report Summary:", summary, sep="\n")

## Save the same text to report_summary.txt
with open('report_summary.txt', 'w') as summary_file:
    print(summary, end="", file=summary_file)
print("Summary saved to report_summary.txt")
Run the script:
python3 practical_example.py
You should see a message confirming that the report data was saved, followed by a detailed summary of the company report.
Check the output file:
cat report_summary.txt
This practical example demonstrates how our JSON extractor utility can be used to build robust reporting tools that gracefully handle missing data. The get_nested_value function allows us to safely extract values from complex nested structures: when a key is missing or an intermediate value is not a dictionary or list, it returns the default you supply instead of raising a KeyError or TypeError.
Best Practices Summary
Based on the techniques we've explored in this lab, here are the best practices for extracting values from nested JSON objects:
- Use the get() method instead of direct indexing to provide default values for missing keys.
- Create utility functions for common JSON extraction patterns to avoid repetitive code.
- Handle missing paths gracefully by providing sensible default values.
- Type check values before processing them to avoid errors (e.g., checking if a value is a list before accessing an index).
- Break down complex paths into separate variables for better readability.
- Use path strings with dot notation for more intuitive access to nested values.
- Document your extraction code to make it clear what you're looking for in the JSON structure.
By following these best practices, you can write more robust and maintainable code for working with nested JSON objects in Python.