Criando uma Função Utilitária para Extração de Valores JSON
Agora que exploramos diferentes métodos para acessar dados JSON aninhados, vamos criar uma função utilitária que facilita a extração de valores de estruturas aninhadas complexas. Esta função combinará a segurança do método get() com a flexibilidade para lidar com diferentes tipos de dados.
Crie um novo arquivo chamado json_extractor.py:
import json
from typing import Any, List, Dict, Union, Optional
def extract_value(data: Dict, path: List[str], default: Any = None) -> Any:
    """
    Walk a nested dictionary along a list of keys and return what is found.

    Args:
        data: The outermost dictionary to start from
        path: Keys to follow in order, one per nesting level
        default: Value handed back as soon as a step cannot be taken

    Returns:
        The value reached after the last key, or ``default`` when an
        intermediate value is not a dict or does not contain the next key.
        An empty path returns ``data`` itself.
    """
    node = data
    for step in path:
        # Bail out as soon as we cannot descend one more level.
        if not isinstance(node, dict) or step not in node:
            return default
        node = node[step]
    return node
# Load JSON from file (JSON text is UTF-8, so do not rely on the locale default)
with open('sample.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Basic usage examples
name = extract_value(data, ["person", "name"], "Unknown")
age = extract_value(data, ["person", "age"], 0)
print(f"Name: {name}, Age: {age}")

# Extracting values that don't exist
occupation = extract_value(data, ["person", "occupation"], "Not specified")
print(f"Occupation: {occupation}")

# Extracting deeply nested values
email = extract_value(data, ["person", "contact", "email"], "No email")
phone = extract_value(data, ["person", "contact", "phone"], "No phone")
print(f"Email: {email}, Phone: {phone}")

# Extracting from arrays: look the list up once, then make sure it really is
# a non-empty list before indexing into it
hobbies = extract_value(data, ["person", "hobbies"], [])
if isinstance(hobbies, list) and hobbies:
    first_hobby = hobbies[0]
else:
    first_hobby = "No hobbies"
print(f"First hobby: {first_hobby}")

# Extracting from arrays of objects: the isinstance check keeps a non-list
# value (e.g. a dict or string) from being indexed with [0]
projects = extract_value(data, ["person", "employment", "projects"], [])
if isinstance(projects, list) and projects:
    first_project = projects[0]
    first_project_name = extract_value(first_project, ["name"], "Unknown project")
    first_project_status = extract_value(first_project, ["status"], "Unknown status")
    print(f"First project: {first_project_name}, Status: {first_project_status}")
else:
    print("No projects found")
Execute o script:
python3 json_extractor.py
Você deve ver uma saída semelhante a:
Name: John Doe, Age: 35
Occupation: Not specified
Email: john.doe@example.com, Phone: 555-123-4567
First hobby: reading
First project: Project Alpha, Status: completed
Vamos aprimorar nosso extrator para suportar a notação de ponto para caminhos, o que o torna mais intuitivo de usar. Crie um arquivo chamado enhanced_extractor.py:
import json
from typing import Any, Dict, List, Union
def get_nested_value(data: Dict, path_string: str, default: Any = None) -> Any:
    """
    Safely extract a value from nested dicts/lists using a dot-separated path.

    Each dot-separated segment is either a plain dictionary key (``name``) or
    a key followed by one or more list indices (``projects[0]``,
    ``grid[1][0]``). A segment may also consist of indices only (``[0]``) to
    index directly into the current list.

    Args:
        data: The dictionary (or list) to extract the value from
        path_string: A dot-separated string representing the path to the value
        default: The value to return if the path cannot be followed

    Returns:
        The value at the specified path, or ``default`` when a key is missing,
        an index is out of range or not an integer, or an intermediate value
        has the wrong type. This function never raises for a malformed path.
    """
    current = data

    for segment in path_string.split("."):
        key, bracket, index_part = segment.partition("[")

        if bracket and segment.endswith("]"):
            # index_part looks like "0]" or "1][0]": drop the final bracket
            # and split on the "][" that separates chained indices.
            try:
                indices = [int(text) for text in index_part[:-1].split("][")]
            except ValueError:
                # Non-numeric or empty index such as "[abc]" or "[]"
                return default

            # Resolve the key in front of the brackets, if there is one
            if key:
                if not isinstance(current, dict) or key not in current:
                    return default
                current = current[key]

            for index in indices:
                # Negative indices count from the end, like normal Python
                # indexing, but must also stay within bounds.
                if not isinstance(current, list):
                    return default
                if not -len(current) <= index < len(current):
                    return default
                current = current[index]
        else:
            # Regular dictionary key
            if not isinstance(current, dict) or segment not in current:
                return default
            current = current[segment]

    return current
# Run the demo only when this file is executed directly. Other modules import
# get_nested_value from here, and an import must not read sample.json or print.
if __name__ == "__main__":
    # Load JSON from file (JSON text is UTF-8, so do not rely on the locale default)
    with open('sample.json', 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Test the enhanced extractor
    print("Basic access:")
    print(f"Name: {get_nested_value(data, 'person.name', 'Unknown')}")
    print(f"Age: {get_nested_value(data, 'person.age', 0)}")
    print(f"Occupation: {get_nested_value(data, 'person.occupation', 'Not specified')}")

    print("\nNested access:")
    print(f"Email: {get_nested_value(data, 'person.contact.email', 'No email')}")
    print(f"City: {get_nested_value(data, 'person.address.city', 'Unknown city')}")

    print("\nArray access:")
    print(f"First hobby: {get_nested_value(data, 'person.hobbies[0]', 'No hobbies')}")
    print(f"Second hobby: {get_nested_value(data, 'person.hobbies[1]', 'No second hobby')}")
    print(f"Non-existent hobby: {get_nested_value(data, 'person.hobbies[10]', 'No such hobby')}")

    print("\nComplex access:")
    print(f"Company: {get_nested_value(data, 'person.employment.company', 'Unknown company')}")
    print(f"First project name: {get_nested_value(data, 'person.employment.projects[0].name', 'No project')}")
    print(f"Second project status: {get_nested_value(data, 'person.employment.projects[1].status', 'Unknown status')}")
    print(f"Non-existent project: {get_nested_value(data, 'person.employment.projects[2].name', 'No such project')}")
    print(f"Education: {get_nested_value(data, 'person.education.degree', 'No education info')}")
Execute o script:
python3 enhanced_extractor.py
Você deve ver uma saída semelhante a:
Basic access:
Name: John Doe
Age: 35
Occupation: Not specified
Nested access:
Email: john.doe@example.com
City: Anytown
Array access:
First hobby: reading
Second hobby: hiking
Non-existent hobby: No such hobby
Complex access:
Company: Tech Solutions Inc.
First project name: Project Alpha
Second project status: in-progress
Non-existent project: No such project
Education: No education info
Aplicação Prática
Agora, vamos aplicar nosso extrator JSON aprimorado a um cenário do mundo real mais complexo. Crie um arquivo chamado practical_example.py:
import json
import os
from typing import Any, Dict, List
## Import our enhanced extractor function
from enhanced_extractor import get_nested_value
# Sample company report used by the rest of this script. It mixes top-level
# scalars, a list of department objects (each with its own list of projects)
# and a nested "financial" section, so it exercises every kind of lookup.
report_data = {
    "company": "Global Analytics Ltd.",
    "report_date": "2023-11-01",
    # One entry per department; "projects" may be an empty list
    "departments": [
        {
            "name": "Engineering",
            "manager": "Alice Johnson",
            "employee_count": 45,
            "projects": [
                {"id": "E001", "name": "API Gateway", "status": "completed", "budget": 125000},
                {"id": "E002", "name": "Mobile App", "status": "in-progress", "budget": 200000}
            ]
        },
        {
            "name": "Marketing",
            "manager": "Bob Smith",
            "employee_count": 28,
            "projects": [
                {"id": "M001", "name": "Q4 Campaign", "status": "planning", "budget": 75000}
            ]
        },
        {
            # Department without any projects
            "name": "Customer Support",
            "manager": "Carol Williams",
            "employee_count": 32,
            "projects": []
        }
    ],
    # Figures for the current and the previous quarter, keyed identically
    "financial": {
        "current_quarter": {
            "revenue": 2500000,
            "expenses": 1800000,
            "profit_margin": 0.28
        },
        "previous_quarter": {
            "revenue": 2300000,
            "expenses": 1750000,
            "profit_margin": 0.24
        }
    }
}
# Persist the report as pretty-printed JSON for demonstration purposes
with open('report.json', 'w') as report_file:
    serialized_report = json.dumps(report_data, indent=2)
    report_file.write(serialized_report)
print("Report data saved to report.json")
## Now let's extract useful information from this report
def generate_summary(data: Dict) -> str:
    """
    Build a plain-text summary of a company report dictionary.

    Every value is read through ``get_nested_value`` with a fallback, so
    missing keys show up as placeholder text or zeros instead of raising.

    Args:
        data: The report dictionary; the code reads the keys ``company``,
            ``report_date``, ``financial`` and ``departments`` from it

    Returns:
        The summary as a single multi-line string with a header, a financial
        overview, an operational overview and one section per department
    """
    company = get_nested_value(data, "company", "Unknown Company")
    report_date = get_nested_value(data, "report_date", "Unknown Date")

    # Quarter-over-quarter revenue movement
    current_revenue = get_nested_value(data, "financial.current_quarter.revenue", 0)
    previous_revenue = get_nested_value(data, "financial.previous_quarter.revenue", 0)
    revenue_change = current_revenue - previous_revenue
    if previous_revenue > 0:
        revenue_change_percent = revenue_change / previous_revenue * 100
    else:
        # No usable baseline: report 0% rather than dividing by zero
        revenue_change_percent = 0

    # Head-count and project totals, gathered in one pass over the departments
    departments = get_nested_value(data, "departments", [])
    total_employees = 0
    total_projects = 0
    completed_projects = 0
    for dept in departments:
        total_employees += get_nested_value(dept, "employee_count", 0)
        dept_projects = get_nested_value(dept, "projects", [])
        total_projects += len(dept_projects)
        for proj in dept_projects:
            if get_nested_value(proj, "status", "") == "completed":
                completed_projects += 1

    # Collect the text pieces in a list and join them once at the end
    parts = [
        f"Company Report Summary for {company} as of {report_date}\n",
        "=" * 50 + "\n\n",
        "Financial Overview:\n",
        f"- Current Quarter Revenue: ${current_revenue:,}\n",
        f"- Revenue Change: ${revenue_change:,} ({revenue_change_percent:.1f}%)\n\n",
        "Operational Overview:\n",
        f"- Total Departments: {len(departments)}\n",
        f"- Total Employees: {total_employees}\n",
        f"- Total Projects: {total_projects}\n",
        f"- Completed Projects: {completed_projects}\n\n",
        "Department Details:\n",
    ]

    for position, dept in enumerate(departments, start=1):
        # Unnamed departments fall back to their 1-based position
        dept_name = get_nested_value(dept, "name", f"Department {position}")
        manager = get_nested_value(dept, "manager", "No manager")
        employees = get_nested_value(dept, "employee_count", 0)
        projects = get_nested_value(dept, "projects", [])

        parts.append(f"- {dept_name} (Manager: {manager})\n")
        parts.append(f" * Employees: {employees}\n")
        parts.append(f" * Projects: {len(projects)}\n")

        if not projects:
            parts.append(" - No active projects\n")
        else:
            for proj in projects:
                proj_name = get_nested_value(proj, "name", "Unnamed Project")
                proj_status = get_nested_value(proj, "status", "unknown")
                proj_budget = get_nested_value(proj, "budget", 0)
                parts.append(f" - {proj_name} (Status: {proj_status}, Budget: ${proj_budget:,})\n")
        parts.append("\n")

    return "".join(parts)
# Generate and display the summary
summary = generate_summary(report_data)
print("\nGenerated Report Summary:")
print(summary)

# Save the summary to a file. The encoding is explicit so that names with
# non-ASCII characters are written the same way on every platform instead of
# depending on the locale's default encoding.
with open('report_summary.txt', 'w', encoding='utf-8') as file:
    file.write(summary)
print("Summary saved to report_summary.txt")
Execute o script:
python3 practical_example.py
Você deve ver uma mensagem confirmando que os dados do relatório foram salvos, seguida por um resumo detalhado do relatório da empresa.
Verifique o arquivo de saída:
cat report_summary.txt
Este exemplo prático demonstra como nosso utilitário de extração JSON pode ser usado para construir ferramentas de relatório robustas que lidam com dados ausentes de forma elegante. A função get_nested_value nos permite extrair valores de estruturas aninhadas complexas com segurança, sem nos preocuparmos com exceções como KeyError ou TypeError (por exemplo, ao tentar acessar uma chave em um valor None).
Resumo das Melhores Práticas
Com base nas técnicas que exploramos neste laboratório, aqui estão as melhores práticas para extrair valores de objetos JSON aninhados:
- Use o método get() em vez de indexação direta para fornecer valores padrão para chaves ausentes.
- Crie funções utilitárias para padrões comuns de extração JSON para evitar código repetitivo.
- Lide com caminhos ausentes de forma elegante, fornecendo valores padrão sensatos.
- Verifique o tipo dos valores antes de processá-los para evitar erros (por exemplo, verificar se um valor é uma lista antes de acessar um índice).
- Divida caminhos complexos em variáveis separadas para melhor legibilidade.
- Use strings de caminho com notação de ponto para um acesso mais intuitivo aos valores aninhados.
- Documente seu código de extração para deixar claro o que você está procurando na estrutura JSON.
Seguindo essas melhores práticas, você pode escrever um código mais robusto e fácil de manter para trabalhar com objetos JSON aninhados em Python.