Real-World Data Analysis Scenarios
1. Student Exam Scores Analysis
import numpy as np
def analyze_exam_scores(scores):
    """Print the median and mean of exam scores plus a short comparison note.

    Args:
        scores: Sequence of numeric scores, passed directly to
            ``np.median`` and ``np.mean``.

    Returns:
        None. The report is written to standard output.
    """
    median_score = np.median(scores)
    mean_score = np.mean(scores)

    print("Exam Scores Analysis:")
    print(f"Median Score: {median_score}")
    print(f"Mean Score: {mean_score:.2f}")

    # Only a strictly larger median selects the first message; a tie
    # (median == mean) falls through to the second one.
    if median_score > mean_score:
        print("The median suggests less impact from extreme scores.")
    else:
        print("Some extreme scores might be affecting the average.")
# Sample exam results used to demonstrate the report (note the 120 at the top end).
exam_scores = [
    65, 70, 72, 74, 75, 75,
    76, 80, 85, 90, 95, 120,
]
analyze_exam_scores(scores=exam_scores)
2. Income Distribution Analysis
import numpy as np
def analyze_income_distribution(incomes):
    """Print the median, mean and range of a collection of incomes.

    Args:
        incomes: Non-empty sequence of numeric incomes. It is passed to
            ``np.median`` / ``np.mean`` and to the built-in ``max`` / ``min``.

    Returns:
        None. The report is written to standard output.

    Raises:
        ValueError: If ``incomes`` is an empty sequence (raised by ``max``).
    """
    median_income = np.median(incomes)
    mean_income = np.mean(incomes)

    print("Income Distribution Analysis:")
    print(f"Median Income: ${median_income:,.2f}")
    print(f"Mean Income: ${mean_income:,.2f}")

    # Spread between the highest and lowest income. This is a plain range,
    # not a dedicated inequality measure.
    income_range = max(incomes) - min(incomes)
    print(f"Income Range: ${income_range:,.2f}")
Data Filtering and Preprocessing
import numpy as np
def remove_outliers(data, threshold=1.5):
    """Return the values of ``data`` that lie inside the IQR fences.

    The fences are ``Q1 - threshold * IQR`` and ``Q3 + threshold * IQR``,
    where Q1 and Q3 are the 25th and 75th percentiles of ``data`` and
    ``IQR = Q3 - Q1``. Values equal to a fence are kept.

    Args:
        data: Sized sequence of numeric values.
        threshold: Multiplier applied to the IQR when placing the fences.

    Returns:
        A new list with the elements of ``data`` that fall within the
        fences, in their original order. Empty input yields ``[]``.
    """
    # Nothing to filter; also avoids asking NumPy for percentiles of an
    # empty array.
    if len(data) == 0:
        return []

    q1, q3 = np.percentile(data, [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - threshold * iqr
    upper_bound = q3 + threshold * iqr
    return [x for x in data if lower_bound <= x <= upper_bound]
# Sample values: a cluster between 10 and 19 followed by 100, 200 and 300.
raw_data = [10, 12, 13, 14, 15, 16, 17, 18, 19, 100, 200, 300]
cleaned_data = remove_outliers(data=raw_data)

# Show the input and the filtered result one after the other.
for label, values in (("Original Data:", raw_data), ("Cleaned Data:", cleaned_data)):
    print(label, values)
Comparative Analysis Methods
Comparing Multiple Datasets
import numpy as np
def compare_datasets(datasets):
    """Compute and print the median of each dataset.

    Args:
        datasets: Iterable of numeric sequences; each one is passed to
            ``np.median``.

    Returns:
        A list with one median per dataset, in input order.
    """
    medians = [np.median(dataset) for dataset in datasets]

    print("Dataset Median Comparison:")
    # Datasets are numbered from 1 in the printed report.
    for i, median in enumerate(medians, 1):
        print(f"Dataset {i} Median: {median}")

    return medians
# Three small samples whose medians are reported side by side.
dataset1 = [1, 2, 3, 4, 5]
dataset2 = [2, 4, 6, 8, 10]
dataset3 = [5, 10, 15, 20, 25]

comparison_results = compare_datasets(
    datasets=[dataset1, dataset2, dataset3],
)
| Domain | Use Case | Benefit |
| --- | --- | --- |
| Finance | Stock Price Analysis | Reduces impact of extreme market fluctuations |
| Healthcare | Patient Measurements | Provides robust central tendency metric |
| Education | Performance Evaluation | Minimizes skew from exceptional performers |
| Research | Data Normalization | Handles asymmetric distributions |
graph TD
A[Median in Data Analysis] --> B[Outlier Detection]
A --> C[Performance Measurement]
A --> D[Distribution Understanding]
B --> E[Remove Extreme Values]
C --> F[Robust Central Tendency]
D --> G[Identify Data Characteristics]
LabEx recommends working through these examples to develop a thorough understanding of median calculations in real-world scenarios.