绘制马氏距离的等高线
我们将绘制两种方法计算出的马氏距离的等高线。请注意,基于稳健 MCD 的马氏距离能更好地拟合内点(黑色点),而基于 MLE 的距离受异常值(红色点)的影响更大。
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(10, 5))
## Plot data set
inlier_plot = ax.scatter(X[:, 0], X[:, 1], color="black", label="inliers")
outlier_plot = ax.scatter(
X[:, 0][-n_outliers:], X[:, 1][-n_outliers:], color="red", label="outliers"
)
ax.set_xlim(ax.get_xlim()[0], 10.0)
ax.set_title("Mahalanobis distances of a contaminated data set")
## Create meshgrid of feature 1 and feature 2 values
xx, yy = np.meshgrid(
np.linspace(plt.xlim()[0], plt.xlim()[1], 100),
np.linspace(plt.ylim()[0], plt.ylim()[1], 100),
)
zz = np.c_[xx.ravel(), yy.ravel()]
## Calculate the MLE based Mahalanobis distances of the meshgrid
mahal_emp_cov = emp_cov.mahalanobis(zz)
mahal_emp_cov = mahal_emp_cov.reshape(xx.shape)
emp_cov_contour = plt.contour(
xx, yy, np.sqrt(mahal_emp_cov), cmap=plt.cm.PuBu_r, linestyles="dashed"
)
## Calculate the MCD based Mahalanobis distances
mahal_robust_cov = robust_cov.mahalanobis(zz)
mahal_robust_cov = mahal_robust_cov.reshape(xx.shape)
robust_contour = ax.contour(
xx, yy, np.sqrt(mahal_robust_cov), cmap=plt.cm.YlOrBr_r, linestyles="dotted"
)
## Add legend
ax.legend(
[
emp_cov_contour.collections[1],
robust_contour.collections[1],
inlier_plot,
outlier_plot,
],
["MLE dist", "MCD dist", "inliers", "outliers"],
loc="upper right",
borderaxespad=0,
)
plt.show()