绘制实验1的结果
我们使用 matplotlib
和 seaborn
库来绘制第一个实验的结果。当 n_clusters
> n_classes
时,兰德指数趋于饱和。其他未经调整的指标,如V度量,显示出聚类数量与样本数量之间的线性关系。经过机会调整的指标,如ARI和AMI,显示出一些围绕平均分数0.0的随机变化,与样本数量和聚类数量无关。
import matplotlib.pyplot as plt
import seaborn as sns
n_samples = 1000
n_classes = 10
n_clusters_range = np.linspace(2, 100, 10).astype(int)
plots = []
names = []
sns.color_palette("colorblind")
plt.figure(1)
for marker, (score_name, score_func) in zip("d^vx.,", score_funcs):
scores = fixed_classes_uniform_labelings_scores(
score_func, n_samples, n_clusters_range, n_classes=n_classes
)
plots.append(
plt.errorbar(
n_clusters_range,
scores.mean(axis=1),
scores.std(axis=1),
alpha=0.8,
linewidth=1,
marker=marker,
)[0]
)
names.append(score_name)
plt.title(
"Clustering measures for random uniform labeling\n"
f"against reference assignment with {n_classes} classes"
)
plt.xlabel(f"Number of clusters (Number of samples is fixed to {n_samples})")
plt.ylabel("Score value")
plt.ylim(bottom=-0.05, top=1.05)
plt.legend(plots, names, bbox_to_anchor=(0.5, 0.5))
plt.show()