ROC Multiclase Uno Contra el Resto
La estrategia multiclase Uno Contra el Resto (OvR, del inglés One-vs-Rest) consiste en calcular una curva ROC para cada una de las n_classes clases. En cada paso, una clase dada se considera la clase positiva y todas las clases restantes se agrupan como una única clase negativa. En este paso, mostramos cómo calcular la curva ROC utilizando la estrategia multiclase OvR.
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import RocCurveDisplay
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.preprocessing import LabelBinarizer
## One-vs-Rest binarization of the target
# Learn the label set from the training targets, then encode the test targets
# with the fitted binarizer.
label_binarizer = LabelBinarizer()
label_binarizer.fit(y_train)
y_onehot_test = label_binarizer.transform(y_test)

## Logistic Regression model
# Fit on the training split, then score the test split with per-class
# probabilities.
classifier = LogisticRegression()
classifier.fit(X_train, y_train)
y_score = classifier.predict_proba(X_test)
## Per-class ROC curve and ROC AUC score (each class against the rest)
# Results are keyed by the integer class index.
fpr, tpr, roc_auc = {}, {}, {}
for class_id in range(n_classes):
    # Column `class_id` of the binarized targets / predicted scores.
    class_truth = y_onehot_test[:, class_id]
    class_score = y_score[:, class_id]
    fpr[class_id], tpr[class_id], _ = roc_curve(class_truth, class_score)
    roc_auc[class_id] = roc_auc_score(class_truth, class_score)
## Micro-average ROC curve and ROC area
# Flatten both matrices so every (sample, class) entry is treated as one
# binary decision, then compute a single curve over all of them.
flat_truth = y_onehot_test.ravel()
flat_score = y_score.ravel()
fpr["micro"], tpr["micro"], _ = roc_curve(flat_truth, flat_score)
roc_auc["micro"] = roc_auc_score(
    y_onehot_test,
    y_score,
    multi_class="ovr",
    average="micro",
)
## Compute macro-average ROC curve and ROC area
# Each per-class curve is sampled at its own set of false-positive rates, so
# the curves cannot be combined point by point (and concatenating them does
# not produce an average at all). Interpolate every class's TPR onto one
# shared FPR grid and take the unweighted mean across classes.
fpr_grid = np.linspace(0.0, 1.0, 1000)
mean_tpr = np.zeros_like(fpr_grid)
for i in range(n_classes):
    # np.interp needs increasing x-coordinates; roc_curve returns fpr in
    # increasing order.
    mean_tpr += np.interp(fpr_grid, fpr[i], tpr[i])
mean_tpr /= n_classes

fpr["macro"] = fpr_grid
tpr["macro"] = mean_tpr

# Reported macro AUC: unweighted mean of the per-class one-vs-rest AUCs.
roc_auc["macro"] = roc_auc_score(
    y_onehot_test,
    y_score,
    multi_class="ovr",
    average="macro",
)
## Plot ROC curves for each class and the micro/macro averages
fig, ax = plt.subplots(figsize=(6, 6))

# One curve per class. RocCurveDisplay appends " (AUC = x.xx)" to `name` in the
# legend on its own, so the name must not contain the AUC again. Extra keyword
# arguments are forwarded to matplotlib as line properties, so only valid line
# properties may be passed here.
colors = ["aqua", "darkorange", "cornflowerblue"]
for i in range(n_classes):
    RocCurveDisplay.from_predictions(
        y_onehot_test[:, i],
        y_score[:, i],
        name=f"Curva ROC de la clase {target_names[i]}",
        # Cycle through the palette so classes beyond the third are still drawn.
        color=colors[i % len(colors)],
        ax=ax,
    )

# Micro-average: every (sample, class) entry treated as one binary decision.
RocCurveDisplay.from_predictions(
    y_onehot_test.ravel(),
    y_score.ravel(),
    name="Curva ROC Micro-promedio",
    color="deeppink",
    linestyle=":",
    linewidth=4,
    ax=ax,
)

# Macro-average: drawn directly from the precomputed fpr/tpr arrays, so the
# legend label carries the AUC explicitly.
ax.plot(
    fpr["macro"],
    tpr["macro"],
    label=f"Curva ROC Macro-promedio (AUC = {roc_auc['macro']:.2f})",
    color="navy",
    linestyle=":",
    linewidth=4,
)

# Diagonal reference line for a chance-level classifier.
ax.plot([0, 1], [0, 1], "k--", label="Nivel de azar")

ax.axis("square")
ax.set_xlabel("Tasa de Falsos Positivos")
ax.set_ylabel("Tasa de Verdaderos Positivos")
ax.set_title("Curvas ROC Uno Contra el Resto")
ax.legend()
plt.show()