# Source code for sail_on_client.evaluate.utils
"""Helper functions for metrics."""
import numpy as np
import pandas as pd
from typing import List
def check_novel_validity(p_novel: np.ndarray, gt_novel: np.ndarray) -> None:
    """
    Check the validity of the inputs for per-sample novelty detection.

    Args:
        p_novel: NX1 vector with each element corresponding to probability of novelty
        gt_novel: NX1 vector with each element 0 (not novel) or 1 (novel)

    Raises:
        ValueError: If the shapes disagree, probabilities fall outside [0, 1],
            or the ground truth contains values other than 0 and 1.

    Returns:
        None
    """
    # Predictions and ground truth must describe the same set of samples.
    if p_novel.shape[0] != gt_novel.shape[0]:
        raise ValueError(
            "Number of predicted samples not equal to number of groundtruth samples!"
        )
    if p_novel.ndim != 1:
        raise ValueError(
            "Predicted probabilities must be a vector but is an array of dimension {}!".format(
                p_novel.ndim
            )
        )
    # Probabilities by definition lie in the closed interval [0, 1].
    if np.any(p_novel < 0) or np.any(p_novel > 1):
        raise ValueError("Predicted novel probabilities should be between 0 and 1!")
    # Ground truth is a binary indicator: 0 = not novel, 1 = novel.
    if np.any(np.logical_and(gt_novel != 0, gt_novel != 1)):
        raise ValueError(
            "Groundtruth array should only consist of 0s (non-novel) or 1s(novel)!"
        )
def check_class_validity(p_class: np.ndarray, gt_class: np.ndarray) -> None:
    """
    Check the validity of the inputs for image classification.

    Args:
        p_class: Nx(K+1) matrix with each row corresponding to K+1 class probabilities for each sample
        gt_class: Nx1 vector with ground-truth class for each sample

    Raises:
        ValueError: If the shapes disagree, probabilities fall outside [0, 1],
            ``p_class`` is not 2-D, or any label is outside [0, K].

    Returns:
        None
    """
    if p_class.shape[0] != gt_class.shape[0]:
        raise ValueError(
            "Number of predicted samples not equal to number of groundtruth samples!"
        )
    # Validate dimensionality before indexing shape[1] below.
    if p_class.ndim != 2:
        raise ValueError(
            "Predicted probabilities must be a 2D matrix but is an array of dimension {}!".format(
                p_class.ndim
            )
        )
    if np.any(p_class < 0) or np.any(p_class > 1):
        raise ValueError("Predicted class probabilities should be between 0 and 1!")
    # Labels index columns of p_class, so they must lie in [0, shape[1]-1].
    # Fixed bug: the original tested np.min(gt_class < 0), the minimum of a
    # boolean array, which only fired when *every* label was negative.
    if np.max(gt_class) >= p_class.shape[1] or np.min(gt_class) < 0:
        raise ValueError(
            "Groundtruth class labels must lie in the range [0-{}]!".format(
                p_class.shape[1] - 1
            )
        )
def topk_accuracy(
    p_class: np.ndarray, gt_class: np.ndarray, k: int, txt: str = ""
) -> float:
    """
    Compute top-K accuracy.

    Args:
        p_class: Nx(K+1) matrix with each row corresponding to K+1 class probabilities for each sample
        gt_class: Nx1 vector with ground-truth class for each sample
        k: 'k' used in top-K accuracy
        txt: Text associated with accuracy

    Returns:
        top-K accuracy
    """
    check_class_validity(p_class, gt_class)
    # Indices of the k highest-probability classes per sample (descending order).
    top_k_preds = np.argsort(-p_class)[:, :k]
    # A sample is a hit when its true label appears among those k predictions.
    hits = np.any(top_k_preds == gt_class[:, np.newaxis], axis=1)
    num_correct = int(np.count_nonzero(hits))
    return round(num_correct / p_class.shape[0], 5)
def top3_accuracy(p_class: np.ndarray, gt_class: np.ndarray, txt: str = "") -> float:
    """
    Compute top-3 accuracy (see topk_accuracy() for details).

    Args:
        p_class: Nx(K+1) matrix with each row corresponding to K+1 class probabilities for each sample
        gt_class: Nx1 vector with ground-truth class for each sample
        txt: Text associated with accuracy

    Returns:
        top-3 accuracy
    """
    # Thin convenience wrapper around the generic top-K implementation.
    return topk_accuracy(p_class, gt_class, k=3, txt=txt)
def top1_accuracy(p_class: np.ndarray, gt_class: np.ndarray, txt: str = "") -> float:
    """
    Compute top-1 accuracy (see topk_accuracy() for details).

    Args:
        p_class: Nx(K+1) matrix with each row corresponding to K+1 class probabilities for each sample
        gt_class: Nx1 vector with ground-truth class for each sample
        txt: Text associated with accuracy

    Returns:
        top-1 accuracy
    """
    # Thin convenience wrapper around the generic top-K implementation.
    return topk_accuracy(p_class, gt_class, k=1, txt=txt)
# compute information for the robustness measures
def get_rolling_stats(
    p_class: np.ndarray, gt_class: np.ndarray, k: int = 1, window_size: int = 50
) -> List:
    """
    Compute rolling statistics which are used for robustness measures.

    Args:
        p_class: Nx(K+1) matrix with each row corresponding to K+1 class probabilities for each sample
        gt_class: Nx1 vector with ground-truth class for each sample
        k: 'k' used for selecting top k values
        window_size: Window size for running stats

    Returns:
        List with mean and standard deviation of the rolling top-k accuracy.
        NOTE(review): entries are NaN when fewer than window_size samples are
        available — presumably handled by the caller; confirm.
    """
    # Per-sample top-k hit indicator: 1 if the true label is among the k
    # highest-probability predictions, else 0.
    p_cls_topk = np.argsort(-p_class)[:, :k]
    gt_cls_indx = gt_class[:, np.newaxis]
    check_zero_topk = p_cls_topk - gt_cls_indx
    topk_correct = 1 * (np.any(check_zero_topk == 0, axis=1))
    # Compute the rolling accuracy series once instead of twice (the original
    # rebuilt the Series and re-ran the rolling mean for each statistic).
    rolling_acc = pd.Series(topk_correct).rolling(window=window_size).mean()
    return [rolling_acc.mean(), rolling_acc.std()]
def get_first_detect_novelty(p_novel: np.ndarray, thresh: float) -> int:
    """
    Find the first index where novelty is detected.

    Args:
        p_novel: NX1 vector with each element corresponding to probability of novelty
        thresh: Score threshold for detecting when a sample is novel

    Returns:
        1-based index where an agent reports that a sample is novel; if no
        sample crosses the threshold, len(p_novel) + 1.
    """
    detections = np.flatnonzero(p_novel >= thresh)
    if detections.size == 0:
        # No score reached the threshold: report one past the last sample.
        return len(p_novel) + 1
    return detections[0] + 1