# Source code for sail_on_client.evaluate.utils
"""Helper functions for metrics."""
import numpy as np
import pandas as pd
from typing import List
def check_novel_validity(p_novel: np.ndarray, gt_novel: np.ndarray) -> None:
    """
    Check the validity of the inputs for per-sample novelty detection.

    Args:
        p_novel: NX1 vector with each element corresponding to probability of novelty
        gt_novel: NX1 vector with each element 0 (not novel) or 1 (novel)

    Raises:
        ValueError: If the shapes disagree, probabilities fall outside [0, 1],
            or the ground truth contains values other than 0 and 1.

    Returns:
        None
    """
    # Predictions and ground truth must describe the same set of samples.
    if p_novel.shape[0] != gt_novel.shape[0]:
        raise ValueError(
            "Number of predicted samples not equal to number of groundtruth samples!"
        )
    if p_novel.ndim != 1:
        raise ValueError(
            "Predicted probabilities must be a vector but is an array of dimension {}!".format(
                p_novel.ndim
            )
        )
    # Probabilities by definition lie in the closed interval [0, 1].
    if np.any(p_novel < 0) or np.any(p_novel > 1):
        raise ValueError("Predicted novel probabilities should be between 0 and 1!")
    # Ground truth is a binary indicator: 0 = not novel, 1 = novel.
    if np.any(np.logical_and(gt_novel != 0, gt_novel != 1)):
        raise ValueError(
            "Groundtruth array should only consist of 0s (non-novel) or 1s(novel)!"
        )
def check_class_validity(p_class: np.ndarray, gt_class: np.ndarray) -> None:
    """
    Check the validity of the inputs for image classification.

    Args:
        p_class: Nx(K+1) matrix with each row corresponding to K+1 class probabilities for each sample
        gt_class: Nx1 vector with ground-truth class for each sample

    Raises:
        ValueError: If the shapes disagree, probabilities fall outside [0, 1],
            ``p_class`` is not 2-D, or any label is outside [0, K].

    Returns:
        None
    """
    if p_class.shape[0] != gt_class.shape[0]:
        raise ValueError(
            "Number of predicted samples not equal to number of groundtruth samples!"
        )
    # Validate dimensionality before indexing shape[1] below.
    if p_class.ndim != 2:
        raise ValueError(
            "Predicted probabilities must be a 2D matrix but is an array of dimension {}!".format(
                p_class.ndim
            )
        )
    if np.any(p_class < 0) or np.any(p_class > 1):
        raise ValueError("Predicted class probabilities should be between 0 and 1!")
    # Labels index columns of p_class, so they must lie in [0, shape[1]-1].
    # Fixed bug: the original tested np.min(gt_class < 0), the minimum of a
    # boolean array, which only fired when *every* label was negative.
    if np.max(gt_class) >= p_class.shape[1] or np.min(gt_class) < 0:
        raise ValueError(
            "Groundtruth class labels must lie in the range [0-{}]!".format(
                p_class.shape[1] - 1
            )
        )
def topk_accuracy(
    p_class: np.ndarray, gt_class: np.ndarray, k: int, txt: str = ""
) -> float:
    """
    Compute top-K accuracy.

    Args:
        p_class: Nx(K+1) matrix with each row corresponding to K+1 class probabilities for each sample
        gt_class: Nx1 vector with ground-truth class for each sample
        k: 'k' used in top-K accuracy
        txt: Text associated with accuracy

    Returns:
        top-K accuracy
    """
    check_class_validity(p_class, gt_class)
    # Indices of the k highest-probability classes per sample (descending order).
    top_k_preds = np.argsort(-p_class)[:, :k]
    # A sample is a hit when its true label appears among those k predictions.
    hits = np.any(top_k_preds == gt_class[:, np.newaxis], axis=1)
    num_correct = int(np.count_nonzero(hits))
    return round(num_correct / p_class.shape[0], 5)
def top3_accuracy(p_class: np.ndarray, gt_class: np.ndarray, txt: str = "") -> float:
    """
    Compute top-3 accuracy (see topk_accuracy() for details).

    Args:
        p_class: Nx(K+1) matrix with each row corresponding to K+1 class probabilities for each sample
        gt_class: Nx1 vector with ground-truth class for each sample
        txt: Text associated with accuracy

    Returns:
        top-3 accuracy
    """
    # Thin convenience wrapper around the generic top-K implementation.
    return topk_accuracy(p_class, gt_class, k=3, txt=txt)
def top1_accuracy(p_class: np.ndarray, gt_class: np.ndarray, txt: str = "") -> float:
    """
    Compute top-1 accuracy (see topk_accuracy() for details).

    Args:
        p_class: Nx(K+1) matrix with each row corresponding to K+1 class probabilities for each sample
        gt_class: Nx1 vector with ground-truth class for each sample
        txt: Text associated with accuracy

    Returns:
        top-1 accuracy
    """
    # Thin convenience wrapper around the generic top-K implementation.
    return topk_accuracy(p_class, gt_class, k=1, txt=txt)
# compute information for the robustness measures
def get_rolling_stats(
    p_class: np.ndarray, gt_class: np.ndarray, k: int = 1, window_size: int = 50
) -> List:
    """
    Compute rolling statistics which are used for robustness measures.

    Args:
        p_class: Nx(K+1) matrix with each row corresponding to K+1 class probabilities for each sample
        gt_class: Nx1 vector with ground-truth class for each sample
        k: 'k' used for selecting top k values
        window_size: Window size for running stats

    Returns:
        List with mean and standard deviation of the rolling top-k accuracy.
        NOTE(review): entries are NaN when fewer than window_size samples are
        available — presumably handled by the caller; confirm.
    """
    # Per-sample top-k hit indicator: 1 if the true label is among the k
    # highest-probability predictions, else 0.
    p_cls_topk = np.argsort(-p_class)[:, :k]
    gt_cls_indx = gt_class[:, np.newaxis]
    check_zero_topk = p_cls_topk - gt_cls_indx
    topk_correct = 1 * (np.any(check_zero_topk == 0, axis=1))
    # Compute the rolling accuracy series once instead of twice (the original
    # rebuilt the Series and re-ran the rolling mean for each statistic).
    rolling_acc = pd.Series(topk_correct).rolling(window=window_size).mean()
    return [rolling_acc.mean(), rolling_acc.std()]
def get_first_detect_novelty(p_novel: np.ndarray, thresh: float) -> int:
    """
    Find the first index where novelty is detected.

    Args:
        p_novel: NX1 vector with each element corresponding to probability of novelty
        thresh: Score threshold for detecting when a sample is novel

    Returns:
        1-based index where an agent reports that a sample is novel; if no
        sample crosses the threshold, len(p_novel) + 1.
    """
    detections = np.flatnonzero(p_novel >= thresh)
    if detections.size == 0:
        # No score reached the threshold: report one past the last sample.
        return len(p_novel) + 1
    return detections[0] + 1