"""Utilities for measuring the performance of a model."""
from abc import abstractmethod
import torch
import torch.nn.functional as F
from mlbench_core.utils import AverageMeter
from mlbench_core.utils.pytorch.distributed import global_average
try:
    import sacrebleu
except ImportError:
    sacrebleu = None  # optional dependency, needed only by BLEUScore
class MLBenchMetric(object):
    """Base class for all metrics, keeping a running average of batch values."""

    def __init__(self):
        self.average_meter = AverageMeter()
    @abstractmethod
    def __call__(self, output, target):
        pass

    def reset(self):
        """Resets the running average."""
        self.average_meter = AverageMeter()

    def update(self, perc, size):
        """Updates the running average with a batch value and batch size."""
        self.average_meter.update(perc, size)

    def average(self):
        """Returns the metric value, averaged globally across all workers."""
        return global_average(self.average_meter.sum, self.average_meter.count)
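

# Typical lifecycle of a metric (an illustrative sketch; the loop below assumes
# a standard training setup and is not part of this module):
#
#     metric = TopKAccuracy(topk=1)
#     for output, target in batches:
#         value = metric(output, target)        # per-batch metric value
#         metric.update(value, target.size(0))  # weighted running average
#     result = metric.average()                 # global average across workers
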
class TopKAccuracy(MLBenchMetric):
r"""Top K accuracy of an output.
Counts a prediction as correct if the target value is in the top ``k``
predictions, false otherwise, and returns the number of correct
instances relative to total instances (0.0 to 100.0).
Args:
topk (int, optional): The number of top predictions to consider.
Default: ``1``
"""
def __init__(self, topk=1):
super(TopKAccuracy, self).__init__()
self.topk = topk
    def __call__(self, output, target):
        """Computes the precision@k for the specified value of k.

Args:
output (:obj:`torch.Tensor`): Predictions of a model
target (:obj:`torch.Tensor`): Target labels
        Example:
            >>> m = torch.nn.Softmax(dim=1)
            >>> input = torch.randn(10, 50)
            >>> preds = m(input)
            >>> targets = torch.randint(0, 50, (10,))
            >>> topk = TopKAccuracy(5)
            >>> precision = topk(preds, targets)

Returns:
            :obj:`torch.Tensor`: Percentage of correct predictions (0.0 to 100.0)
"""
        batch_size = target.size(0)
        output = self._preprocess_output(output)

        # Indices of the k highest-scoring classes, one column per sample
        _, pred = output.topk(self.topk, 1, True, True)
        pred = pred.t().float()
        correct = pred.eq(target.reshape(1, -1).expand_as(pred).float())

        # A sample counts as correct if any of its top-k predictions matches
        correct_k = correct[: self.topk].reshape(-1).float().sum(0, keepdim=True)
        return correct_k.mul_(100.0 / batch_size)
    def _preprocess_output(self, output):
        # A single output column is interpreted as the positive-class
        # probability of a binary problem; expand it to two columns so
        # that top-k selection is well defined.
        dim = output.size(1)
        if dim == 1:
            output = torch.cat((1 - output, output), 1)
            dim = output.size(1)

        if self.topk >= dim:
            raise ValueError(
                "Cannot compute top {} accuracy with "
                "input dimension {}".format(self.topk, dim)
            )
        return output
@property
    def name(self):
"""str: Name of this metric."""
return "Prec@{}".format(self.topk)
class Perplexity(MLBenchMetric):
"""Language Model Perplexity score."""
@property
def name(self):
"""str: Name of this metric."""
return "Perplexity"
def __call__(self, output, target):
"""Computes the perplexity given output and target. Output should be logits
Args:
output (:obj:`torch.Tensor`): Not Used
target (:obj:`torch.Tensor`): Not Used
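
        Example:
            A minimal sketch; batch size and vocabulary size are arbitrary:

            >>> metric = Perplexity()
            >>> logits = torch.randn(32, 1000)  # (batch_size, vocab_size)
            >>> labels = torch.randint(0, 1000, (32,))
            >>> ppl = metric(logits, labels)  # exp of the cross-entropy loss
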
Returns:
            :obj:`torch.Tensor`: The perplexity, ``exp(cross_entropy(output, target))``
"""
loss = F.cross_entropy(output, target)
return torch.exp(loss)
class DiceCoefficient(MLBenchMetric):
    """Dice Coefficient of a binary classification problem."""

def __call__(self, output, target):
"""Computes the Dice Coefficient of a Binary classification problem
Args:
output (:obj:`torch.Tensor`): Output of model
target (:obj:`torch.Tensor`): Target labels
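
        Example:
            A minimal sketch with arbitrary binary masks:

            >>> metric = DiceCoefficient()
            >>> preds = (torch.rand(4, 1, 8, 8) > 0.5).float()
            >>> masks = (torch.rand(4, 1, 8, 8) > 0.5).float()
            >>> dice = metric(preds, masks)  # scalar tensor in [0, 1]
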
Returns:
            :obj:`torch.Tensor`: Dice Coefficient in [0, 1]
"""
        eps = 0.0001
        output, target = output.float(), target.float()

        # Intersection and (smoothed) union of flattened predictions and targets
        self.inter = torch.dot(output.view(-1), target.view(-1))
        self.union = torch.sum(output) + torch.sum(target) + eps

        t = (2 * self.inter.float() + eps) / self.union.float()
        return t
@property
def name(self):
"""str: Name of this metric"""
return "Dice Coefficient"
class F1Score(MLBenchMetric):
def __init__(self, threshold=0.5, eps=1e-9):
"""F1-Score metric
Args:
threshold (float): Threshold for prediction probability
"""
super(F1Score, self).__init__()
self.threshold = threshold
self.eps = eps
def __call__(self, output, target):
"""Computes the F1-Score of a Binary classification problem
Args:
output (:obj:`torch.Tensor`): Output of model
target (:obj:`torch.Tensor`): Target labels
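
        Example:
            A minimal sketch with arbitrary probabilities and labels:

            >>> metric = F1Score(threshold=0.5)
            >>> probs = torch.rand(16, 1)  # predicted probabilities
            >>> labels = torch.randint(0, 2, (16, 1)).float()
            >>> f1 = metric(probs, labels)  # scalar tensor in [0, 1]
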
Returns:
            :obj:`torch.Tensor`: F1-Score in [0, 1]
"""
        # Binarize predictions at the threshold
        y_pred = torch.ge(output.float(), self.threshold).float()
        y_true = target.float()

        true_positive = (y_pred * y_true).sum(dim=0)
        precision = true_positive.div(y_pred.sum(dim=0).add(self.eps))
        recall = true_positive.div(y_true.sum(dim=0).add(self.eps))

        # F1 is the harmonic mean of precision and recall
        return torch.mean(
            (precision * recall).div(precision + recall + self.eps).mul(2)
        )
@property
    def name(self):
        """str: Name of this metric."""
        return "F1-Score"
class BLEUScore(MLBenchMetric):
    def __init__(self, use_raw=False):
        """Bilingual Evaluation Understudy (BLEU) score.

        Args:
            use_raw (bool): Use raw corpus BLEU. Default: ``False``
        """
        super(BLEUScore, self).__init__()
        self.use_raw = use_raw
def __call__(self, output, target):
"""Computes the BLEU score of a translation task
Args:
output (:obj:`torch.Tensor`): Translated output (not tokenized)
target (:obj:`torch.Tensor`): Target labels
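
        Example:
            A minimal sketch; the sentences are made up and ``sacrebleu``
            must be installed:

            >>> metric = BLEUScore()
            >>> hyps = ["the cat sat on the mat"]
            >>> refs = ["the cat sat on the mat"]
            >>> score = metric(hyps, refs)  # close to 100.0 for an exact match
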
Returns:
            :obj:`torch.Tensor`: BLEU score, between 0 and 100
"""
        # sacrebleu takes a list of hypotheses and a list of reference
        # streams, hence the wrapping of ``target`` in a list
        if self.use_raw:
            bleu_score = sacrebleu.raw_corpus_bleu(output, [target]).score
        else:
            bleu_score = sacrebleu.corpus_bleu(
                output, [target], tokenize="intl", lowercase=True
            ).score
return torch.tensor([bleu_score])
@property
    def name(self):
        """str: Name of this metric."""
        return "BLEU-Score"