Arquivos
cuneiform-sign-detection-code/lib/evaluations/sign_evaluation.py
T
2020-11-19 12:18:53 +01:00

513 linhas
19 KiB
Python

import numpy as np
import pandas as pd
from tqdm import tqdm
from .config import cfg
from ..detection.detection_helpers import convert_detections_to_array
from ..utils.bbox_utils import box_iou
def voc_ap(rec, prec, use_07_metric=False):
    """Compute VOC average precision from a precision/recall curve.

    Reference: Ross Girshick's Fast/er R-CNN code.

    Args:
        rec: recall values, one per detection, in ranking order (array-like).
        prec: precision values aligned with ``rec`` (array-like).
        use_07_metric: if True, use the VOC 2007 11-point interpolation;
            otherwise integrate the area under the monotone precision
            envelope (default).

    Returns:
        The average precision as a float (0.0 for empty input).
    """
    # Coerce once so plain lists / pandas Series work in both branches
    # (the boolean masks below need ndarray semantics).
    rec = np.asarray(rec, dtype=float)
    prec = np.asarray(prec, dtype=float)

    if use_07_metric:
        # 11 point metric: mean of the best precision reachable at
        # recall >= t for t = 0.0, 0.1, ..., 1.0
        ap = 0.
        for t in np.arange(0., 1.1, 0.1):
            reached = rec >= t
            p = np.max(prec[reached]) if np.any(reached) else 0
            ap = ap + p / 11.
        return ap

    # correct AP calculation
    # first append sentinel values at both ends
    mrec = np.concatenate(([0.], rec, [1.]))
    mpre = np.concatenate(([0.], prec, [0.]))

    # precision envelope: running maximum taken from the right
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]

    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    i = np.where(mrec[1:] != mrec[:-1])[0]

    # and sum (\Delta recall) * prec
    return np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
# *BASIC* AP COMPUTATION (Fast RCNN style)
def evaluate_on_gt(gt_boxes, gt_labels, num_images, all_boxes, ovthresh=None, num_classes=None, use_07_metric=False):
    """Match per-class detections against ground truth and compute class APs.

    Reference: Ross Girshick's Fast/er R-CNN code.

    Only image slot 0 of ``all_boxes`` is read; ``num_images`` merely sizes
    the returned ``all_tp`` / ``all_fp`` containers.

    Args:
        gt_boxes: array of ground-truth boxes; columns 0..3 are used as
            (x1, y1, x2, y2).
        gt_labels: array of class labels aligned with ``gt_boxes``.
        num_images: number of image slots in the returned tp/fp lists.
        all_boxes: ``all_boxes[cls][image]`` = N x 5 array of detections in
            (x1, y1, x2, y2, score).
        ovthresh: IoU a detection must exceed to count as true positive;
            defaults to ``cfg.TEST.TP_MIN_OVERLAP``.
        num_classes: number of classes including index 0 (which is skipped);
            defaults to ``cfg.TEST.NUM_CLASSES``.
        use_07_metric: forwarded to ``voc_ap``.

    Returns:
        ``(all_tp, all_fp, det_stats, total_num_tp)`` where ``all_tp[cls][0]``
        / ``all_fp[cls][0]`` hold indices into the confidence-sorted
        detections of that class, and every ``det_stats`` row is
        ``[num_gt, num_det, num_tp, num_det - num_tp, ap, cls]``.
        Classes without ground truth produce no ``det_stats`` row.
    """
    if ovthresh is None:
        ovthresh = cfg.TEST.TP_MIN_OVERLAP
    if num_classes is None:
        num_classes = cfg.TEST.NUM_CLASSES

    # `range` works on Python 2 and 3 alike (the original `xrange` is py2-only)
    all_tp = [[[] for _ in range(num_images)]
              for _ in range(num_classes)]
    all_fp = [[[] for _ in range(num_images)]
              for _ in range(num_classes)]

    det_stats = []
    total_num_tp = 0

    for j in range(1, num_classes):
        # if no detections for class available
        if len(all_boxes[j][0]) == 0:
            BB = np.empty((0, 4), dtype=np.float32)
            confidence = np.empty(0, dtype=np.float32)
        else:
            BB = all_boxes[j][0][:, :4]
            confidence = all_boxes[j][0][:, -1]

        # sort by confidence (descending)
        sorted_ind = np.argsort(-confidence)
        BB = BB[sorted_ind, :]

        inds = np.where(gt_labels == j)[0]
        BBGT = gt_boxes[inds, :].astype(float)
        npos = BBGT.shape[0]
        if npos == 0:
            # no gt boxes for this class: AP computation is not meaningful
            continue

        det = [False] * npos  # which gt boxes are already claimed

        # go down dets and mark TPs and FPs
        nd = len(sorted_ind)
        tp = np.zeros(nd)
        fp = np.zeros(nd)
        cls_tp = []
        cls_fp = []
        for d in range(nd):
            bb = BB[d, :].astype(float)

            # compute overlaps (VOC convention: inclusive pixel coords, +1)
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

            if ovmax > ovthresh and not det[jmax]:
                tp[d] = 1.
                det[jmax] = True
                cls_tp.append(d)
            else:
                # either insufficient overlap or a double detection of an
                # already claimed gt box (unlikely due to nms)
                fp[d] = 1.
                cls_fp.append(d)

        # save tp / fp detections (indices into the sorted detections)
        all_tp[j][0] = np.array(cls_tp)
        all_fp[j][0] = np.array(cls_fp)

        # compute precision recall
        fp = np.cumsum(fp)
        tp = np.cumsum(tp)
        rec = tp / float(npos)
        # guard against 0 / 0 on the first entries
        prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
        ap = voc_ap(rec, prec, use_07_metric)

        # kept as a numpy integer so downstream ratios behave as before
        num_tp = np.sum(det).astype(int)
        total_num_tp += num_tp
        det_stats.append([npos, nd, num_tp, nd - num_tp, ap, j])

    return all_tp, all_fp, det_stats, total_num_tp
def df_evaluate_on_gt(gt_boxes_df, pred_boxes_df, ovthresh=None, num_classes=None, use_07_metric=False):
    """Evaluate detections stored in data frames against ground truth.

    Reference: Ross Girshick's Fast/er R-CNN code.

    Both frames are read through the columns ``cls``, ``seg_idx``, ``x1``,
    ``y1``, ``x2``, ``y2``; ``pred_boxes_df`` additionally needs ``conf``.
    A detection can only match ground truth of the same class within the
    same ``seg_idx``.

    Args:
        gt_boxes_df: ground-truth boxes.
        pred_boxes_df: predicted boxes with confidences.
        ovthresh: IoU a detection must exceed to count as true positive;
            defaults to ``cfg.TEST.TP_MIN_OVERLAP``.
        num_classes: number of classes including index 0 (which is skipped);
            defaults to ``cfg.TEST.NUM_CLASSES``.
        use_07_metric: forwarded to ``voc_ap``.

    Returns:
        ``(det_stats, total_num_tp)``; every ``det_stats`` row is
        ``[num_gt, num_det, num_tp, num_det - num_tp, ap, cls]``. Classes
        without ground truth produce no row.
    """
    if ovthresh is None:
        ovthresh = cfg.TEST.TP_MIN_OVERLAP
    if num_classes is None:
        num_classes = cfg.TEST.NUM_CLASSES

    coord_cols = ['x1', 'y1', 'x2', 'y2']

    # sort by confidence
    pred_boxes_df = pred_boxes_df.sort_values('conf', ascending=False)

    det_stats = []
    total_num_tp = 0

    for j in tqdm(range(1, num_classes)):
        cls_gt_df = gt_boxes_df[gt_boxes_df.cls == j]
        npos = len(cls_gt_df)
        if npos == 0:
            # no gt boxes for this class: AP computation is not meaningful
            continue

        cls_dets_df = pred_boxes_df[pred_boxes_df.cls == j]
        # get bounding boxes and image ids
        BB = cls_dets_df[coord_cols].values
        image_ids = cls_dets_df.seg_idx.values

        # Group gt boxes by segment once per class. The "already matched"
        # flags are positional within each group, so the bookkeeping does not
        # depend on the frame's index labels (which need not be 0..N-1).
        gt_by_seg = {}
        for seg_key, seg_gt_df in cls_gt_df.groupby('seg_idx', sort=False):
            seg_boxes = seg_gt_df[coord_cols].values.astype(float)
            gt_by_seg[seg_key] = (seg_boxes, np.zeros(len(seg_boxes), dtype=bool))

        # go down dets and mark TPs and FPs
        nd = len(cls_dets_df)
        tp = np.zeros(nd)
        fp = np.zeros(nd)
        for d in range(nd):
            bb = BB[d, :].astype(float)
            seg_entry = gt_by_seg.get(image_ids[d])
            if seg_entry is None:
                # no gt of this class in the detection's segment
                fp[d] = 1.
                continue
            BBGT, matched = seg_entry

            # compute overlaps (VOC convention: inclusive pixel coords, +1)
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

            if ovmax > ovthresh and not matched[jmax]:
                tp[d] = 1.
                matched[jmax] = True
            else:
                # insufficient overlap, or double detection of an already
                # claimed gt box (unlikely due to nms)
                fp[d] = 1.

        # compute num tp before cumsum (!)
        num_tp = np.sum(tp).astype(int)

        # compute precision recall
        fp = np.cumsum(fp)
        tp = np.cumsum(tp)
        rec = tp / float(npos)
        # guard against 0 / 0 on the first entries
        prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
        ap = voc_ap(rec, prec, use_07_metric)

        total_num_tp += num_tp
        det_stats.append([npos, nd, num_tp, nd - num_tp, ap, j])

    return det_stats, total_num_tp
def eval_detector(gt_boxes, gt_labels, all_boxes, ovthresh=None, verbose=True):
    """Evaluate single-image detections and summarise the per-class stats.

    Thin wrapper around ``evaluate_on_gt`` (called with one image slot).

    Args:
        gt_boxes: ground-truth boxes, passed through to ``evaluate_on_gt``.
        gt_labels: ground-truth labels, passed through to ``evaluate_on_gt``.
        all_boxes: per-class detections, passed through to ``evaluate_on_gt``.
        ovthresh: IoU threshold for true positives (``None`` = config default).
        verbose: print a one-line summary when True.

    Returns:
        ``(acc, df_stats)`` where ``acc = tp / (tp + fp)`` (NaN when there are
        no detections at all) and ``df_stats`` has the columns
        ``num_gt, num_det, tp, fp, ap, lbl``.
    """
    num_imgs = 1
    _, _, det_stats, total_num_tp = evaluate_on_gt(gt_boxes, gt_labels, num_imgs, all_boxes,
                                                   ovthresh=ovthresh)
    # summing the rows directly also works when det_stats is empty
    total_num_fp = int(sum(row[3] for row in det_stats))

    # NOTE: changes a process-wide pandas display option (kept from the original)
    pd.set_option('display.max_rows', 50)
    df_stats = pd.DataFrame(det_stats, columns=['num_gt', 'num_det', 'tp', 'fp', 'ap', 'lbl'])

    if verbose:
        nan = float('nan')
        ap = df_stats['ap']
        # boolean mask instead of Series.nonzero(), which pandas >= 1.0 removed
        nonzero_ap = ap[ap != 0]
        mean_ap = float(ap.mean()) if len(ap) > 0 else nan
        mean_ap_nonzero = float(nonzero_ap.mean()) if len(nonzero_ap) > 0 else nan
        print("total_tp", total_num_tp, "total_fp", total_num_fp,
              "mAP", '{:0.4f}'.format(mean_ap),
              "mAP(nonzero)", '{:0.4f}'.format(mean_ap_nonzero))

    num_det = total_num_tp + total_num_fp
    # without detections the ratio is undefined -> NaN instead of an exception
    acc = total_num_tp / float(num_det) if num_det > 0 else float('nan')
    return acc, df_stats
def eval_detector_on_collection(gt_boxes_df, pred_boxes_df, ovthresh=None):
    """Evaluate detections of a whole collection and print a summary.

    Thin wrapper around ``df_evaluate_on_gt``.

    Args:
        gt_boxes_df: ground-truth boxes, passed through to ``df_evaluate_on_gt``.
        pred_boxes_df: predicted boxes, passed through to ``df_evaluate_on_gt``.
        ovthresh: IoU threshold for true positives (``None`` = config default).

    Returns:
        ``(acc, df_stats)`` where ``acc = tp / (tp + fp)`` (NaN when there are
        no detections at all) and ``df_stats`` has the columns
        ``num_gt, num_det, tp, fp, ap, lbl``.
    """
    det_stats, total_num_tp = df_evaluate_on_gt(gt_boxes_df, pred_boxes_df, ovthresh=ovthresh)
    # summing the rows directly also works when det_stats is empty
    total_num_fp = int(sum(row[3] for row in det_stats))

    num_det = total_num_tp + total_num_fp
    # without detections the ratio is undefined -> NaN instead of an exception
    acc = total_num_tp / float(num_det) if num_det > 0 else float('nan')

    # NOTE: changes a process-wide pandas display option (kept from the original)
    pd.set_option('display.max_rows', 50)
    df_stats = pd.DataFrame(det_stats, columns=['num_gt', 'num_det', 'tp', 'fp', 'ap', 'lbl'])

    nan = float('nan')
    ap = df_stats['ap']
    # boolean mask instead of Series.nonzero(), which pandas >= 1.0 removed
    nonzero_ap = ap[ap != 0]
    mean_ap = float(ap.mean()) if len(ap) > 0 else nan
    mean_ap_nonzero = float(nonzero_ap.mean()) if len(nonzero_ap) > 0 else nan

    print('RESULTS ON FULL COLLECTION :')
    print("total_tp", total_num_tp, "total_fp", total_num_fp,
          "acc", '{:0.3f}'.format(acc),
          "mAP", '{:0.4f}'.format(mean_ap),
          "mAP(nonzero)", '{:0.4f}'.format(mean_ap_nonzero))
    return acc, df_stats
# *FAST* AP COMPUTATION
# prepare AP computation
def add_max_det(group):
    """Flag the highest-scoring true-positive row of ``group``.

    Adds a boolean ``max_det`` column (in place) that is True only for the
    row with ``det_type == 3`` and the largest ``score``; every other row,
    including further TP candidates (double detections), stays False.

    Args:
        group: data frame with ``det_type`` and ``score`` columns.

    Returns:
        The same frame with the ``max_det`` column added.
    """
    # add column to dataframe
    group['max_det'] = False
    # select detections marked as TP
    tp_group = group[group.det_type == 3]
    # only one can be TP, others are double detections
    if len(tp_group) > 0:
        # single .loc call: the chained form `group.max_det.loc[...] = True`
        # writes to a temporary under pandas Copy-on-Write and is lost
        group.loc[tp_group.score.idxmax(), 'max_det'] = True
    return group
def add_det_type_column(eval_df, tp_thresh=0.5, bg_thresh=0.2):
    """Classify every detection row and store the result in ``det_type``.

    Based on "Diagnosing Error in Object Detectors" by Hoiem et al., with
    these modifications:
      * sim and other categories are merged, since every sign is considered
        similar
      * bg_thresh is 0.2 instead of default 0.1

    Codes written to ``det_type``:
      3 = TP          (class matches, overlap > tp_thresh)
      0 = FP: Loc     (class matches, bg_thresh < overlap <= tp_thresh)
      1 = FP: Sim/Oth (class differs, overlap > bg_thresh)
      2 = FP: BG      (everything else)

    Args:
        eval_df: data frame with ``overlap``, ``pred`` and ``true`` columns;
            modified in place.
        tp_thresh: overlap a class-matching detection must exceed to be a TP.
        bg_thresh: overlap below or equal to which a detection is background.

    Returns:
        ``eval_df`` with the ``det_type`` column added.
    """
    overlap = eval_df['overlap'].values
    same_cls = (eval_df['pred'] == eval_df['true']).values
    above_bg = overlap > bg_thresh

    # Vectorised replacement for a per-row loop. Start from BG and refine;
    # the TP mask is applied last so it wins over Loc, mirroring the
    # original if/elif order.
    det_type = np.full(len(eval_df), 2, dtype=int)   # FP: BG(2) confusion
    det_type[~same_cls & above_bg] = 1               # FP: Sim/Oth(1) confusion
    det_type[same_cls & above_bg] = 0                # FP: Loc(0) confusion
    det_type[same_cls & (overlap > tp_thresh)] = 3   # TP (3)

    # add column to dataframe
    eval_df['det_type'] = det_type
    return eval_df
def prepare_eval_df(all_boxes, gt_boxes, gt_labels, seg_idx, tp_thresh, bg_thresh):
    """Prepare eval_df that contains most information for average precision computation.

    Every detection is assigned to the ground-truth box it overlaps most.

    Args:
        all_boxes: detections, converted with ``convert_detections_to_array``.
        gt_boxes: ground-truth boxes handed to ``box_iou``.
        gt_labels: class labels aligned with ``gt_boxes``.
        seg_idx: value stored in the ``seg_idx`` column of every row.
        tp_thresh: forwarded to ``add_det_type_column``.
        bg_thresh: forwarded to ``add_det_type_column``.

    Returns:
        Data frame with the columns ``overlap, true, pred, score, det_idx,
        gt_idx, seg_idx, det_type, max_det``, one row per detection.
    """
    # convert all_boxes to ndarray (N x 9)
    # [ID, cx, cy, score, x1, y1, x2, y2, idx]  bbox = [4:8]  ctr = [1:3]
    sign_detections = convert_detections_to_array(all_boxes)

    # compute ious between detections and gt_boxes
    ious = box_iou(sign_detections[:, 4:8], gt_boxes)

    # for each detection get best fit with gt box
    index_gt = np.argmax(ious, axis=1)
    overlap_gt = np.max(ious, axis=1)
    label_gt = gt_labels[index_gt]

    # collect in data frame
    eval_df = pd.DataFrame(np.hstack([overlap_gt.reshape(-1, 1), label_gt.reshape(-1, 1),
                                      sign_detections[:, [0, 3, 8]], index_gt.reshape(-1, 1)]),
                           columns=['overlap', 'true', 'pred', 'score', 'det_idx', 'gt_idx'])
    # add column with segment index
    eval_df['seg_idx'] = seg_idx

    # add det_type column (0:LOC, 1:SIM, 2:BG, 3:TP)
    eval_df = add_det_type_column(eval_df, tp_thresh, bg_thresh)

    # compute max_det (in order to find double detections): per gt box only
    # the best-scoring TP candidate is flagged. Done with a direct idxmax
    # rather than groupby().apply(), whose resulting index/columns differ
    # between pandas versions (group_keys / include_groups handling).
    eval_df['max_det'] = False
    tp_df = eval_df[eval_df.det_type == 3]
    if len(tp_df) > 0:
        best_per_gt = tp_df.groupby('gt_idx')['score'].idxmax()
        eval_df.loc[best_per_gt.values, 'max_det'] = True

    return eval_df
# AP computation
def compute_mean_ap(col_eval_df, gt_df, num_classes=240, class_list=None, verbose=True):
    """Compute mean class AP.

    Args:
        col_eval_df: detections with the columns ``pred``, ``score``,
            ``det_type`` and ``max_det``.
        gt_df: ground truth; only its ``cls`` column is used, to count the
            positives per class.
        num_classes: classes ``1 .. num_classes - 1`` are evaluated when
            ``class_list`` is None.
        class_list: explicit iterable of class indices to evaluate.
        verbose: print the resulting mAP when True.

    Returns:
        ndarray of shape (K, 6), one row
        ``[num_gt, num_det, tp, fp, ap, cls]`` per evaluated class that has
        ground truth (K may be 0).
    """
    # define list of classes to evaluate over
    if class_list is None:
        class_list = np.arange(1, num_classes)

    col_eval_df = col_eval_df.sort_values('score', ascending=False)

    # compute class counts
    gt_counts = gt_df.cls.value_counts()

    det_stats = []
    for cls_idx in class_list:
        # get gt number
        npos = gt_counts[cls_idx] if cls_idx in gt_counts.index else 0
        if npos == 0:
            # classes without ground truth are skipped
            continue

        # get class predictions
        cls_det_df = col_eval_df[col_eval_df.pred == cls_idx]

        # a TP needs sufficient overlap (det_type 3) AND must be the best
        # detection of its gt box; everything else, including double
        # detections, counts as FP
        tp_vec = ((cls_det_df.det_type == 3) & (cls_det_df.max_det == True)).values  # noqa: E712
        fp_vec = ~tp_vec

        fp = np.cumsum(fp_vec)
        tp = np.cumsum(tp_vec)

        rec = tp / float(npos)
        prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
        ap = voc_ap(rec, prec, False)

        # sum is used to map empty list to 0
        det_stats.append([npos, len(cls_det_df), np.sum(tp[-1:]), np.sum(fp[-1:]), ap, cls_idx])

    # convert to ndarray; the reshape keeps a well-formed (0, 6) result when
    # no class had any ground truth
    det_stats = np.asarray(det_stats, dtype=float).reshape(-1, 6)
    mean_ap = np.mean(det_stats[:, -2]) if len(det_stats) > 0 else float('nan')

    if verbose:
        print('mAP {:.4}'.format(mean_ap))

    return det_stats
def compute_global_ap(col_eval_df, gt_df, num_classes=240, verbose=True):
    """Compute global AP (all classes pooled into one ranking).

    Args:
        col_eval_df: detections with the columns ``pred``, ``score``,
            ``det_type`` and ``max_det``.
        gt_df: ground truth; only its ``cls`` column is used, to count the
            positives.
        num_classes: only classes ``1 .. num_classes - 1`` are considered.
        verbose: print the resulting AP when True.

    Returns:
        The average precision; 0 when there is no ground truth in range.
    """
    considered = range(1, num_classes)

    # sort according to score
    col_eval_df = col_eval_df.sort_values('score', ascending=False)
    cls_det_df = col_eval_df[col_eval_df.pred.isin(considered)]

    # select number of gt positives among the considered classes
    npos = len(gt_df[gt_df.cls.isin(considered)])

    ap = 0
    if npos > 0:
        # a TP needs sufficient overlap (det_type 3) AND must be the best
        # detection of its gt box; everything else, including double
        # detections, counts as FP
        tp_vec = ((cls_det_df.det_type == 3) & (cls_det_df.max_det == True)).values  # noqa: E712
        fp_vec = ~tp_vec

        fp = np.cumsum(fp_vec)
        tp = np.cumsum(tp_vec)

        rec = tp / float(npos)
        prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
        ap = voc_ap(rec, prec, False)
        # NOTE: a permanently disabled debug plot comparing voc_ap with
        # sklearn's PR-AUC used to live here; its comment advised using the
        # original VOC AP code when in doubt.

    if verbose:
        print('global AP {:.4}'.format(ap))

    return ap
# FP categorization
def get_type_val_frac(fp_type_series, type_values=(0, 1, 2, 3), num_fp_thres=(5, 10, 25, 50, 100)):
    """Fraction of each type among the first N entries, for several N.

    Args:
        fp_type_series: pandas Series of type codes, already in the order in
            which the leading entries should be taken.
        type_values: type codes to report, one output column each.
        num_fp_thres: prefix lengths N to evaluate, one output row each.

    Returns:
        ndarray of shape ``(len(num_fp_thres), len(type_values))``; entry
        ``[i, j]`` is the share of ``type_values[j]`` within the first
        ``num_fp_thres[i]`` entries (0 if the type does not occur).
    """
    type_values = list(type_values)
    type_val_frac = np.zeros((len(num_fp_thres), len(type_values)))
    for i, thres in enumerate(num_fp_thres):
        type_counts = fp_type_series[:thres].value_counts(normalize=True, sort=True)
        # label-based lookup; types missing from the prefix get 0
        type_val_frac[i, :] = type_counts.reindex(type_values, fill_value=0.0).values
    return type_val_frac