513 linhas
19 KiB
Python
513 linhas
19 KiB
Python
import numpy as np
|
|
import pandas as pd
|
|
|
|
from tqdm import tqdm
|
|
|
|
from .config import cfg
|
|
|
|
from ..detection.detection_helpers import convert_detections_to_array
|
|
from ..utils.bbox_utils import box_iou
|
|
|
|
|
|
def voc_ap(rec, prec, use_07_metric=False):
    """Compute VOC average precision from recall and precision values.

    Usage: ap = voc_ap(rec, prec, [use_07_metric])

    Args:
        rec: array-like of recall values (any array-like is accepted; it is
            converted with ``np.asarray``).
        prec: array-like of precision values, aligned element-wise with
            ``rec``.
        use_07_metric: if true, use the VOC 07 11-point method
            (default: False); otherwise integrate the area under the
            monotonically decreasing precision envelope.

    Returns:
        The average precision as a scalar.

    Reference: Ross Girshick's Fast/er R-CNN code
    """
    # The 11-point branch relies on boolean masks, which plain Python lists
    # do not support, so normalise the inputs up front.
    rec = np.asarray(rec, dtype=float)
    prec = np.asarray(prec, dtype=float)

    if use_07_metric:
        # 11 point metric: average the best precision reachable at recall
        # levels 0.0, 0.1, ..., 1.0 (0 where that recall is never reached).
        ap = 0.
        for t in np.arange(0., 1.1, 0.1):
            reached = rec >= t
            p = np.max(prec[reached]) if np.any(reached) else 0.
            ap = ap + p / 11.
        return ap

    # correct AP calculation
    # first append sentinel values at both ends
    mrec = np.concatenate(([0.], rec, [1.]))
    mpre = np.concatenate(([0.], prec, [0.]))

    # compute the precision envelope: every entry becomes the maximum of
    # itself and all entries to its right (running max from the right)
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]

    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    idx = np.where(mrec[1:] != mrec[:-1])[0]

    # and sum (\Delta recall) * prec
    return np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1])
|
|
|
|
|
|
# *BASIC* AP COMPUTATION (Fast RCNN style)
|
|
|
|
def evaluate_on_gt(gt_boxes, gt_labels, num_images, all_boxes, ovthresh=None, num_classes=None, use_07_metric=False):
    """Match per-class detections against ground truth and compute VOC AP.

    Only image slot 0 of ``all_boxes`` is read, and only slot 0 of the
    returned ``all_tp`` / ``all_fp`` is written; ``num_images`` merely sizes
    those two containers.

    Args:
        gt_boxes: 2-D array of ground-truth boxes; columns 0..3 are read as
            (x1, y1, x2, y2).
        gt_labels: array of class ids aligned with the rows of ``gt_boxes``.
        num_images: number of per-image slots allocated per class in
            ``all_tp`` / ``all_fp``.
        all_boxes: ``all_boxes[cls][image]`` is an N x 5 array of detections
            in (x1, y1, x2, y2, score), or an empty sequence.
        ovthresh: IoU a detection must exceed (strictly) to count as a true
            positive; defaults to ``cfg.TEST.TP_MIN_OVERLAP``.
        num_classes: classes ``1 .. num_classes - 1`` are evaluated (index 0
            is skipped); defaults to ``cfg.TEST.NUM_CLASSES``.
        use_07_metric: forwarded to ``voc_ap``.

    Returns:
        Tuple ``(all_tp, all_fp, det_stats, total_num_tp)``. For classes that
        have ground truth, ``all_tp[cls][0]`` / ``all_fp[cls][0]`` hold arrays
        of indices into the score-sorted detections of that class.
        ``det_stats`` has one row ``[npos, nd, num_tp, nd - num_tp, ap, cls]``
        per class with ground truth. Classes without ground truth are
        skipped, since AP is not meaningful for them.

    Reference: Ross Girshick's Fast/er R-CNN code
    """
    if ovthresh is None:
        ovthresh = cfg.TEST.TP_MIN_OVERLAP
    if num_classes is None:
        num_classes = cfg.TEST.NUM_CLASSES

    # `range` instead of the Python-2-only `xrange`, so this runs on both.
    all_tp = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    all_fp = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    det_stats = []
    total_num_tp = 0

    for j in range(1, num_classes):
        # if no detections for class available
        if len(all_boxes[j][0]) == 0:
            BB = np.empty((0, 4), dtype=np.float32)
            confidence = np.empty(0, dtype=np.float32)
        else:
            BB = all_boxes[j][0][:, :4]
            confidence = all_boxes[j][0][:, -1]

        # sort by confidence (descending)
        sorted_ind = np.argsort(-confidence)
        BB = BB[sorted_ind, :]

        inds = np.where(gt_labels == j)[0]
        BBGT = gt_boxes[inds, :].astype(float)
        npos = BBGT.shape[0]
        if npos == 0:
            # no gt boxes available for class, AP computation is not meaningful
            continue

        # one flag per gt box: has it already been claimed by a detection?
        det = np.zeros(npos, dtype=bool)
        # gt areas do not depend on the detection (VOC "+1" pixel convention)
        gt_area = (BBGT[:, 2] - BBGT[:, 0] + 1.) * (BBGT[:, 3] - BBGT[:, 1] + 1.)

        # go down dets and mark TPs and FPs
        nd = len(sorted_ind)
        tp = np.zeros(nd)
        fp = np.zeros(nd)
        cls_tp = []
        cls_fp = []
        for d in range(nd):
            bb = BB[d, :].astype(float)

            # intersection
            iw = np.maximum(np.minimum(BBGT[:, 2], bb[2]) - np.maximum(BBGT[:, 0], bb[0]) + 1., 0.)
            ih = np.maximum(np.minimum(BBGT[:, 3], bb[3]) - np.maximum(BBGT[:, 1], bb[1]) + 1., 0.)
            inters = iw * ih

            # union
            uni = (bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + gt_area - inters

            overlaps = inters / uni
            jmax = np.argmax(overlaps)

            if overlaps[jmax] > ovthresh and not det[jmax]:
                tp[d] = 1.
                det[jmax] = True
                cls_tp.append(d)
            else:
                # either the overlap is too low, or the best-matching gt box
                # was already claimed (double detection, unlikely due to nms)
                fp[d] = 1.
                cls_fp.append(d)

        # save tp / fp detection indices
        all_tp[j][0] = np.array(cls_tp)
        all_fp[j][0] = np.array(cls_fp)

        # compute precision recall
        fp = np.cumsum(fp)
        tp = np.cumsum(tp)
        rec = tp / float(npos)
        # the eps floor guards against a zero denominator
        prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
        ap = voc_ap(rec, prec, use_07_metric)

        num_tp = np.sum(det).astype(int)
        total_num_tp += num_tp
        det_stats.append([npos, nd, num_tp, nd - num_tp, ap, j])

    return all_tp, all_fp, det_stats, total_num_tp
|
|
|
|
|
|
def df_evaluate_on_gt(gt_boxes_df, pred_boxes_df, ovthresh=None, num_classes=None, use_07_metric=False):
    """Match detections to ground truth across a collection and compute per-class VOC AP.

    Both frames are read through the columns ``cls``, ``seg_idx``, ``x1``,
    ``y1``, ``x2``, ``y2``; ``pred_boxes_df`` additionally needs ``conf``.
    A detection can only match ground-truth boxes of the same class and the
    same ``seg_idx``.

    Args:
        gt_boxes_df: DataFrame of ground-truth boxes. Its index labels are
            used to remember which boxes were already matched, so they
            should identify rows uniquely.
        pred_boxes_df: DataFrame of detections.
        ovthresh: IoU a detection must exceed (strictly) to count as a true
            positive; defaults to ``cfg.TEST.TP_MIN_OVERLAP``.
        num_classes: classes ``1 .. num_classes - 1`` are evaluated;
            defaults to ``cfg.TEST.NUM_CLASSES``.
        use_07_metric: forwarded to ``voc_ap``.

    Returns:
        Tuple ``(det_stats, total_num_tp)`` where ``det_stats`` has one row
        ``[npos, nd, num_tp, nd - num_tp, ap, cls]`` per class that has
        ground truth. Classes without ground truth are skipped, since AP is
        not meaningful for them.

    Reference: Ross Girshick's Fast/er R-CNN code
    """
    if ovthresh is None:
        ovthresh = cfg.TEST.TP_MIN_OVERLAP
    if num_classes is None:
        num_classes = cfg.TEST.NUM_CLASSES

    box_cols = ['x1', 'y1', 'x2', 'y2']

    # sort by confidence
    pred_boxes_df = pred_boxes_df.sort_values('conf', ascending=False)

    # Index labels of gt boxes that already have a matching detection.
    # A set keyed by label works for any index, whereas a positional list
    # only works when the index happens to be 0..n-1.
    matched_gt = set()

    det_stats = []
    total_num_tp = 0
    # `range` instead of the Python-2-only `xrange`, so this runs on both.
    for j in tqdm(range(1, num_classes)):
        cls_dets_df = pred_boxes_df[pred_boxes_df.cls == j]
        cls_gt_df = gt_boxes_df[gt_boxes_df.cls == j]

        # get bounding box and image ids
        BB = cls_dets_df[box_cols].values
        image_ids = cls_dets_df.seg_idx.values

        npos = len(cls_gt_df)
        if npos == 0:
            # no gt boxes available for class, AP computation is not meaningful
            continue

        # Group this class's gt boxes by segment once, instead of filtering
        # the DataFrame again for every single detection.
        gt_by_seg = {
            seg: (grp[box_cols].values.astype(float), grp.index.values)
            for seg, grp in cls_gt_df.groupby('seg_idx')
        }

        # go down dets and mark TPs and FPs
        nd = len(cls_dets_df)
        tp = np.zeros(nd)
        fp = np.zeros(nd)

        for d in range(nd):
            bb = BB[d, :].astype(float)
            seg_gt = gt_by_seg.get(image_ids[d])
            if seg_gt is None:
                # no gt box of this class in the detection's segment
                fp[d] = 1.
                continue
            BBGT, gt_index = seg_gt

            # intersection (VOC "+1" pixel convention)
            iw = np.maximum(np.minimum(BBGT[:, 2], bb[2]) - np.maximum(BBGT[:, 0], bb[0]) + 1., 0.)
            ih = np.maximum(np.minimum(BBGT[:, 3], bb[3]) - np.maximum(BBGT[:, 1], bb[1]) + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            jmax = np.argmax(overlaps)

            # map the within-segment position to the gt frame's index label
            gidx = gt_index[jmax]
            if overlaps[jmax] > ovthresh and gidx not in matched_gt:
                tp[d] = 1.
                matched_gt.add(gidx)
            else:
                # either the overlap is too low, or the best-matching gt box
                # was already claimed (double detection, unlikely due to nms)
                fp[d] = 1.

        # compute num tp before cumsum (!)
        num_tp = np.sum(tp).astype(int)

        # compute precision recall
        fp = np.cumsum(fp)
        tp = np.cumsum(tp)
        rec = tp / float(npos)
        # the eps floor guards against a zero denominator
        prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
        ap = voc_ap(rec, prec, use_07_metric)

        total_num_tp += num_tp
        det_stats.append([npos, nd, num_tp, nd - num_tp, ap, j])

    return det_stats, total_num_tp
|
|
|
|
|
|
def eval_detector(gt_boxes, gt_labels, all_boxes, ovthresh=None, verbose=True):
    """Evaluate single-image detections and summarise the per-class statistics.

    Runs ``evaluate_on_gt`` with one image slot and wraps the resulting
    ``det_stats`` rows in a DataFrame.

    Args:
        gt_boxes: ground-truth boxes, forwarded to ``evaluate_on_gt``.
        gt_labels: ground-truth class ids, forwarded to ``evaluate_on_gt``.
        all_boxes: per-class detections, forwarded to ``evaluate_on_gt``.
        ovthresh: IoU threshold, forwarded to ``evaluate_on_gt``.
        verbose: if true, print total TP/FP counts, mAP and the mAP over
            classes with non-zero AP.

    Returns:
        Tuple ``(acc, df_stats)``. ``acc`` is ``tp / (tp + fp)`` over all
        evaluated classes (NaN when there are no evaluated detections).
        ``df_stats`` has the columns num_gt, num_det, tp, fp, ap, lbl.

    Note:
        Sets the global pandas option ``display.max_rows`` to 50.
    """
    # evaluate
    num_imgs = 1
    all_tp, all_fp, det_stats, total_num_tp = evaluate_on_gt(gt_boxes, gt_labels, num_imgs, all_boxes,
                                                             ovthresh=ovthresh)

    pd.set_option('display.max_rows', 50)
    df_stats = pd.DataFrame(det_stats, columns=['num_gt', 'num_det', 'tp', 'fp', 'ap', 'lbl'])

    # Summing the DataFrame column also works when det_stats is empty,
    # where slicing np.array(det_stats)[:, 3] would raise IndexError.
    total_num_fp = int(df_stats['fp'].sum())

    if verbose:
        ap = df_stats['ap'].astype(float)
        # Boolean mask instead of Series.nonzero(), which pandas removed in 1.0.
        print("total_tp", total_num_tp, "total_fp", total_num_fp,
              "mAP", '{:0.4f}'.format(ap.mean()),
              "mAP(nonzero)", '{:0.4f}'.format(ap[ap != 0].mean()))

    num_det = total_num_tp + total_num_fp
    # undefined without detections; NaN instead of a ZeroDivisionError
    acc = total_num_tp / float(num_det) if num_det > 0 else float('nan')

    return acc, df_stats
|
|
|
|
|
|
def eval_detector_on_collection(gt_boxes_df, pred_boxes_df, ovthresh=None):
    """Evaluate detections over a whole collection and print a summary.

    Runs ``df_evaluate_on_gt`` and wraps the resulting ``det_stats`` rows in
    a DataFrame. The summary line is always printed.

    Args:
        gt_boxes_df: ground-truth DataFrame, forwarded to ``df_evaluate_on_gt``.
        pred_boxes_df: detection DataFrame, forwarded to ``df_evaluate_on_gt``.
        ovthresh: IoU threshold, forwarded to ``df_evaluate_on_gt``.

    Returns:
        Tuple ``(acc, df_stats)``. ``acc`` is ``tp / (tp + fp)`` over all
        evaluated classes (NaN when there are no evaluated detections).
        ``df_stats`` has the columns num_gt, num_det, tp, fp, ap, lbl.

    Note:
        Sets the global pandas option ``display.max_rows`` to 50.
    """
    det_stats, total_num_tp = df_evaluate_on_gt(gt_boxes_df, pred_boxes_df, ovthresh=ovthresh)

    pd.set_option('display.max_rows', 50)
    df_stats = pd.DataFrame(det_stats, columns=['num_gt', 'num_det', 'tp', 'fp', 'ap', 'lbl'])

    # Summing the DataFrame column also works when det_stats is empty,
    # where slicing np.array(det_stats)[:, 3] would raise IndexError.
    total_num_fp = int(df_stats['fp'].sum())

    num_det = total_num_tp + total_num_fp
    # undefined without detections; NaN instead of a ZeroDivisionError
    acc = total_num_tp / float(num_det) if num_det > 0 else float('nan')

    ap = df_stats['ap'].astype(float)
    print('RESULTS ON FULL COLLECTION :')
    # Boolean mask instead of Series.nonzero(), which pandas removed in 1.0.
    print("total_tp", total_num_tp, "total_fp", total_num_fp,
          "acc", '{:0.3f}'.format(acc),
          "mAP", '{:0.4f}'.format(ap.mean()),
          "mAP(nonzero)", '{:0.4f}'.format(ap[ap != 0].mean()))

    return acc, df_stats
|
|
|
|
|
|
# *FAST* AP COMPUTATION
|
|
|
|
|
|
# prepare AP computation
|
|
|
|
|
|
def add_max_det(group):
    """Flag the highest-scoring true positive of a group in a ``max_det`` column.

    Adds (or overwrites) a boolean ``max_det`` column on ``group`` in place.
    Among the rows with ``det_type == 3`` (TP), only the one with the
    maximum ``score`` is set to True; the remaining TP rows stay False and
    are thereby treated as double detections.

    Args:
        group: DataFrame with ``det_type`` and ``score`` columns.

    Returns:
        The same ``group`` object, with the ``max_det`` column added.
    """
    # add column to dataframe
    group['max_det'] = False
    # select detections marked as TP
    tp_group = group[group.det_type == 3]
    # only one can be TP, others are double detections
    if len(tp_group) > 0:
        # Single .loc call on the frame itself: the chained form
        # `group.max_det.loc[...] = True` may write to a temporary copy and
        # leave `group` unchanged.
        group.loc[tp_group.score.idxmax(), 'max_det'] = True
    return group
|
|
|
|
|
|
def add_det_type_column(eval_df, tp_thresh=0.5, bg_thresh=0.2):
    """Append a ``det_type`` column that categorises every detection.

    Based on "Diagnosing Error in Object Detectors" by Hoiem et al., with
    these modifications:
      * sim and other categories are merged, since every sign is considered
        similar
      * bg_thresh is 0.2 instead of default 0.1

    Type codes: 0 = FP localisation, 1 = FP sim/other confusion,
    2 = FP background confusion, 3 = TP.

    Args:
        eval_df: DataFrame with ``overlap``, ``pred`` and ``true`` columns;
            it is modified in place.
        tp_thresh: overlap that must be exceeded for a class-matching
            detection to count as TP.
        bg_thresh: overlap at or below which a detection counts as
            background confusion.

    Returns:
        The same ``eval_df`` object, with the ``det_type`` column added.
    """
    loc_fp, sim_fp, bg_fp, true_pos = 0, 1, 2, 3

    def classify(row):
        iou = row.overlap
        if row.pred == row.true:
            # class matches: TP, or poorly localised, or background
            if iou > tp_thresh:
                return true_pos
            return loc_fp if iou > bg_thresh else bg_fp
        # class differs: confusion with another class, or background
        return sim_fp if iou > bg_thresh else bg_fp

    # determine detection types row by row and add them as a column
    eval_df['det_type'] = [classify(row) for row in eval_df.itertuples(index=False)]

    return eval_df
|
|
|
|
|
|
def prepare_eval_df(all_boxes, gt_boxes, gt_labels, seg_idx, tp_thresh, bg_thresh):
    """Prepare eval_df that contains most information for average precision computation.

    Every detection is assigned to the ground-truth box it overlaps most,
    then categorised with ``add_det_type_column`` and flagged with
    ``add_max_det`` (per ground-truth box).

    Args:
        all_boxes: detections, converted with ``convert_detections_to_array``.
        gt_boxes: ground-truth boxes, passed to ``box_iou``.
        gt_labels: array of ground-truth labels, indexed by the position of
            the best-matching ground-truth box.
        seg_idx: value stored in the ``seg_idx`` column of the result.
        tp_thresh: forwarded to ``add_det_type_column``.
        bg_thresh: forwarded to ``add_det_type_column``.

    Returns:
        DataFrame with the columns overlap, true, pred, score, det_idx,
        gt_idx, seg_idx, det_type and max_det. The first six columns come
        from a single ``np.hstack`` array and therefore share one dtype.
    """
    # convert all_boxes to ndarray (N x 9)
    # [ID, cx, cy, score, x1, y1, x2, y2, idx] bbox = [4:8] ctr = [1:3]
    sign_detections = convert_detections_to_array(all_boxes)

    # compute ious between detections and gt_boxes
    # NOTE(review): assumes box_iou returns a (num_detections, num_gt)
    # matrix, as implied by the axis=1 reductions below - confirm.
    ious = box_iou(sign_detections[:, 4:8], gt_boxes)

    # for each detection get best fit with gt box
    index_gt = np.argmax(ious, axis=1)
    overlap_gt = np.max(ious, axis=1)
    label_gt = gt_labels[index_gt]

    # collect in data frame
    eval_df = pd.DataFrame(np.hstack([overlap_gt.reshape(-1, 1), label_gt.reshape(-1, 1),
                                      sign_detections[:, [0, 3, 8]], index_gt.reshape(-1, 1)]),
                           columns=['overlap', 'true', 'pred', 'score', 'det_idx', 'gt_idx'])
    # add column with segment index
    eval_df['seg_idx'] = seg_idx
    # add det_type column (0:LOC, 1:SIM, 2:BG, 3:TP)
    eval_df = add_det_type_column(eval_df, tp_thresh, bg_thresh)
    # compute max_det (in order to find double detections)
    # group_keys=False keeps the original row index: newer pandas versions
    # otherwise prepend the group key as an extra index level, which would
    # make 'gt_idx' both an index level and a column.
    eval_df = eval_df.groupby('gt_idx', group_keys=False).apply(add_max_det)

    return eval_df
|
|
|
|
|
|
# AP computation
|
|
|
|
|
|
def compute_mean_ap(col_eval_df, gt_df, num_classes=240, class_list=None, verbose=True):
    """Compute the per-class VOC AP and report the mean over classes.

    A detection counts as true positive only if ``det_type == 3`` and
    ``max_det`` is set; every other detection of the class (including double
    detections) counts as false positive.

    Args:
        col_eval_df: DataFrame of detections with the columns ``score``,
            ``pred``, ``det_type`` and ``max_det``.
        gt_df: ground-truth DataFrame; only its ``cls`` column is used, to
            count the positives per class.
        num_classes: when ``class_list`` is None, classes
            ``1 .. num_classes - 1`` are evaluated.
        class_list: optional iterable of class ids to evaluate.
        verbose: if true, print the mean AP.

    Returns:
        ndarray with one row ``[npos, num_det, tp, fp, ap, cls_idx]`` per
        evaluated class that has ground truth; shape (0, 6) when no class
        qualifies. Classes without ground truth are skipped.
    """
    # define list of classes to evaluate over
    if class_list is None:
        class_list = np.arange(1, num_classes)
    col_eval_df = col_eval_df.sort_values('score', ascending=False)

    # compute class counts
    gt_counts = gt_df.cls.value_counts()

    det_stats = []
    for cls_idx in class_list:
        # get class predictions
        cls_det_df = col_eval_df[col_eval_df.pred == cls_idx]
        # get gt number
        npos = gt_counts.loc[cls_idx] if cls_idx in gt_counts.index else 0
        if npos <= 0:
            # AP is not meaningful without ground truth for the class
            continue

        # `== True` is deliberate: it yields a boolean mask even if the
        # max_det column is stored with object dtype.
        tp_vec = ((cls_det_df.det_type == 3) & (cls_det_df.max_det == True)).values  # noqa: E712
        fp_vec = ~tp_vec
        fp = np.cumsum(fp_vec)
        tp = np.cumsum(tp_vec)

        rec = tp / float(npos)
        # the eps floor guards against a zero denominator
        prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
        ap = voc_ap(rec, prec, False)
        # sum is used to map empty list to 0
        det_stats.append([npos, len(cls_det_df), np.sum(tp[-1:]), np.sum(fp[-1:]), ap, cls_idx])

    # convert to ndarray; keep a 2-D shape even without rows so that the
    # column slice below (and in callers) cannot raise IndexError
    det_stats = np.asarray(det_stats) if det_stats else np.empty((0, 6))
    mean_ap = np.mean(det_stats[:, -2]) if len(det_stats) else float('nan')

    if verbose:
        print('mAP {:.4}'.format(mean_ap))
    return det_stats
|
|
|
|
|
|
def compute_global_ap(col_eval_df, gt_df, num_classes=240, verbose=True):
    """Compute one global VOC AP over the detections of all classes.

    All detections with a predicted class in ``1 .. num_classes - 1`` are
    ranked together by score. A detection counts as true positive only if
    ``det_type == 3`` and ``max_det`` is set; every other detection counts
    as false positive.

    Args:
        col_eval_df: DataFrame of detections with the columns ``score``,
            ``pred``, ``det_type`` and ``max_det``.
        gt_df: ground-truth DataFrame; the rows whose ``cls`` lies in
            ``1 .. num_classes - 1`` are counted as positives.
        num_classes: upper bound (exclusive) of the evaluated class ids.
        verbose: if true, print the global AP.

    Returns:
        The global AP as a float; 0.0 when there are no ground-truth
        positives.
    """
    valid_classes = range(1, num_classes)

    # sort according to score
    col_eval_df = col_eval_df.sort_values('score', ascending=False)
    # not necessary, because predict classes are only in range [1, num_classes] anyways
    cls_det_df = col_eval_df[col_eval_df.pred.isin(valid_classes)]

    # filter considered classes and select number of gt positives
    gt_df = gt_df[gt_df.cls.isin(valid_classes)]
    npos = len(gt_df)

    # A float, not the int 0: '{:.4}'.format(0) raises ValueError because a
    # precision is not allowed when formatting an integer.
    ap = 0.0
    if npos > 0:
        # `== True` is deliberate: it yields a boolean mask even if the
        # max_det column is stored with object dtype.
        tp_vec = ((cls_det_df.det_type == 3) & (cls_det_df.max_det == True)).values  # noqa: E712
        fp_vec = ~tp_vec
        fp = np.cumsum(fp_vec)
        tp = np.cumsum(tp_vec)

        rec = tp / float(npos)
        # the eps floor guards against a zero denominator
        prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
        # in doubt use original VOC AP code; other implementations to
        # compare against:
        # https://datascience.stackexchange.com/questions/25119/how-to-calculate-map-for-detection-task-for-the-pascal-voc-challenge
        # https://github.com/rafaelpadilla/Object-Detection-Metrics
        ap = voc_ap(rec, prec, False)

    if verbose:
        print('global AP {:.4}'.format(ap))
    return ap
|
|
|
|
|
|
# FP categorization
|
|
|
|
|
|
def get_type_val_frac(fp_type_series, type_values=(0, 1, 2, 3), num_fp_thres=(5, 10, 25, 50, 100)):
    """Compute the fraction of each type value within the leading entries of a series.

    For every threshold ``t`` in ``num_fp_thres`` the slice
    ``fp_type_series[:t]`` is taken and the normalised frequency of each
    value in ``type_values`` is recorded.

    Args:
        fp_type_series: pandas Series of type codes.
            NOTE(review): presumably already ordered by descending score so
            that the slice selects the top-ranked entries - confirm with
            the caller.
        type_values: values whose fractions are reported, one output column
            each.
        num_fp_thres: slice lengths to evaluate, one output row each.

    Returns:
        ndarray of shape ``(len(num_fp_thres), len(type_values))``; entries
        for values that do not occur in a slice are 0.
    """
    # Defaults are tuples rather than lists so they cannot be mutated
    # between calls.
    type_val_frac = np.zeros((len(num_fp_thres), len(type_values)))
    for i, thres in enumerate(num_fp_thres):
        # value -> fraction among the first `thres` entries
        fractions = fp_type_series[:thres].value_counts(normalize=True, sort=True).to_dict()
        for j, val in enumerate(type_values):
            type_val_frac[i, j] = fractions.get(val, 0.)
    return type_val_frac
|
|
|
|
|
|
|
|
|