# File: cuneiform-sign-detection-code/lib/evaluations/line_evaluation.py
# Last modified: 2020-11-19 12:18:53 +01:00
# Size: 449 lines, 18 KiB (Python)

import pandas as pd
import numpy as np
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt
from ast import literal_eval
import os.path
from ..alignment.LineFragment import compute_line_endpoints_by_hypo_idx
from ..detection.detection_helpers import radius_in_image
from ..detection.line_detection import line_params_from_pts, hess_normal_form_from_pts, dist_lineseg_line
class LineAnnotations(object):
    """Ground-truth line annotations of one collection and their matching to line hypotheses.

    The annotations are read from ``line_annotations_<collection_name>.csv`` inside
    ``<relative_path>data/annotations/``. After loading, ``anno_df`` holds one row per
    annotated point together with derived columns (``line_segs``, ``xmin``/``xmax``,
    ``ls_x_separate``, ``dist``, ``dist_avg``, ``inline``, ``gt_line_idx``).
    If the file does not exist, ``anno_df`` is an empty list, so ``len(anno_df) > 0``
    is the availability check used throughout.
    """

    def __init__(self, collection_name, coll_scales=None, interline_dist=128/2., relative_path='../'):
        """Load the annotations of ``collection_name``.

        :param collection_name: name used to build the annotation file name.
        :param coll_scales: optional per-segment scale lookup, indexed with the ``segm_idx``
            column and expected to expose ``.values`` (e.g. a pandas Series); ``None`` disables scaling.
        :param interline_dist: reference inter-line distance; five times this value is the
            vertical threshold used when merging side-by-side line parts.
        :param relative_path: prefix prepended to ``data/annotations/``.
        """
        # basic paths
        self.num_classes = 2
        self.path_to_data_products = '{}data/annotations/'.format(relative_path)
        self.coll_scales = coll_scales
        self.interline_dist = interline_dist
        # load collection annotations
        self.anno_df = self.load_collection_annotations(collection_name)
        if len(self.anno_df) > 0:
            print('Load line annotations for {} dataset: {} found!'.format(collection_name,
                                                                           self.anno_df.segm_idx.nunique()))
        else:
            print('No line annotations for {} dataset'.format(collection_name))

    def load_collection_annotations(self, collection_name):
        """Read and post-process the annotation CSV of a collection.

        :return: the processed data frame, or an empty list if the file does not exist
            (callers test availability with ``len(...)``).
        """
        # assemble annotation file path
        annotation_file = 'line_annotations_{}.csv'.format(collection_name)
        annotation_file_path = '{}{}'.format(self.path_to_data_products, annotation_file)
        # no annotation file -> empty list (checked later with len(.))
        if not os.path.isfile(annotation_file_path):
            return []

        # read annotation file
        anno_df = pd.read_csv(annotation_file_path, engine='python')
        # apply scale
        if self.coll_scales is not None:
            scale_vec = self.coll_scales[anno_df.segm_idx].values
            anno_df.x = (anno_df.x * scale_vec).round().astype(int)
            anno_df.y = (anno_df.y * scale_vec).round().astype(int)

        # NOTE: every groupby below passes group_keys=False. The applied helpers return the
        # group itself; with pandas >= 2.0 the default would prepend the group keys to the
        # index and make the following groupby('segm_idx') ambiguous (index level vs. column).

        # assemble line segs
        anno_df = anno_df.groupby('segm_idx', group_keys=False).apply(assemble_line_segments)

        ## 0) prepare meta data columns
        # add ls_x_separate column (depends on assemble_line_segments)
        anno_df = anno_df.groupby(['segm_idx', 'line_idx'], group_keys=False).apply(add_x_minmax)
        anno_df = anno_df.groupby('segm_idx', group_keys=False).apply(mark_x_seperate)
        # add dist and dist_avg column
        anno_df['dist'] = anno_df.line_segs.apply(set_line_param)
        anno_df = anno_df.groupby(['segm_idx', 'line_idx'], group_keys=False).apply(set_mean)
        # (the ls_vert_nb column from mark_vert_nb is currently not computed)

        ## 1) group lines together
        # start with empty inline lists for every row
        anno_df['inline'] = pd.Series([np.empty(0, dtype=int)] * len(anno_df), index=anno_df.index)
        # further group line segments by order and ls_x_separate (should be respected when annotating data!)
        anno_df = anno_df.groupby('segm_idx', group_keys=False).apply(group_ls_by_order,
                                                                      self.interline_dist * 5)  # * 3
        # assign actual line idx
        anno_df = anno_df.groupby('segm_idx', group_keys=False).apply(assign_actual_line_index)

        ## 2) refine ordering
        # reset dist_avg based on gt_line_idx
        anno_df = anno_df.groupby(['segm_idx', 'gt_line_idx'], group_keys=False).apply(set_mean)
        # assign actual line idx again
        anno_df = anno_df.groupby('segm_idx', group_keys=False).apply(assign_actual_line_index)
        return anno_df

    def select_df_by_segm_idx(self, segm_idx):
        """Return the annotation rows of one segment (empty frame if the segment has none)."""
        assert len(self.anno_df) > 0, 'No annotations available!'
        # wrap pandas logic
        return self.anno_df[(self.anno_df.segm_idx == segm_idx)]

    def visualize_line_annotations(self, segm_idx, input_im, show_line_seg_idx=False):
        """Plot the annotated lines of a segment on top of ``input_im``.

        Each annotated line is coloured and labelled by its ``gt_line_idx``; with
        ``show_line_seg_idx`` the raw ``line_idx`` is additionally shown at the second point.
        Nothing is drawn if the segment has no annotations.
        """
        # get segment data frame
        seg_line_df = self.select_df_by_segm_idx(segm_idx)
        # check if any anno
        if len(seg_line_df) > 0:
            # create basic plot
            fig, axes = plt.subplots(figsize=(10, 10))
            grouped = seg_line_df.groupby('line_idx')
            color = plt.cm.jet(np.linspace(0, 1, np.max(seg_line_df.line_idx) + 2))
            for i, line_rec in grouped:
                gt_line_idx = line_rec.gt_line_idx.values[0]
                line_idx = line_rec.line_idx.values[0]
                axes.plot(line_rec.x.values, line_rec.y.values, linewidth=5, color=color[gt_line_idx],)
                axes.text(line_rec.x.values[0], line_rec.y.values[0], '{}'.format(gt_line_idx),
                          bbox=dict(facecolor='blue', alpha=0.5), fontsize=8, color='white')
                # the second label needs a second point; skip it for single-point lines
                if show_line_seg_idx and len(line_rec) > 1:
                    axes.text(line_rec.x.values[1], line_rec.y.values[1], '{}'.format(line_idx),
                              bbox=dict(facecolor='red', alpha=0.5), fontsize=8, color='white')
            # plot last so that axis get overwritten (no need to remove ticks :)
            axes.imshow(input_im, cmap='gray')
            plt.show()

    def get_hypo_line_labeling_for_segm(self, segm_idx, line_hypos_agg, verbose=False):
        """Label every ground-truth line segment of a segment with its nearest line hypothesis.

        :return: copy of the segment rows that carry a non-empty ``line_segs`` entry. When any
            exist, the columns ``hypo_line_lbl`` (one label per ``gt_line_idx``, see
            ``decide_hypo_line_lbl``) and ``hypo_line_dist`` are added; otherwise the
            (empty) frame is returned without these columns.
        """
        # select line segment ground truth
        seg_ls_df = self.select_df_by_segm_idx(segm_idx).copy()
        # from n points only n-1 segments -> remove empty ones
        seg_ls_df = seg_ls_df[seg_ls_df.line_segs.apply(len) > 0]
        # check if any annotations found
        if len(seg_ls_df) > 0:
            # assign hypo lines to gt line segments
            gt_line_segs = seg_ls_df.line_segs.values.tolist()
            gt_ls_lbl, gt_ls_dist = assign_lines_to_gt_line_segments(gt_line_segs, line_hypos_agg)
            # update dataframe
            seg_ls_df['hypo_line_lbl'] = gt_ls_lbl
            seg_ls_df['hypo_line_dist'] = np.sqrt(gt_ls_dist)
            # decide hypo line labels (group_keys=False keeps the row index unchanged)
            seg_ls_df = seg_ls_df.groupby(['gt_line_idx'], group_keys=False).apply(decide_hypo_line_lbl)
        else:
            if verbose:
                print('No line ground truth available for segment idx [{}]!'.format(segm_idx))
        return seg_ls_df

    def get_assignment_for_line_hypos(self, segm_idx, line_hypos_agg):
        """Return one ``(gt_line_idx, hypo_line_lbl)`` row per assigned hypothesis line.

        The result is sorted by ``gt_line_idx``; it is an empty data frame when no
        annotations are available for the collection or the segment.
        """
        # create empty dummy for cases where no annotations available
        gt_line_assignment = pd.DataFrame()
        if len(self.anno_df) > 0:
            # get labelling
            seg_ls_df = self.get_hypo_line_labeling_for_segm(segm_idx, line_hypos_agg)
            if len(seg_ls_df) > 0:
                # in case of multiple annotations per hypo line, pick the one with smallest distance
                gt_line_assignment = seg_ls_df.sort_values('hypo_line_dist').groupby('hypo_line_lbl').head(1)[
                    ['gt_line_idx', 'hypo_line_lbl']]
                gt_line_assignment = gt_line_assignment.sort_values('gt_line_idx')
        return gt_line_assignment

    def visualize_hypo_line_assignments(self, segm_idx, line_hypos_agg, input_im):
        """Plot the ground-truth segments of a segment coloured by their hypothesis label.

        Does nothing when the segment has no usable annotations.
        """
        # get labelling
        seg_ls_df = self.get_hypo_line_labeling_for_segm(segm_idx, line_hypos_agg)
        # without annotations the label column does not exist -> nothing to draw
        if len(seg_ls_df) == 0:
            return
        gt_ls_lbl = seg_ls_df.hypo_line_lbl.values
        gt_line_segs = seg_ls_df.line_segs.values
        # visualize
        visualize_line_segments_with_labels(gt_line_segs, gt_ls_lbl, input_im)

    def visualize_gt_lines_with_assignments(self, segm_idx, line_hypos_agg, center_im):
        """Plot ground-truth segments (coloured by ``gt_line_idx``) with their assigned hypothesis lines.

        Does nothing when the segment has no usable annotations.
        """
        # gt assignment
        gt_line_assignment = self.get_assignment_for_line_hypos(segm_idx, line_hypos_agg)
        # get labelling
        seg_ls_df = self.get_hypo_line_labeling_for_segm(segm_idx, line_hypos_agg)
        # np.max below fails on empty input -> nothing to draw
        if len(seg_ls_df) == 0:
            return
        gt_ls_lbl = seg_ls_df.gt_line_idx.values
        gt_line_segs = seg_ls_df.line_segs.values
        # get line hypo endpoints (the two coordinates of each endpoint are swapped by fliplr)
        list_hypo_endpts = [np.fliplr(np.array(compute_line_endpoints_by_hypo_idx(hidx, line_hypos_agg)).
                                      reshape(2, 2)).ravel() for hidx in gt_line_assignment.hypo_line_lbl.values]
        # get color map ('spectral' was removed from matplotlib; 'nipy_spectral' is its new name)
        color = plt.cm.nipy_spectral(np.linspace(0, 1, np.max(gt_ls_lbl) + 1))

        fig, axes = plt.subplots(1, 2, figsize=(15, 7))
        ax = axes.ravel()
        ax[0].imshow(center_im, cmap='gray')
        ax[0].set_title('Input image')
        ax[1].imshow(center_im * 0)
        for line, li in zip(gt_line_segs, gt_ls_lbl):
            p0, p1 = line
            ax[1].plot((p0[0], p1[0]), (p0[1], p1[1]), color=color[li], linewidth=2)
        ax[1].set_xlim((0, center_im.shape[1]))
        ax[1].set_ylim((center_im.shape[0], 0))
        ax[1].set_title('gt line segments and assigned line hypos')
        for idx, line_pts in enumerate(list_hypo_endpts):
            ax[1].plot(line_pts[::2], line_pts[1::2], '-', color=color[int(idx)], linewidth=2)
            ax[1].text(line_pts[0], line_pts[1], '{}'.format(idx),
                       bbox=dict(facecolor='blue', alpha=0.5), fontsize=8, color='white')
#### HELPERS
# create line segment column
def assemble_line_segments(group):
    """Add a ``line_segs`` column holding, per point, the segment to the next point of its line.

    Points are grouped by ``line_idx``; within a line they are connected in row order.
    Every point gets ``((x, y), (x_next, y_next))``; the last point of each line gets an
    empty tuple (n points only form n-1 segments).

    Segments are written back by row position, so the result is also correct when the
    rows of ``group`` are not sorted by ``line_idx``.

    :param group: data frame with the columns ``line_idx``, ``x`` and ``y``.
    :return: a copy of ``group`` with the additional ``line_segs`` column.
    """
    xs = group.x.values
    ys = group.y.values
    # default: empty segment (also covers the last point of every line)
    line_segs = [()] * len(group)
    # .indices maps each line_idx to the row positions of its points (in row order)
    for positions in group.groupby('line_idx').indices.values():
        for cur, nxt in zip(positions[:-1], positions[1:]):
            line_segs[cur] = ((xs[cur], ys[cur]), (xs[nxt], ys[nxt]))
    # work on a copy so the frame handed in by groupby.apply is not mutated
    group = group.copy()
    group['line_segs'] = line_segs
    return group
# group line segments to line
def add_x_minmax(group):
    """Store the smallest and largest ``x`` of the group in the columns ``xmin`` and ``xmax``.

    The group is modified in place and returned.
    """
    x_values = group.x
    group['xmin'], group['xmax'] = x_values.min(), x_values.max()
    return group
def mark_x_seperate(group):
    """Add the column ``ls_x_separate``: per row, the lines that do not overlap it horizontally.

    For each row the unique ``line_idx`` values of all rows lying completely to the left
    (``xmax`` below the row's ``xmin``) are collected, followed by those lying completely
    to the right (``xmin`` above the row's ``xmax``). The group is modified in place and returned.
    """
    separated = []
    for seg_xmin, seg_xmax in zip(group.xmin, group.xmax):
        # lines ending before this row starts
        left_of = group.line_idx[group.xmax < seg_xmin].unique()
        # lines starting after this row ends
        right_of = group.line_idx[group.xmin > seg_xmax].unique()
        separated.append(np.concatenate([np.array(left_of), np.array(right_of)]))
    group['ls_x_separate'] = separated
    return group
def set_line_param(line_seg):
    """Return the Hesse-normal-form parameter used as the "height" of a line segment.

    :param line_seg: ``(p0, p1)`` pair of points, or an empty sequence for rows without segment.
    :return: element ``[1]`` of ``hess_normal_form_from_pts(p0, p1)`` (presumably the distance
        term of the normal form - confirm against that helper), or NaN for an empty segment.
    """
    if len(line_seg) == 0:
        # np.nan instead of the alias np.NaN, which was removed in NumPy 2.0
        return np.nan
    # use hess normal form (incorporates angle) rather than the basic line equation
    line_params = hess_normal_form_from_pts(line_seg[0], line_seg[1])
    return line_params[1]  # only interested in height
def set_mean(group):
    """Write the mean of the group's ``dist`` column into ``dist_avg`` (in place) and return the group."""
    mean_dist = group['dist'].mean()
    group['dist_avg'] = mean_dist
    return group
def mark_vert_nb(group, interline_thresh):
    """Add the column ``ls_vert_nb``: per row, the lines whose ``dist_avg`` is close to the row's.

    A line counts as a neighbour when one of its rows has ``dist >= 0`` and a ``dist_avg``
    differing from the current row's by less than ``interline_thresh``. The group is
    modified in place and returned.
    """
    has_dist = group.dist >= 0
    neighbours = []
    for row_dist_avg in group.dist_avg:
        is_close = np.abs(group.dist_avg - row_dist_avg) < interline_thresh
        near_lines = group.line_idx[has_dist & is_close].unique()
        neighbours.append(np.array(near_lines))
    group['ls_vert_nb'] = neighbours
    return group
# def make_inline_symmetric(group):
# # iterate over line segments, and make symmetric reference of inline
# for i, (sidx, line_seg) in enumerate(group.iterrows()):
# if len(line_seg.inline) > 0:
# select_inline = group.line_idx.isin(line_seg.inline)
# group.loc[select_inline, 'inline'] = select_inline.sum() * [line_seg.inline]
# # deal with type mismatch in column (did find no better way :/)
# inline_list = []
# for el in group.inline.astype(list).values:
# if isinstance(el, np.ndarray):
# inline_list.append(el)
# else:
# inline_list.append(np.array([el]))
# group['inline'] = inline_list
# # return
# return group
def group_ls_by_order(group, interline_thresh):
    """Link consecutive annotated lines that are left/right parts of the same text line.

    Lines are visited in ascending ``line_idx`` order. A line is linked to its predecessor
    when all of the following hold:

    * the predecessor is listed in the line's ``ls_x_separate``,
    * the predecessor lies to the left (its ``xmax`` is below the line's ``xmin``),
    * their ``dist_avg`` values differ by less than ``interline_thresh``,
    * the predecessor is not already part of the line's original ``inline`` entry.

    Linking appends each ``line_idx`` to the other line's ``inline`` array, for all rows
    of both lines.

    The per-line representative row is selected with ``drop_duplicates``/``set_index``
    instead of ``groupby(...).nth(0)``, whose result index differs between pandas versions,
    and the updated arrays are written back as a whole column instead of through a view
    of ``group.inline``.

    :param group: rows of one segment with the columns ``line_idx``, ``xmin``, ``xmax``,
        ``dist_avg``, ``ls_x_separate`` and ``inline``.
    :param interline_thresh: maximum ``dist_avg`` difference for two parts to be linked.
    :return: ``group`` itself if nothing was linked, otherwise a copy with updated ``inline``.
    """
    # one representative row per line, indexed by line_idx (the columns used here are
    # expected to be identical for all rows of a line)
    ls_agg = group.sort_values('line_idx').drop_duplicates('line_idx').set_index('line_idx')

    # live inline state per line (first row of each line), updated while iterating
    inline_by_line = {}
    for lidx, inline in zip(group.line_idx.values, group.inline.values):
        inline_by_line.setdefault(lidx, inline)

    changed = set()
    last_lidx = None
    for curr_lidx, curr_rec in ls_agg.iterrows():
        if last_lidx is not None:
            last_rec = ls_agg.loc[last_lidx]
            linkable = (
                # previous line is x separate ...
                np.any(np.isin(curr_rec.ls_x_separate, last_lidx))
                # ... and lies on the left
                and last_rec.xmax < curr_rec.xmin
                # vertical distance is small
                and np.abs(last_rec.dist_avg - curr_rec.dist_avg) < interline_thresh
                # not linked before (checked against the original inline entry)
                and last_lidx not in curr_rec.inline
            )
            if linkable:
                # build both arrays from the live state before storing either of them
                new_inline = np.concatenate([inline_by_line[curr_lidx], np.array([last_lidx])])
                new_last_inline = np.concatenate([inline_by_line[last_lidx], np.array([curr_lidx])])
                inline_by_line[curr_lidx] = new_inline
                inline_by_line[last_lidx] = new_last_inline
                changed.update((curr_lidx, last_lidx))
        last_lidx = curr_lidx

    if changed:
        # only rows of linked lines receive a new array; all other rows keep their entry
        group = group.copy()
        group['inline'] = [inline_by_line[lidx] if lidx in changed else inline
                           for lidx, inline in zip(group.line_idx.values, group.inline.values)]
    return group
# finalize assignment
def assign_actual_line_index(group):
    """Assign a running ``gt_line_idx`` to every row, shared by all lines linked via ``inline``.

    Rows are visited sorted by ``dist_avg`` and then ``x``. A row whose line has no index
    yet starts a new index for its whole line; in every case the row's index is then
    propagated to all lines listed in its ``inline`` entry. The group is modified in place
    and returned.
    """
    # -1 marks "not assigned yet"
    group['gt_line_idx'] = np.full(len(group), -1, dtype=int)
    next_idx = 0
    ordered = group.sort_values(['dist_avg', 'x'])
    for row_label, row in ordered.iterrows():
        # read the live value: earlier iterations may already have set it
        target_idx = group.loc[row_label, 'gt_line_idx']
        if target_idx == -1:
            # open a new ground-truth line for all rows of this annotated line
            target_idx = next_idx
            next_idx += 1
            group.loc[group.line_idx == row.line_idx, 'gt_line_idx'] = target_idx
        # extend the index to all inline members
        for member_lidx in row.inline:
            group.loc[group.line_idx == member_lidx, 'gt_line_idx'] = target_idx
    return group
# for eval need to assign detection lines to ground truth lines
def assign_lines_to_gt_line_segments(gt_line_segs, line_hypos_agg):
    """Assign every ground-truth line segment to its nearest hypothesis line.

    :param gt_line_segs: list of non-empty segments ``((x0, y0), (x1, y1))``.
    :param line_hypos_agg: hypothesis lines; one line per index in ``range(len(line_hypos_agg))``.
    :return: ``(labels, dists)`` - per segment the index of the hypothesis line with the
        smallest ``dist_lineseg_line`` value, and that value.
    """
    # endpoints of every hypothesis line
    hypo_endpoints = np.vstack([compute_line_endpoints_by_hypo_idx(hypo_idx, line_hypos_agg)
                                for hypo_idx in range(len(line_hypos_agg))])
    # swap the two coordinates of each endpoint, then flatten to one 4-vector per line
    hypo_endpoints = np.flip(hypo_endpoints.reshape((-1, 2, 2)), axis=2).reshape(-1, 4)
    # one flat row per ground-truth segment
    segment_rows = np.stack(gt_line_segs).reshape(len(gt_line_segs), -1)

    def segment_to_line_dist(line_row, seg_row):
        # distance between a segment (first two args) and a line (last two args)
        return dist_lineseg_line(seg_row[:2], seg_row[2:], line_row[:2], line_row[2:])

    # rows: hypothesis lines, columns: ground-truth segments
    dist_matrix = cdist(hypo_endpoints, segment_rows, segment_to_line_dist)
    # nearest line per segment
    nearest_line = np.argmin(dist_matrix, axis=0)
    nearest_dist = np.min(dist_matrix, axis=0)
    return nearest_line, nearest_dist
def decide_hypo_line_lbl(group):
    """Overwrite ``hypo_line_lbl`` of the whole group with its most frequent label.

    When several labels share the highest count, the tie is broken by the mean
    ``hypo_line_dist`` per label, taking the label with the LARGEST mean.
    The group is modified in place and returned.
    """
    labels, votes = np.unique(group.hypo_line_lbl, return_counts=True)
    top_labels = labels[votes == np.max(votes)]
    if len(top_labels) > 1:
        # NOTE(review): idxmax selects the candidate that is farthest away on average;
        # confirm this is intended and not meant to be the nearest (idxmin).
        mean_dist_per_lbl = group.groupby('hypo_line_lbl').hypo_line_dist.mean()
        winner = mean_dist_per_lbl[top_labels].idxmax()
    else:
        winner = top_labels[0]
    # assign the winning label to every row
    group['hypo_line_lbl'] = winner
    return group
# visualize
def visualize_line_segments_with_labels(gt_line_segs, gt_ls_lbl, center_im, line_hypo_endpts=None):
    """Plot the input image next to its line segments, coloured and labelled by ``gt_ls_lbl``.

    :param gt_line_segs: iterable of segments ``(p0, p1)`` with ``p = (x, y)``.
    :param gt_ls_lbl: one non-negative integer label per segment (used as colour index).
    :param center_im: image shown on the left; its shape defines the axis limits on the right.
    :param line_hypo_endpts: optional iterable of flat endpoint arrays
        ``(x0, y0, x1, y1)`` drawn on top, coloured by their position in the iterable.
    """
    # one colour per label value; fall back to a single colour when there are no labels.
    # ('spectral' was removed from matplotlib; 'nipy_spectral' is its new name)
    n_colors = int(np.max(gt_ls_lbl)) + 1 if len(gt_ls_lbl) > 0 else 1
    color = plt.cm.nipy_spectral(np.linspace(0, 1, n_colors))

    fig, axes = plt.subplots(1, 2, figsize=(15, 7))
    ax = axes.ravel()

    ax[0].imshow(center_im, cmap='gray')
    ax[0].set_title('Input image')

    # blank canvas with the same extent as the input image
    ax[1].imshow(center_im * 0)
    for line, li in zip(gt_line_segs, gt_ls_lbl):
        p0, p1 = line
        ax[1].plot((p0[0], p1[0]), (p0[1], p1[1]), color=color[li], linewidth=2)
        ax[1].text(p0[0], p0[1], '{}'.format(li),
                   bbox=dict(facecolor='blue', alpha=0.5), fontsize=8, color='white')

    ax[1].set_xlim((0, center_im.shape[1]))
    ax[1].set_ylim((center_im.shape[0], 0))
    ax[1].set_title('gt line segments and assigned line hypos')

    if line_hypo_endpts is not None:
        for idx, line_pts in enumerate(line_hypo_endpts):
            # wrap around so more hypothesis lines than labels cannot overrun the palette
            ax[1].plot(line_pts[::2], line_pts[1::2], '-',
                       color=color[int(idx) % len(color)], linewidth=2)