520 linhas
19 KiB
Python
520 linhas
19 KiB
Python
import numpy as np
|
|
import pandas as pd
|
|
|
|
import torch
|
|
from torchvision import transforms as trafos
|
|
|
|
from skimage import draw
|
|
from skimage.color import label2rgb
|
|
from skimage.transform import hough_line, hough_line_peaks, probabilistic_hough_line
|
|
from skimage.morphology import skeletonize, skeletonize_3d, thin, medial_axis, watershed
|
|
|
|
from scipy import ndimage as ndi
|
|
from scipy.spatial.distance import pdist, cdist, squareform
|
|
|
|
from ..detection.detection_helpers import label_map2image, coord_in_image
|
|
|
|
|
|
# prepare input for line detection
|
|
|
|
def preprocess_line_input(pil_im, scale, shift=None):
|
|
""" produces five copies of the segment at slightly different offsets
|
|
|
|
:param pil_im: tablet segment that is to be processed
|
|
:param scale: scale which should be used for resizing
|
|
:param shift: offset shift used to produce five-fold oversampling
|
|
:return: 4D tensor with 5xCxWxH
|
|
"""
|
|
if shift is None:
|
|
shift = 0 # cfg.TEST.SHIFT
|
|
# compute scaled size
|
|
imw, imh = pil_im.size
|
|
imw = int(imw * scale)
|
|
imh = int(imh * scale)
|
|
# determine crop size
|
|
crop_sz = [int(imw - shift), int(imh - shift)]
|
|
# tensor-space transforms
|
|
ts_transform = trafos.Compose([
|
|
trafos.ToTensor(),
|
|
trafos.Normalize(mean=[0.5], std=[1]), # normalize
|
|
])
|
|
# compose transforms
|
|
tablet_transform = trafos.Compose([
|
|
trafos.Lambda(lambda x: x.convert('L')), # convert to gray
|
|
trafos.Resize((imh, imw)), # resize according to scale
|
|
trafos.FiveCrop((crop_sz[1], crop_sz[0])), # oversample
|
|
trafos.Lambda(
|
|
lambda crops: torch.stack([ts_transform(crop) for crop in crops])), # returns a 4D tensor
|
|
])
|
|
# apply transforms
|
|
im_list = tablet_transform(pil_im)
|
|
return im_list
|
|
|
|
|
|
def apply_detector(inputs, model_fcn, device):
|
|
|
|
with torch.no_grad(): # faster, less memory usage
|
|
inputs = inputs.to(device)
|
|
# apply network
|
|
output = model_fcn(inputs)
|
|
# convert to numpy
|
|
output = output.data.cpu().numpy()
|
|
return output
|
|
|
|
|
|
# prepare transliteration for line detection
|
|
|
|
def prepare_transliteration(tl_df, num_lines, stats):
|
|
"""
|
|
ATTENTION: this filters the transliteration according to status!
|
|
"""
|
|
|
|
# prepare transliteration for line detection
|
|
if num_lines > 0:
|
|
# only visible/not broken
|
|
tl_df = tl_df[tl_df.status > 0]
|
|
# compute line length
|
|
tl_df = tl_df.groupby('line_idx').apply(compute_line_length_from_tl, stats)
|
|
# get line statistics
|
|
num_vis_lines = tl_df.line_idx.nunique() # num visible lines (not broken lines)
|
|
# len_lines = tl_df.groupby('line_idx').pos_idx.count()
|
|
len_lines = tl_df.line_idx.value_counts()
|
|
len_min, len_max = len_lines.min(), len_lines.max()
|
|
else:
|
|
# TODO: if no tl info available, use initial line detection results to set these parameters
|
|
len_min, len_max = 4, 12
|
|
num_vis_lines = 40
|
|
|
|
return tl_df, num_vis_lines, len_min, len_max
|
|
|
|
|
|
# extract lines with hough transform
|
|
|
|
def compute_hough_transform(line_det_map1, line_det_map2, re_focus_angle=True):
|
|
# focus theta for cuneiform horizontal lines
|
|
theta_range = np.linspace(np.deg2rad(83), np.deg2rad(97), 50)
|
|
# theta_range = np.linspace(np.deg2rad(-90) ,np.deg2rad(90), 180) # normal range
|
|
|
|
# Classic straight-line Hough transform (usually angles from -90 to +90)
|
|
h, theta, d = hough_line(line_det_map1, theta=theta_range)
|
|
|
|
# debug
|
|
# plt.imshow(np.log(1 + h), extent=[np.rad2deg(theta[-1]), np.rad2deg(theta[0]), d[-1], d[0]], cmap='gray', aspect=1/1.5)
|
|
# plt.show()
|
|
|
|
# focus angle and re-run
|
|
if re_focus_angle:
|
|
# get peaks
|
|
accum, angles, dists = hough_line_peaks(h, theta, d, min_distance=1, min_angle=16, num_peaks=50)
|
|
|
|
# get median angle
|
|
m_angle = np.median(np.rad2deg(angles))
|
|
# modify theta
|
|
theta_range = np.linspace(np.deg2rad(m_angle - 2), np.deg2rad(m_angle + 2), 50)
|
|
theta_range2 = np.linspace(np.deg2rad(m_angle - 3), np.deg2rad(m_angle + 3), 50)
|
|
# Classic straight-line Hough transform (usually angles from -90 to +90)
|
|
h, theta, d = hough_line(line_det_map2, theta=theta_range)
|
|
|
|
return h, theta, d, theta_range, theta_range2
|
|
|
|
|
|
# group lines together that are "close"
|
|
|
|
def shoelace_formula(points):
|
|
''' compute are of polygon according to shoelace
|
|
requires ordering of point coordinates
|
|
https://en.wikipedia.org/wiki/Shoelace_formula
|
|
|
|
:param points: 2xn matrix, where n is number of points (points need to be ordered!!)
|
|
:return: area of polygon
|
|
'''
|
|
area = 0
|
|
dmat = np.ones((2, 2))
|
|
for i in range(points.shape[1]):
|
|
dmat[:, 0] = points[:, i]
|
|
dmat[:, 1] = points[:, (i+1) % points.shape[1]]
|
|
area += np.linalg.det(dmat.transpose())
|
|
return np.abs(area) / 2.
|
|
|
|
|
|
def area_between_two_line_segments(spt1, spt2, lpt1, lpt2):
|
|
# compute area between line segments
|
|
# assume: line segments do not intersect and
|
|
# assume: pts should be order according to x-axis
|
|
# this means a valid order would be [spt1, spt2, lpt2, lpt1]
|
|
return shoelace_formula(np.stack([spt1, spt2, lpt2, lpt1], axis=1))
|
|
|
|
|
|
def nearby_and_near_parallel_2(l1, l2, interline_distance, interval=[0, 10]):
|
|
# compute area between line segments over interval
|
|
angle1, rad1 = l1
|
|
angle2, rad2 = l2
|
|
spt1, spt2 = line_pts_from_polar_line(angle1, rad1, x0=interval[0], x1=interval[1])
|
|
lpt1, lpt2 = line_pts_from_polar_line(angle2, rad2, x0=interval[0], x1=interval[1])
|
|
# use shoelace method
|
|
area = area_between_two_line_segments(spt1, spt2, lpt1, lpt2)
|
|
# check threshold
|
|
interval_interline_area = interline_distance * np.abs(interval[1] - interval[0])
|
|
|
|
# print area, interval_interline_area / 2.
|
|
if area < interval_interline_area / 2.:
|
|
return True
|
|
else:
|
|
return False
|
|
|
|
|
|
def nearby_and_near_parallel(l1, l2, interline_distance):
|
|
# simple filter
|
|
angle1, rad1 = l1
|
|
angle2, rad2 = l2
|
|
if np.abs(rad1 - rad2) < interline_distance/2. and np.abs(np.rad2deg(angle1-angle2)) < 1.0:
|
|
return True
|
|
else:
|
|
return False
|
|
|
|
|
|
def do_intersect_in_interval(l1, l2, interval):
|
|
# y = mx+c or in parametric form
|
|
# \rho = x \cos \theta + y \sin \theta
|
|
# \rho (radius) perpendicular distance from origin to the line
|
|
# \theta is the angle formed by this perpendicular line
|
|
|
|
angle1, rad1 = l1
|
|
angle2, rad2 = l2
|
|
lower, upper = interval
|
|
|
|
quotient = (np.cos(angle1) - np.cos(angle2))
|
|
|
|
if quotient == 0: # same angles
|
|
if np.abs(rad1 - rad2) < 3: # same radius
|
|
return True
|
|
else:
|
|
return False
|
|
else:
|
|
# compute intersection coordinate
|
|
x_intersect = (rad1 - rad2) / quotient
|
|
|
|
# inside interval
|
|
if (x_intersect >= lower) and (x_intersect <= upper):
|
|
return True
|
|
else:
|
|
return False
|
|
|
|
|
|
def compute_group_labels_from_dists(X_dist):
|
|
# assign labels to groups
|
|
# iterate over pairwise distances and
|
|
|
|
# get squareform
|
|
XX = squareform(X_dist)
|
|
# set dummy labels
|
|
labels = -np.ones(XX.shape[0])
|
|
# label lines while checking for neighbourhood
|
|
for ii in range(len(labels)):
|
|
if labels[ii] == -1:
|
|
labels[ii] = ii
|
|
# for each row in squareform indicates potential neighbors
|
|
for idx in np.where(XX[ii, :] > 0)[0]:
|
|
labels[idx] = labels[ii]
|
|
return labels
|
|
|
|
|
|
# associate lines with line segments
|
|
|
|
|
|
def line_pts_from_polar_line(angle, dist, x0=0, x1=10):
|
|
# computes two points defining a line from polar line representation
|
|
x0, x1 = x0 * np.ones_like(angle), x1 * np.ones_like(angle) # x0 = np.zeros_like(angle)
|
|
y0 = (dist - x0 * np.cos(angle)) / np.sin(angle)
|
|
y1 = (dist - x1 * np.cos(angle)) / np.sin(angle)
|
|
return (x0, y0), (x1, y1)
|
|
|
|
|
|
def line_params_from_pts(lpt1, lpt2):
|
|
# compute parameters a, b for line representation y = a * x + b
|
|
a = (lpt2[1] - lpt1[1])/float(lpt2[0] - lpt1[0])
|
|
b = lpt1[1] - lpt1[0] * a
|
|
return a, b
|
|
|
|
|
|
def normal_form_from_pts(p, q):
|
|
# takes two points an computes
|
|
# https://de.wikipedia.org/wiki/Normalenform#Aus_der_Zweipunkteform
|
|
|
|
# normal
|
|
n = np.array([-(q[1]-p[1]),
|
|
q[0]-p[0]], dtype=float)
|
|
# normalize
|
|
n_0 = n / np.linalg.norm(n)
|
|
# distance from origin
|
|
dist = np.dot(n_0, p)
|
|
return n_0, dist
|
|
|
|
|
|
def hess_normal_form_from_pts(p, q):
|
|
n_0, dist = normal_form_from_pts(p, q)
|
|
# angle in rad
|
|
rad = np.arctan(n_0[1]/n_0[0])
|
|
return rad, dist
|
|
|
|
|
|
def _offset_pt_to_normal_form_line(pt, n_0, dist):
|
|
return np.dot(n_0, pt) - dist
|
|
|
|
|
|
def _shift_pt_to_normal_form_line(pt, n_0, shift_dist):
|
|
return pt + n_0 * shift_dist
|
|
|
|
|
|
def clip_pt_using_normal_form(pt, n_0, dist, min_dist):
|
|
# compute offset
|
|
offset_line = _offset_pt_to_normal_form_line(pt, n_0, dist)
|
|
# check if correction necessary
|
|
if np.abs(offset_line) > min_dist:
|
|
# compute correction
|
|
if offset_line >= 0:
|
|
correction = offset_line - min_dist
|
|
else:
|
|
correction = offset_line + min_dist
|
|
# apply correction
|
|
pt = _shift_pt_to_normal_form_line(pt, n_0, -correction)
|
|
return pt
|
|
|
|
|
|
def clip_bbox_using_line(bbox, line_pts_arr, min_dist=128/2.):
|
|
# get normal form of line
|
|
n_0, dist = normal_form_from_pts(line_pts_arr[0], line_pts_arr[1])
|
|
# compute distance to line and decide if pt needs to be shifted
|
|
pt_list = []
|
|
# iterate over two bounding box coordinates
|
|
for pt in [bbox[:2], bbox[2:]]:
|
|
pt = clip_pt_using_normal_form(pt, n_0, dist, min_dist)
|
|
pt_list.append(pt)
|
|
|
|
return np.concatenate(pt_list)
|
|
|
|
|
|
def clip_bbox_using_line_segmentation(bbox, line_pts_arr, skeleton, min_dist=128/2.):
|
|
# get normal form of line
|
|
n_0, dist = normal_form_from_pts(line_pts_arr[0], line_pts_arr[1])
|
|
|
|
# use bbox boundaries to crop pts from line segmentation
|
|
# seg_line_pts = np.nonzero(skeleton[:, int(bbox[0]):int(bbox[2])])[0]
|
|
# faster but more exclusive (probably worth the speedup)
|
|
seg_line_pts = np.nonzero(skeleton[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])])[0] + int(bbox[1])
|
|
|
|
dist_delta = 0
|
|
if len(seg_line_pts) > 3:
|
|
# compute average y location of segmentation line pts
|
|
seg_line_cy = np.mean(seg_line_pts)
|
|
# determine local distance delta from linear model to skeleton
|
|
pt = [(bbox[0] + bbox[2]) / 2., seg_line_cy]
|
|
dist_delta = _offset_pt_to_normal_form_line(pt, n_0, dist)
|
|
# correct normal form of line [n_0, dist] using delta, ie. alter dist
|
|
dist = dist + dist_delta
|
|
#print dist_delta, dist - dist_delta, dist
|
|
|
|
# compute distance to line and decide if pt needs to be shifted
|
|
pt_list = []
|
|
# iterate over two bounding box coordinates
|
|
for pt in [bbox[:2], bbox[2:]]:
|
|
pt = clip_pt_using_normal_form(pt, n_0, dist, min_dist)
|
|
pt_list.append(pt)
|
|
|
|
return np.concatenate(pt_list)
|
|
|
|
|
|
def dist_pt_line(pt, lpt1, lpt2):
|
|
# compute squared 'perpendicular distance'
|
|
# pt is point
|
|
# lpt are line points
|
|
# returns minimum (perpendicular) distance from point to line
|
|
|
|
# assumes line representation of form y = a * x + b
|
|
(a, b) = line_params_from_pts(lpt1, lpt2)
|
|
# from: energy based geometric model fitting (2010)
|
|
# https://en.wikipedia.org/wiki/Distance_from_a_point_to_a_line#Another_formula
|
|
return (np.abs(pt[1]-a*pt[0]-b)/np.sqrt(a**2+1))**2
|
|
|
|
|
|
def dist_lineseg_line(spt1, spt2, lpt1, lpt2):
|
|
# computes the distance between line (unbounded) and line segment (bounded)
|
|
# spt are line segment points
|
|
# lpt are line points
|
|
# returns minimum distance from line segment to line
|
|
return min(dist_pt_line(spt1, lpt1, lpt2),
|
|
dist_pt_line(spt2, lpt1, lpt2))
|
|
|
|
|
|
def assign_line_segments_to_lines(line_segs, line_hypos, x1=10):
|
|
# get line pts from polar lines
|
|
polar_lines = line_hypos.groupby('label').mean()[['angle', 'dist']].values
|
|
line_pts = line_pts_from_polar_line(polar_lines[:, 0], polar_lines[:, 1], x1=x1)
|
|
line_pts = np.transpose(np.concatenate(line_pts))
|
|
# get line segments
|
|
line_seg_pts = np.stack(line_segs).reshape(len(line_segs), -1)
|
|
|
|
# compute distance between line segments and lines
|
|
X2_dist = cdist(line_pts, line_seg_pts,
|
|
lambda lpts, spts: dist_lineseg_line(spts[:2], spts[2:], lpts[:2], lpts[2:]))
|
|
# assign line segments to nearest line
|
|
ls_labels = np.argmin(X2_dist, axis=0)
|
|
|
|
return ls_labels
|
|
|
|
|
|
# associate line segments with segments
|
|
|
|
|
|
def associate_segments_with_lines(lbl_ind, line_segs, ls_labels, group2line):
|
|
# create markers from line segments
|
|
im_marker = np.zeros_like(lbl_ind)
|
|
for line, li in zip(line_segs, ls_labels):
|
|
p0, p1 = line
|
|
rr, cc = draw.line(p0[1], p0[0], p1[1], p1[0])
|
|
im_marker[rr, cc] = int(group2line[li]) + 1 # avoid background class
|
|
# plt.imshow(im_marker)
|
|
|
|
# use water shed to assign labels to segments
|
|
distance = ndi.distance_transform_edt(lbl_ind)
|
|
segm_labels = watershed(-distance, im_marker, mask=lbl_ind)
|
|
return segm_labels, im_marker
|
|
|
|
|
|
# map segment lbls to image resolution (deal with network architecture with offset)
|
|
|
|
def compute_image_label_map(segm_labels, image_shape, padding=0):
|
|
# collect patch boxes and their labels
|
|
list_patch_boxes, list_patch_labels = [], []
|
|
for lbl_idx in np.unique(segm_labels):
|
|
if lbl_idx > 0:
|
|
# for index compute coordinate boxes
|
|
vx, vy = np.where(segm_labels == lbl_idx)
|
|
patch_boxes = label_map2image(vy, vx, segm_labels.shape[::-1]).astype(int)
|
|
# append
|
|
list_patch_boxes.append(patch_boxes)
|
|
list_patch_labels.append(patch_boxes.shape[0] * [lbl_idx])
|
|
# vis_detections(center_im, patch_boxes, max_vis=200, labels="")
|
|
|
|
patch_boxes = np.concatenate(list_patch_boxes, axis=0)
|
|
patch_labels = np.concatenate(list_patch_labels, axis=0)
|
|
# vis_detections(center_im, np.concatenate(list_patch_boxes, axis=0) , max_vis=1000, labels="")
|
|
|
|
# create segmentation map from boxes and labels
|
|
seg_canvas = np.zeros(image_shape[:2])
|
|
for bb, lbl in zip(patch_boxes, patch_labels):
|
|
pad = padding
|
|
bb[:2] = bb[:2] - pad
|
|
bb[2:] = bb[2:] + pad
|
|
# print patch_box, patch_lbl
|
|
seg_canvas[bb[1]:bb[3], bb[0]:bb[2]] = lbl
|
|
|
|
return seg_canvas
|
|
|
|
|
|
def compute_line_length_from_tl(group, stats, b=128.): # 128 / (2 * 32)
|
|
# collect widths
|
|
widths = np.zeros(len(group))
|
|
for ii, (sidx, sign_rec) in enumerate(group.iterrows()):
|
|
widths[ii] = stats.get_sign_width(sign_rec.lbl, sign_width=1) * b
|
|
# compute offsets and line length
|
|
sign_xpos = widths.cumsum() - (widths / 2.)
|
|
line_len = widths.sum()
|
|
# add columns to group
|
|
group['prior_line_len'] = np.rint(line_len)
|
|
group.loc[group.index, 'prior_sign_xoff'] = np.rint(sign_xpos)
|
|
group.loc[group.index, 'prior_sign_width'] = np.rint(widths)
|
|
|
|
return group
|
|
|
|
|
|
##### full pipeline
|
|
|
|
|
|
def post_process_line_detections(lbl_ind_x, num_lines, len_min, len_max, verbose=True):
|
|
# identify lines and merge them if too close together
|
|
# line hypothesis are stored in line_hypos dataframe
|
|
# line_hypos.label indicates which lines are grouped together(merged) -> line_hypo_agg
|
|
|
|
# (0) perform skeletonization
|
|
skeleton = skeletonize(lbl_ind_x)
|
|
# skeleton = skeletonize_3d(lbl_ind)
|
|
# skeleton = thin(lbl_ind)
|
|
|
|
# (1) compute hough transform
|
|
h, theta, d, theta_range, theta_range2 = compute_hough_transform(skeleton, skeleton) # skeleton, lbl_ind_x,
|
|
|
|
# (I) find peaks in hough transform
|
|
num_peaks_factor = 1.9 # 1.5 1.6 v007: 1.9 v047: 2.5 # line detector dependent (VIP)
|
|
hl_peak_threshold = (h.max() / float(len_max)) / 2. * len_min # 2. # has impact on lenght of lines found
|
|
accums, angles, dists = hough_line_peaks(h, theta, d, min_distance=1, min_angle=14,
|
|
num_peaks=int(num_lines * num_peaks_factor),
|
|
threshold=hl_peak_threshold)
|
|
|
|
# ugly patch for hough_line_peaks shortcomings
|
|
# in rare cases len(accums) != len(angles) or len(dists
|
|
if len(accums) != len(angles):
|
|
angles = accums
|
|
dists = accums
|
|
|
|
# (II) check if lines intersect close to the center and group them accordingly
|
|
interval = [lbl_ind_x.shape[1] * 1 / 8., lbl_ind_x.shape[1] * 7 / 8.]
|
|
X_dist = pdist(np.stack([angles, dists], axis=1), lambda l1, l2: do_intersect_in_interval(l1, l2, interval))
|
|
labels = compute_group_labels_from_dists(X_dist).astype(int)
|
|
if verbose:
|
|
print('detected groups: {} | num lines: {}.'.format(len(np.unique(labels)), num_lines))
|
|
# collect lines in dataframe
|
|
line_hypos = pd.DataFrame({'accum': accums, 'angle': angles, 'dist': dists, 'label': labels})
|
|
line_hypos_agg = line_hypos.groupby('label').mean()
|
|
# add group diff column
|
|
diffs = line_hypos_agg.dist.sort_values().diff()
|
|
# compute interline median
|
|
dist_interline_median = diffs.median()
|
|
|
|
# (III) check if remaining groups are very close
|
|
X_dist = pdist(np.stack([line_hypos_agg.angle.values, line_hypos_agg.dist.values], axis=1),
|
|
lambda l1, l2: nearby_and_near_parallel_2(l1, l2, dist_interline_median, interval))
|
|
updated_labels = compute_group_labels_from_dists(X_dist).astype(int)
|
|
# update dataframe and grouping
|
|
line_hypos.label.replace(to_replace=line_hypos_agg.index.values, value=updated_labels, inplace=True)
|
|
|
|
# (IV) re-group with updated labels and get line meta for later usage
|
|
line_hypos_agg = line_hypos.groupby('label').mean()
|
|
# group lines and remember index (needs to be here)
|
|
group2line = line_hypos_agg.index.values
|
|
# add column group dist diff
|
|
diffs = line_hypos_agg.dist.sort_values().diff()
|
|
diffs.name = 'group_diff' # set name before join
|
|
line_hypos = line_hypos.join(diffs, on='label')
|
|
# add column group angle diff
|
|
angle_diff = line_hypos_agg.sort_values('dist').angle.apply(np.rad2deg).diff()
|
|
angle_diff.name = 'group_angle_diff' # set name before join
|
|
line_hypos = line_hypos.join(angle_diff, on='label')
|
|
# compute interline median
|
|
dist_interline_median = diffs.median()
|
|
if verbose:
|
|
print('Update: detected groups: {} | num lines: {}.'.format(len(line_hypos_agg), num_lines))
|
|
|
|
# (V) label line detection segments according to line hypos
|
|
# compute probabilistic hough transform for lines
|
|
# line_segs = probabilistic_hough_line(skeleton, threshold=6, line_length=15,
|
|
# line_gap=6, theta=basic_theta)
|
|
line_length = 8 # v007: 8 v047: 5 # line detector dependent (VIP)
|
|
if len_max < line_length:
|
|
line_length = len_max
|
|
line_segs = probabilistic_hough_line(skeleton, threshold=6, line_length=line_length,
|
|
line_gap=6, theta=theta_range)
|
|
if len(line_segs) > 0:
|
|
# assign line segments to nearest line
|
|
ls_labels = assign_line_segments_to_lines(line_segs, line_hypos)
|
|
|
|
# associate segments with lines
|
|
segm_labels, im_marker = associate_segments_with_lines(lbl_ind_x, line_segs, ls_labels, group2line)
|
|
else:
|
|
segm_labels, ls_labels = lbl_ind_x, None # set segm_labels so that line_frag gets a shape
|
|
|
|
return line_hypos, line_segs, segm_labels, ls_labels, dist_interline_median, group2line, h, theta, d, skeleton
|
|
|
|
|
|
|