Arquivos
cuneiform-sign-detection-code/lib/utils/transform_utils.py
T
2020-11-19 12:18:53 +01:00

341 linhas
12 KiB
Python

import numbers
import numpy as np
from PIL import Image
from PIL import ImageOps
import random
from random import randint
import torch.functional as F
# from skimage.transform import warp, AffineTransform
from bbox_utils import intersection_over_union
# own stuff
def convert2binaryPIL(lbl_ind):
    """
    Turn a label array into a binary PIL image (mode '1', no dithering).

    The input is cast to uint8 and converted to greyscale; every pixel
    value greater than zero becomes 255, everything else becomes 0.

    :param lbl_ind: array-like label map accepted by ``np.uint8``
    :return: PIL image in mode '1'
    """
    def _to_foreground(value):
        # any non-zero label counts as foreground
        return 255 if value > 0 else 0

    grey = Image.fromarray(np.uint8(lbl_ind)).convert('L')
    # point() with mode='1' thresholds directly instead of dithering
    return grey.point(_to_foreground, mode='1')
def pad2square(bb, context_pad_ratio=0, context_pad=0, take_long_side=True):
    """
    Make a bounding box square, optionally adding context padding.

    The box is modified IN PLACE and also returned.

    :param bb: mutable box [xmin, ymin, xmax, ymax] (list or ndarray)
    :param context_pad_ratio: if ``context_pad`` is 0, the padding is
        ``round(context_pad_ratio * side)``, where ``side`` is the side the
        square is fitted to (long or short, see ``take_long_side``)
    :param context_pad: absolute padding added on every side; a non-zero
        value takes precedence over ``context_pad_ratio``
    :param take_long_side: True grows the short side up to the long side;
        False shrinks the long side down to the short side
    :return: the same ``bb`` object, now square
    """
    width = bb[2] - bb[0]
    height = bb[3] - bb[1]
    diff = width - height
    width_is_smaller = diff < 0
    height_is_smaller = diff > 0

    if take_long_side:
        # fit to the long side: the shorter axis is the one that changes
        reference_side = height if width_is_smaller else width
        adjust_x, adjust_y = width_is_smaller, height_is_smaller
    else:
        # fit to the short side: the longer axis is the one that changes
        # (the deltas below are negative there, so the box shrinks)
        reference_side = width if width_is_smaller else height
        adjust_x, adjust_y = height_is_smaller, width_is_smaller

    if context_pad == 0:
        context_pad = np.round(context_pad_ratio * reference_side)

    delta_x = 0.5 * (height - width)
    delta_y = 0.5 * (width - height)
    # ceil on the min edge / floor on the max edge splits odd differences
    bb[0] = bb[0] - context_pad - (adjust_x * np.ceil(delta_x))
    bb[2] = bb[2] + context_pad + (adjust_x * np.floor(delta_x))
    bb[1] = bb[1] - context_pad - (adjust_y * np.ceil(delta_y))
    bb[3] = bb[3] + context_pad + (adjust_y * np.floor(delta_y))
    return bb
# BBOX sampling / cropping functions
def crop_image(im, bb, context_pad=0, pad_to_square=False, mean_values=(0, 0, 0)):
    """
    Crop a window from an image array.

    With ``pad_to_square`` the box is first squared by ``pad2square``
    (passing ``context_pad``), then clipped to the image; the part of the
    box that fell outside the image is filled with ``mean_values``.

    :param im: image array indexed as [row, column, channel]; the square
        path reshapes ``mean_values`` to (1, 1, 3), i.e. expects 3 channels
    :param bb: bounding box as [xmin, ymin, xmax, ymax]; it is copied, the
        caller's object is not modified
    :param context_pad: absolute context padding (square path only)
    :param pad_to_square: whether to square, clip and fill the crop
    :param mean_values: per-channel fill value, cast to uint8
    :return: ``(crop, clipped_bb)`` when ``pad_to_square`` is True.
        Otherwise the plain crop array, or ``NotImplemented`` when
        ``context_pad > 0`` (use ``crop_pil_image`` for that case).
    """
    # copy list and use as ndarray
    bb = np.array(bb, dtype=int)
    # ndarray shape is (rows, columns): height first, then width
    im_height, im_width = im.shape[:2]

    if not pad_to_square:
        if context_pad > 0:
            # better use crop_pil_image
            return NotImplemented
        # return simple crop
        return im[bb[1]:bb[3], bb[0]:bb[2], :]

    # pad to square while preserving aspect ratio
    bb = pad2square(bb, context_pad=context_pad)

    # clip the box to the image and remember the overshoot per side
    # pad: [x_min, y_min, x_max, y_max]
    pad = [0, 0, 0, 0]
    if bb[0] < 0:
        pad[0] = int(abs(bb[0]))
        bb[0] = 0
    if bb[1] < 0:
        pad[1] = int(abs(bb[1]))
        bb[1] = 0
    if bb[2] > im_width:
        pad[2] = int(bb[2] - im_width)
        bb[2] = im_width
    if bb[3] > im_height:
        pad[3] = int(bb[3] - im_height)
        bb[3] = im_height

    im = im[bb[1]:bb[3], bb[0]:bb[2], :]

    if any(amount > 0 for amount in pad):
        channel_mean = np.reshape(mean_values, (1, 1, 3)).astype(np.uint8)
        # The fill must be sized from the CROP, not from the full image;
        # sizing it from the image makes concatenation fail for any crop
        # smaller than the image.
        crop_h, crop_w = im.shape[:2]
        canvas = np.empty(
            (pad[1] + crop_h + pad[3], pad[0] + crop_w + pad[2], 3),
            # same dtype promotion np.concatenate would apply
            dtype=np.result_type(channel_mean.dtype, im.dtype),
        )
        canvas[...] = channel_mean
        canvas[pad[1]:pad[1] + crop_h, pad[0]:pad[0] + crop_w, :] = im
        im = canvas

    return im, bb.tolist()
def crop_pil_image(im, bb, context_pad=0, pad_to_square=False, fill_values=None):
    """
    Crop a window from a PIL image.

    With ``pad_to_square`` the box is first squared by ``pad2square``
    (passing ``context_pad``). Without it, ``context_pad`` simply enlarges
    the box, clamped to the image borders.

    :param im: PIL image (``im.size`` is read as (width, height))
    :param bb: bounding box as [xmin, ymin, xmax, ymax]; it is copied, the
        caller's object is not modified
    :param context_pad: absolute context padding in pixels
    :param pad_to_square: whether to square the box before cropping
    :param fill_values: square path only. ``None`` crops the squared box
        directly (original comment: pillow pads out-of-range areas with
        zeros). Otherwise the box is clipped to the image and the missing
        border is added with ``ImageOps.expand`` using this fill colour.
    :return: ``(cropped_image, bb_list)``; ``bb_list`` is the box that was
        actually passed to ``im.crop``
    """
    # copy list and use as ndarray
    bb = np.array(bb, dtype=int)
    im_width, im_height = im.size

    if not pad_to_square:
        if context_pad > 0:
            bb[0] = max(bb[0] - context_pad, 0)
            bb[2] = min(bb[2] + context_pad, im_width)
            bb[1] = max(bb[1] - context_pad, 0)
            bb[3] = min(bb[3] + context_pad, im_height)
        # return simple crop
        return im.crop(tuple(bb.tolist())), bb.tolist()

    # pad to square while preserving aspect ratio
    bb = pad2square(bb, context_pad=context_pad)

    if fill_values is None:
        # if cropped out of image range, pillow pads with zeros automatically
        im = im.crop(tuple(bb.tolist()))
    else:
        # clip the box to the image and remember the overshoot per side
        # pad: [x_min, y_min, x_max, y_max]
        pad = [0, 0, 0, 0]
        if bb[0] < 0:
            pad[0] = int(abs(bb[0]))
            bb[0] = 0
        if bb[1] < 0:
            pad[1] = int(abs(bb[1]))
            bb[1] = 0
        # x is bounded by the width and y by the height
        if bb[2] > im_width:
            pad[2] = int(bb[2] - im_width)
            bb[2] = im_width
        if bb[3] > im_height:
            pad[3] = int(bb[3] - im_height)
            bb[3] = im_height
        # crop box
        im = im.crop(tuple(bb.tolist()))
        # re-add the clipped border (left, top, right, bottom) with the fill
        im = ImageOps.expand(im, border=(pad[0], pad[1], pad[2], pad[3]),
                             fill=tuple(fill_values))
    return im, bb.tolist()
def spatial_sample(im_pad, bb, spatial_sample_rng, rnd_scale_ratio=0.05):
    """
    Randomly sample a square window whose IoU with the (squared) ground
    truth box lies in a requested range.

    Up to 500 random windows are drawn. Sampling stops early as soon as
    the IoU lies strictly between the two bounds; after the last draw the
    window is accepted if the IoU lies within the bounds (inclusive).

    :param im_pad: image array indexed as [row, column, channel]
    :param bb: ground truth box as [xmin, ymin, xmax, ymax]
    :param spatial_sample_rng: (min_IoU, max_IoU)
    :param rnd_scale_ratio: the side of the sampled window is jittered by
        a random integer in [-r, 2r], with r = round(rnd_scale_ratio * side)
    :return: ``(im, bb_gt, bb_test)`` where ``im`` is the cropped window if
        one was accepted and the unchanged input otherwise, ``bb_gt`` is
        the squared ground truth box (still in input image coordinates),
        and ``bb_test`` is the last sampled window
    """
    im = im_pad
    imh, imw = im.shape[:2]

    # make ground truth box square, and use its dimensions
    bb_gt = list(bb)
    w = bb[2] - bb[0]
    h = bb[3] - bb[1]
    if w > h:
        bb_gt[1] = int(bb_gt[1] - np.ceil(0.5 * (w - h)))
        bb_gt[3] = int(bb_gt[3] + np.floor(0.5 * (w - h)))
        side = w
    else:
        bb_gt[0] = int(bb_gt[0] - np.ceil(0.5 * (h - w)))
        bb_gt[2] = int(bb_gt[2] + np.floor(0.5 * (h - w)))
        side = h

    # add random scaling to the test window
    jitter = round(rnd_scale_ratio * side)
    # The window is square, so it has to fit in BOTH image dimensions;
    # bounding only by the width makes randint(0, imh - side) raise
    # ValueError on images that are lower than the window. `imh` (rather
    # than `imh - 1`) keeps every previously valid size unchanged.
    side = min(side + random.randint(-jitter, 2 * jitter), imw - 1, imh)

    # set ranges according to provided label
    min_IoU = spatial_sample_rng[0]
    max_IoU = spatial_sample_rng[1]
    max_iter = 500
    ratio = 0.0
    bb_test = None
    # Always draw at least one window so that bb_test is defined even when
    # the initial ratio of 0.0 already lies strictly inside the range.
    for _ in range(max_iter):
        # bbox sampling (x first, then y)
        x0 = randint(0, imw - side)
        y0 = randint(0, imh - side)
        bb_test = [x0, y0, side + x0, side + y0]
        # check if new box fits criteria
        if min(bb_test) >= 0 and bb_test[2] <= imw and bb_test[3] <= imh:
            ratio = intersection_over_union(bb_test, bb_gt)
        if not (ratio >= max_IoU or ratio <= min_IoU):
            break

    if max_IoU >= ratio >= min_IoU:
        im = im[bb_test[1]:bb_test[3], bb_test[0]:bb_test[2], :]
    # bb_gt is deliberately NOT shifted into the crop's coordinate frame
    return im, bb_gt, bb_test
# TRANSFORMS
class MyRandomZoom(object):
    """
    Resize an image by a scale factor drawn uniformly at random.

    :param scale_range: arguments forwarded to ``np.random.uniform``
        (typically a (low, high) pair)
    :param interpolation: interpolation flag handed to ``F.resize``
    """

    def __init__(self, scale_range, interpolation=Image.BILINEAR):
        self.interpolation = interpolation
        self.scale_range = scale_range

    def __call__(self, img):
        factor = np.random.uniform(*self.scale_range)
        # target size is given as (height, width)
        target_hw = tuple(int(side * factor) for side in (img.height, img.width))
        # NOTE(review): this file imports `F` from `torch.functional`;
        # `resize` is presumably expected from
        # torchvision.transforms.functional -- confirm the import.
        return F.resize(img, target_hw, self.interpolation)
class MyFuzzyZoom(object):
    """
    Resize an image to a target size multiplied by a random scale factor.

    :param target_size: (2-tuple) height, width
    :param scale_range: (2-tuple) range from which target_size may deviate
    :param interpolation: ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional)
    """

    def __init__(self, target_size, scale_range, interpolation=Image.BILINEAR):
        self.interpolation = interpolation
        self.scale_range = scale_range
        self.target_size = target_size

    @staticmethod
    def get_params(scale_range):
        """Draw one scale factor uniformly from ``scale_range``."""
        factor = np.random.uniform(*scale_range)
        return factor

    def __call__(self, img):
        factor = self.get_params(self.scale_range)
        out_height = int(self.target_size[0] * factor)
        out_width = int(self.target_size[1] * factor)
        # NOTE(review): this file imports `F` from `torch.functional`;
        # `resize` is presumably expected from
        # torchvision.transforms.functional -- confirm the import.
        return F.resize(img, (out_height, out_width), self.interpolation)
class MyRandomChoiceZoom(object):
    """
    Resize an image by a scale factor picked at random from a fixed set.

    :param scales: candidate scale factors
    :param p: optional probabilities for the candidates, forwarded to
        ``np.random.choice``
    :param interpolation: interpolation flag handed to ``F.resize``
    """

    def __init__(self, scales, p=None, interpolation=Image.BILINEAR):
        self.p = p
        self.interpolation = interpolation
        self.scales = scales

    def __call__(self, img):
        factor = np.random.choice(self.scales, replace=True, p=self.p)
        out_height = int(img.height * factor)
        out_width = int(img.width * factor)
        # NOTE(review): this file imports `F` from `torch.functional`;
        # `resize` is presumably expected from
        # torchvision.transforms.functional -- confirm the import.
        return F.resize(img, (out_height, out_width), self.interpolation)
class MyRandomCenteredRotation(object):
    """
    Rotate an image by a random angle around a randomly shifted center.

    Args:
        degrees (sequence or float or int): Range of degrees to select from.
            If degrees is a number instead of sequence like (min, max), the range of degrees
            will be (-degrees, +degrees).
        translation_range (2-tuple): Range of pixels to select from.
            The center of rotation is shifted according to a number sampled from this range
            (one independent draw per axis). A falsy value keeps the default center.
        resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional):
            An optional resampling filter.
            See http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#filters
            If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST.

    Raises:
        ValueError: if ``degrees`` is a negative number, or a sequence whose
            length is not 2.
    """

    def __init__(self, degrees, translation_range=(-3, 3), resample=Image.BILINEAR):
        if isinstance(degrees, numbers.Number):
            if degrees < 0:
                raise ValueError("If degrees is a single number, it must be positive.")
            self.degrees = (-degrees, degrees)
        else:
            if len(degrees) != 2:
                raise ValueError("If degrees is a sequence, it must be of len 2.")
            self.degrees = degrees
        self.translation_range = translation_range
        self.resample = resample

    def __call__(self, img):
        angle = np.random.uniform(*self.degrees)
        translated_center = None
        if self.translation_range:
            # The rotation center is an (x, y) point with the origin in the
            # upper left corner, so x derives from the WIDTH and y from the
            # HEIGHT; swapping them only goes unnoticed on square images.
            translated_center = (
                np.random.uniform(*self.translation_range) + int(img.width / 2),
                np.random.uniform(*self.translation_range) + int(img.height / 2),
            )
        # NOTE(review): this file imports `F` from `torch.functional`;
        # `rotate` is presumably expected from
        # torchvision.transforms.functional -- confirm the import.
        return F.rotate(img, angle, resample=self.resample, expand=False, center=translated_center)
class UnNormalize(object):
    """
    Undo a per-channel normalization, i.e. compute ``t * std + mean``.

    :param mean: per-channel values added back after scaling
    :param std: per-channel factors each channel is multiplied with
    """

    def __init__(self, mean, std):
        self.std = std
        self.mean = mean

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Normalized tensor image of size (C, H, W).
                It is modified in place.
        Returns:
            Tensor: The same tensor object, un-normalized.
        """
        for channel, shift, scale in zip(tensor, self.mean, self.std):
            # inverse of the normalize step, which is t.sub_(m).div_(s)
            channel.mul_(scale)
            channel.add_(shift)
        return tensor