import numpy as np
from PIL import Image
import torch
class ResizeCropFlipRotImage:
"""
Augment images with random resize, crop, flip and rotation. Modified from StreamPETR.
"""
def __init__(self, data_aug_conf=None, with_2d=True, filter_invisible=True, training=True):
self.data_aug_conf = data_aug_conf
self.training = training
        self.min_size = 2.0  # drop 2D boxes whose width or height falls below this after cropping
self.with_2d = with_2d
self.filter_invisible = filter_invisible
def __call__(self, data_dict):
imgs = data_dict['img']
N = len(imgs)
new_imgs = []
new_gt_bboxes = []
new_centers2d = []
new_gt_labels = []
new_depths = []
assert self.data_aug_conf['rot_lim'] == [0.0, 0.0], "Rotation is not currently supported"
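        # sample one augmentation and apply it to every camera image so the
        # multi-view geometry stays consistent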
resize, resize_dims, crop, flip, rotate = self._sample_augmentation()
for i in range(N):
img = Image.fromarray(np.uint8(imgs[i]))
img, ida_mat = self._img_transform(
img,
resize=resize,
resize_dims=resize_dims,
crop=crop,
flip=flip,
rotate=rotate,
)
            if self.with_2d:  # keep the 2D boxes, centers, labels and depths in sync
gt_bboxes = data_dict['bboxes2d'][i]
centers2d = data_dict['centers2d'][i]
gt_labels = data_dict['labels2d'][i]
depths = data_dict['depths2d'][i]
if len(gt_bboxes) != 0:
gt_bboxes, centers2d, gt_labels, depths = self._bboxes_transform(
gt_bboxes,
centers2d,
gt_labels,
depths,
resize=resize,
crop=crop,
flip=flip,
)
if len(gt_bboxes) != 0 and self.filter_invisible:
gt_bboxes, centers2d, gt_labels, depths = self._filter_invisible(gt_bboxes, centers2d, gt_labels, depths)
new_gt_bboxes.append(gt_bboxes)
new_centers2d.append(centers2d)
new_gt_labels.append(gt_labels)
new_depths.append(depths)
new_imgs.append(np.array(img).astype(np.float32))
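            # fold the image-space augmentation into the intrinsics so that
            # 3D-to-image projection (and ``lidar2img`` below) stays valid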
data_dict['intrinsics'][i][:3, :3] = ida_mat @ data_dict['intrinsics'][i][:3, :3]
data_dict['bboxes2d'] = new_gt_bboxes
data_dict['centers2d'] = new_centers2d
data_dict['labels2d'] = new_gt_labels
data_dict['depths2d'] = new_depths
data_dict['img'] = new_imgs
data_dict['lidar2img'] = [data_dict['intrinsics'][i] @ data_dict['extrinsics'][i]
for i in range(len(data_dict['extrinsics']))]
return data_dict
    def _bboxes_transform(self, bboxes, centers2d, gt_labels, depths, resize, crop, flip):
assert len(bboxes) == len(centers2d) == len(gt_labels) == len(depths)
fH, fW = self.data_aug_conf["final_dim"]
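        # replay the image transform on the boxes: scale by the resize factor,
        # shift by the crop offset, clip to the final image and mirror if
        # flipped; ``keep`` is computed before the flip because flipping
        # preserves box widths and heights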
bboxes = bboxes * resize
bboxes[:, [0, 2]] = bboxes[:, [0, 2]] - crop[0]
bboxes[:, [1, 3]] = bboxes[:, [1, 3]] - crop[1]
bboxes[:, [0, 2]] = np.clip(bboxes[:, [0, 2]], 0, fW)
bboxes[:, [1, 3]] = np.clip(bboxes[:, [1, 3]], 0, fH)
keep = ((bboxes[:, 2] - bboxes[:, 0]) >= self.min_size) & ((bboxes[:, 3] - bboxes[:, 1]) >= self.min_size)
if flip:
x0 = bboxes[:, 0].copy()
x1 = bboxes[:, 2].copy()
bboxes[:, 2] = fW - x0
bboxes[:, 0] = fW - x1
bboxes = bboxes[keep]
centers2d = centers2d * resize
centers2d[:, 0] = centers2d[:, 0] - crop[0]
centers2d[:, 1] = centers2d[:, 1] - crop[1]
centers2d[:, 0] = np.clip(centers2d[:, 0], 0, fW)
centers2d[:, 1] = np.clip(centers2d[:, 1], 0, fH)
if flip:
centers2d[:, 0] = fW - centers2d[:, 0]
centers2d = centers2d[keep]
gt_labels = gt_labels[keep]
depths = depths[keep]
return bboxes, centers2d, gt_labels, depths
def _filter_invisible(self, bboxes, centers2d, gt_labels, depths):
# filter invisible 2d bboxes
assert len(bboxes) == len(centers2d) == len(gt_labels) == len(depths)
fH, fW = self.data_aug_conf["final_dim"]
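        # painter's-algorithm occlusion check: rasterize box indices from the
        # farthest box to the nearest, then keep only the boxes that still own
        # at least one pixel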
        # -1 marks pixels covered by no box, so even the first (farthest) box
        # is dropped when it ends up fully occluded
        indices_maps = np.full((fH, fW), -1, dtype=np.int64)
        tmp_bboxes = np.zeros_like(bboxes)
        tmp_bboxes[:, :2] = np.ceil(bboxes[:, :2])   # shrink to whole interior pixels
        tmp_bboxes[:, 2:] = np.floor(bboxes[:, 2:])
        tmp_bboxes = tmp_bboxes.astype(np.int64)
        sort_idx = np.argsort(-depths, axis=0, kind='stable')  # farthest first
tmp_bboxes = tmp_bboxes[sort_idx]
bboxes = bboxes[sort_idx]
depths = depths[sort_idx]
centers2d = centers2d[sort_idx]
gt_labels = gt_labels[sort_idx]
for i in range(bboxes.shape[0]):
u1, v1, u2, v2 = tmp_bboxes[i]
            indices_maps[v1:v2, u1:u2] = i  # nearer boxes overwrite farther ones
        indices_res = np.unique(indices_maps)
        indices_res = indices_res[indices_res >= 0]  # drop the -1 "no box" marker
bboxes = bboxes[indices_res]
depths = depths[indices_res]
centers2d = centers2d[indices_res]
gt_labels = gt_labels[indices_res]
return bboxes, centers2d, gt_labels, depths
def _get_rot(self, h):
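        # 2x2 rotation matrix for the angle ``h`` (radians)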
return torch.Tensor(
[
[np.cos(h), np.sin(h)],
[-np.sin(h), np.cos(h)],
]
)
def _img_transform(self, img, resize, resize_dims, crop, flip, rotate):
ida_rot = torch.eye(2)
ida_tran = torch.zeros(2)
# adjust image
img = img.resize(resize_dims)
img = img.crop(crop)
if flip:
img = img.transpose(method=Image.FLIP_LEFT_RIGHT)
img = img.rotate(rotate)
        # post-homography transformation: accumulate the same resize/crop/flip/
        # rotate as an affine map u_out = ida_rot @ u_in + ida_tran on pixels
ida_rot *= resize
ida_tran -= torch.Tensor(crop[:2])
if flip:
A = torch.Tensor([[-1, 0], [0, 1]])
b = torch.Tensor([crop[2] - crop[0], 0])
ida_rot = A.matmul(ida_rot)
ida_tran = A.matmul(ida_tran) + b
        A = self._get_rot(rotate / 180 * np.pi)
        b = torch.Tensor([crop[2] - crop[0], crop[3] - crop[1]]) / 2
        b = A.matmul(-b) + b  # rotate about the center of the cropped image
ida_rot = A.matmul(ida_rot)
ida_tran = A.matmul(ida_tran) + b
ida_mat = torch.eye(3)
ida_mat[:2, :2] = ida_rot
ida_mat[:2, 2] = ida_tran
return img, ida_mat
def _sample_augmentation(self):
H, W = self.data_aug_conf["H"], self.data_aug_conf["W"]
fH, fW = self.data_aug_conf["final_dim"]
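        # training: sample a random resize, crop and flip within the configured
        # limits; evaluation: deterministic resize and crop, no flip or rotation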
if self.training:
resize = np.random.uniform(*self.data_aug_conf["resize_lim"])
resize_dims = (int(W * resize), int(H * resize))
newW, newH = resize_dims
crop_h = int((1 - np.random.uniform(*self.data_aug_conf["bot_pct_lim"])) * newH) - fH
crop_w = int(np.random.uniform(0, max(0, newW - fW)))
crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
flip = False
if self.data_aug_conf["rand_flip"] and np.random.choice([0, 1]):
flip = True
rotate = np.random.uniform(*self.data_aug_conf["rot_lim"])
else:
resize = max(fH / H, fW / W)
resize_dims = (int(W * resize), int(H * resize))
newW, newH = resize_dims
crop_h = int((1 - np.mean(self.data_aug_conf["bot_pct_lim"])) * newH) - fH
crop_w = int(max(0, newW - fW) / 2)
crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
flip = False
rotate = 0
return resize, resize_dims, crop, flip, rotate
class ResizeImage:
"""
Resize images.
"""
def __init__(self, img_size):
self.img_size = img_size
def __call__(self, data_dict):
imgs = data_dict['img']
imgs_out = []
for i, img in enumerate(imgs):
img = Image.fromarray(np.uint8(img))
W, H = img.size
img = img.resize(self.img_size)
imgs_out.append(np.array(img).astype(np.float32))
            # rescale the focal lengths and the principal point; note that
            # img_size is (width, height), matching PIL's convention
            data_dict['intrinsics'][i][0, 0] = self.img_size[0] / W * data_dict['intrinsics'][i][0, 0]
            data_dict['intrinsics'][i][1, 1] = self.img_size[1] / H * data_dict['intrinsics'][i][1, 1]
            data_dict['intrinsics'][i][0, 2] = self.img_size[0] / W * data_dict['intrinsics'][i][0, 2]
            data_dict['intrinsics'][i][1, 2] = self.img_size[1] / H * data_dict['intrinsics'][i][1, 2]
# todo convert 2d annotations
data_dict['img'] = imgs_out
return data_dict
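

# Minimal usage sketch (illustration only, not part of the pipeline): the
# config keys are the ones ResizeCropFlipRotImage reads above; the concrete
# values and the dummy single-camera sample are nuScenes-style assumptions.
if __name__ == "__main__":
    data_aug_conf = {
        "H": 900, "W": 1600,
        "final_dim": (320, 800),
        "resize_lim": (0.47, 0.55),
        "bot_pct_lim": (0.0, 0.0),
        "rot_lim": [0.0, 0.0],  # rotation must stay disabled
        "rand_flip": True,
    }
    transform = ResizeCropFlipRotImage(data_aug_conf, with_2d=True, training=True)
    sample = {
        "img": [np.zeros((900, 1600, 3), dtype=np.uint8)],
        "bboxes2d": [np.array([[100.0, 100.0, 400.0, 300.0]])],
        "centers2d": [np.array([[250.0, 200.0]])],
        "labels2d": [np.array([0])],
        "depths2d": [np.array([10.0])],
        "intrinsics": [np.eye(4)],
        "extrinsics": [np.eye(4)],
    }
    out = transform(sample)
    print(out["img"][0].shape)      # (320, 800, 3)
    print(len(out["bboxes2d"][0]))  # boxes surviving the crop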