Source code for cosense3d.dataset.pipeline.transform

import numpy as np
from PIL import Image
import torch


class ResizeCropFlipRotImage:
    """
    Augment images with random resize, crop, flip and rotation. Modified from StreamPETR.
    """
    def __init__(self, data_aug_conf=None, with_2d=True, filter_invisible=True, training=True):
        self.data_aug_conf = data_aug_conf
        self.training = training
        self.min_size = 2.0
        self.with_2d = with_2d
        self.filter_invisible = filter_invisible

    def __call__(self, data_dict):
        imgs = data_dict['img']
        N = len(imgs)
        new_imgs = []
        new_gt_bboxes = []
        new_centers2d = []
        new_gt_labels = []
        new_depths = []
        assert self.data_aug_conf['rot_lim'] == [0.0, 0.0], "Rotation is not currently supported"
        resize, resize_dims, crop, flip, rotate = self._sample_augmentation()

        for i in range(N):
            img = Image.fromarray(np.uint8(imgs[i]))
            img, ida_mat = self._img_transform(
                img,
                resize=resize,
                resize_dims=resize_dims,
                crop=crop,
                flip=flip,
                rotate=rotate,
            )
            if self.with_2d:  # sync_2d bbox labels
                gt_bboxes = data_dict['bboxes2d'][i]
                centers2d = data_dict['centers2d'][i]
                gt_labels = data_dict['labels2d'][i]
                depths = data_dict['depths2d'][i]
                if len(gt_bboxes) != 0:
                    gt_bboxes, centers2d, gt_labels, depths = self._bboxes_transform(
                        gt_bboxes, centers2d, gt_labels, depths,
                        resize=resize,
                        crop=crop,
                        flip=flip,
                    )
                if len(gt_bboxes) != 0 and self.filter_invisible:
                    gt_bboxes, centers2d, gt_labels, depths = self._filter_invisible(
                        gt_bboxes, centers2d, gt_labels, depths)

                new_gt_bboxes.append(gt_bboxes)
                new_centers2d.append(centers2d)
                new_gt_labels.append(gt_labels)
                new_depths.append(depths)

            new_imgs.append(np.array(img).astype(np.float32))
            data_dict['intrinsics'][i][:3, :3] = ida_mat @ data_dict['intrinsics'][i][:3, :3]

        data_dict['bboxes2d'] = new_gt_bboxes
        data_dict['centers2d'] = new_centers2d
        data_dict['labels2d'] = new_gt_labels
        data_dict['depths2d'] = new_depths
        data_dict['img'] = new_imgs
        data_dict['lidar2img'] = [data_dict['intrinsics'][i] @ data_dict['extrinsics'][i]
                                  for i in range(len(data_dict['extrinsics']))]

        return data_dict

    def _bboxes_transform(self, bboxes, centers2d, gt_labels, depths, resize, crop, flip):
        assert len(bboxes) == len(centers2d) == len(gt_labels) == len(depths)
        fH, fW = self.data_aug_conf["final_dim"]
        bboxes = bboxes * resize
        bboxes[:, [0, 2]] = bboxes[:, [0, 2]] - crop[0]
        bboxes[:, [1, 3]] = bboxes[:, [1, 3]] - crop[1]
        bboxes[:, [0, 2]] = np.clip(bboxes[:, [0, 2]], 0, fW)
        bboxes[:, [1, 3]] = np.clip(bboxes[:, [1, 3]], 0, fH)
        keep = ((bboxes[:, 2] - bboxes[:, 0]) >= self.min_size) \
               & ((bboxes[:, 3] - bboxes[:, 1]) >= self.min_size)

        if flip:
            x0 = bboxes[:, 0].copy()
            x1 = bboxes[:, 2].copy()
            bboxes[:, 2] = fW - x0
            bboxes[:, 0] = fW - x1
        bboxes = bboxes[keep]

        centers2d = centers2d * resize
        centers2d[:, 0] = centers2d[:, 0] - crop[0]
        centers2d[:, 1] = centers2d[:, 1] - crop[1]
        centers2d[:, 0] = np.clip(centers2d[:, 0], 0, fW)
        centers2d[:, 1] = np.clip(centers2d[:, 1], 0, fH)
        if flip:
            centers2d[:, 0] = fW - centers2d[:, 0]
        centers2d = centers2d[keep]

        gt_labels = gt_labels[keep]
        depths = depths[keep]

        return bboxes, centers2d, gt_labels, depths

    def _filter_invisible(self, bboxes, centers2d, gt_labels, depths):
        # filter invisible 2d bboxes
        assert len(bboxes) == len(centers2d) == len(gt_labels) == len(depths)
        fH, fW = self.data_aug_conf["final_dim"]
        indices_maps = np.zeros((fH, fW))
        tmp_bboxes = np.zeros_like(bboxes)
        tmp_bboxes[:, :2] = np.ceil(bboxes[:, :2])
        tmp_bboxes[:, 2:] = np.floor(bboxes[:, 2:])
        tmp_bboxes = tmp_bboxes.astype(np.int64)
        sort_idx = np.argsort(-depths, axis=0, kind='stable')
        tmp_bboxes = tmp_bboxes[sort_idx]
        bboxes = bboxes[sort_idx]
        depths = depths[sort_idx]
        centers2d = centers2d[sort_idx]
        gt_labels = gt_labels[sort_idx]
        for i in range(bboxes.shape[0]):
            u1, v1, u2, v2 = tmp_bboxes[i]
            indices_maps[v1:v2, u1:u2] = i
        indices_res = np.unique(indices_maps).astype(np.int64)

        bboxes = bboxes[indices_res]
        depths = depths[indices_res]
        centers2d = centers2d[indices_res]
        gt_labels = gt_labels[indices_res]

        return bboxes, centers2d, gt_labels, depths

    def _get_rot(self, h):
        return torch.Tensor(
            [
                [np.cos(h), np.sin(h)],
                [-np.sin(h), np.cos(h)],
            ]
        )

    def _img_transform(self, img, resize, resize_dims, crop, flip, rotate):
        ida_rot = torch.eye(2)
        ida_tran = torch.zeros(2)
        # adjust image
        img = img.resize(resize_dims)
        img = img.crop(crop)
        if flip:
            img = img.transpose(method=Image.FLIP_LEFT_RIGHT)
        img = img.rotate(rotate)

        # post-homography transformation
        ida_rot *= resize
        ida_tran -= torch.Tensor(crop[:2])
        if flip:
            A = torch.Tensor([[-1, 0], [0, 1]])
            b = torch.Tensor([crop[2] - crop[0], 0])
            ida_rot = A.matmul(ida_rot)
            ida_tran = A.matmul(ida_tran) + b
        A = self._get_rot(rotate / 180 * np.pi)
        b = torch.Tensor([crop[2] - crop[0], crop[3] - crop[1]]) / 2
        b = A.matmul(-b) + b
        ida_rot = A.matmul(ida_rot)
        ida_tran = A.matmul(ida_tran) + b
        ida_mat = torch.eye(3)
        ida_mat[:2, :2] = ida_rot
        ida_mat[:2, 2] = ida_tran
        return img, ida_mat

    def _sample_augmentation(self):
        H, W = self.data_aug_conf["H"], self.data_aug_conf["W"]
        fH, fW = self.data_aug_conf["final_dim"]
        if self.training:
            resize = np.random.uniform(*self.data_aug_conf["resize_lim"])
            resize_dims = (int(W * resize), int(H * resize))
            newW, newH = resize_dims
            crop_h = int((1 - np.random.uniform(*self.data_aug_conf["bot_pct_lim"])) * newH) - fH
            crop_w = int(np.random.uniform(0, max(0, newW - fW)))
            crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
            flip = False
            if self.data_aug_conf["rand_flip"] and np.random.choice([0, 1]):
                flip = True
            rotate = np.random.uniform(*self.data_aug_conf["rot_lim"])
        else:
            resize = max(fH / H, fW / W)
            resize_dims = (int(W * resize), int(H * resize))
            newW, newH = resize_dims
            crop_h = int((1 - np.mean(self.data_aug_conf["bot_pct_lim"])) * newH) - fH
            crop_w = int(max(0, newW - fW) / 2)
            crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
            flip = False
            rotate = 0
        return resize, resize_dims, crop, flip, rotate
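# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module). The config keys
# below mirror the ones read by ResizeCropFlipRotImage; the concrete values and
# the contents of `data_dict` are assumptions chosen only for demonstration.
# ---------------------------------------------------------------------------
# data_aug_conf = dict(
#     H=600, W=800,                # source image size
#     final_dim=(256, 704),        # (fH, fW) of the cropped output
#     resize_lim=(0.9, 1.1),       # random resize factor range
#     bot_pct_lim=(0.0, 0.05),     # fraction cropped away from the bottom
#     rot_lim=[0.0, 0.0],          # rotation must stay disabled (see assert in __call__)
#     rand_flip=True,
# )
# transform = ResizeCropFlipRotImage(data_aug_conf, with_2d=True, training=True)
# data_dict = transform(data_dict)  # expects 'img', 'bboxes2d', 'centers2d', 'labels2d',
#                                   # 'depths2d', 'intrinsics' and 'extrinsics' entries

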
class ResizeImage:
    """
    Resize images.
    """
    def __init__(self, img_size):
        self.img_size = img_size

    def __call__(self, data_dict):
        imgs = data_dict['img']
        imgs_out = []
        for i, img in enumerate(imgs):
            img = Image.fromarray(np.uint8(img))
            W, H = img.size
            img = img.resize(self.img_size)
            imgs_out.append(np.array(img).astype(np.float32))
            data_dict['intrinsics'][i][0, 0] = self.img_size[0] / W * data_dict['intrinsics'][i][0, 0]
            data_dict['intrinsics'][i][1, 1] = self.img_size[1] / H * data_dict['intrinsics'][i][1, 1]
            # todo convert 2d annotations
        data_dict['img'] = imgs_out
        return data_dict
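

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module). img_size is an
# assumed (width, height) tuple; ResizeImage rescales each image with PIL and
# scales the focal lengths in 'intrinsics' by the same factors.
# ---------------------------------------------------------------------------
# resize = ResizeImage(img_size=(704, 256))
# data_dict = resize(data_dict)  # 'img' entries are replaced by resized float32 arrays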