class VisionTransform(Transform):
    r"""Base class of all transforms used in computer vision.

    Calling logic: apply_batch() -> apply() -> _apply_image() and other _apply_*() methods.
    If you want to implement a self-defined transform method for images,
    override the _apply_image method in a subclass.

    Args:
        order: input type order. Input is a tuple containing different structures,
            order is used to specify the order of structures. For example, if your input
            is (image, boxes) type, then the ``order`` should be ("image", "boxes").
            Currently available strings and data types are described below:

            * "image": input image, with shape of `(H, W, C)`.
            * "coords": coordinates, with shape of `(N, 2)`.
            * "boxes": bounding boxes, with shape of `(N, 4)`, "xyxy" format;
              the 1st "xy" represents the top left point of a box,
              the 2nd "xy" represents the right bottom point.
            * "mask": map used for segmentation, with shape of `(H, W, 1)`.
            * "keypoints": keypoints with shape of `(N, K, 3)`, N for number of instances,
              and K for number of keypoints in one instance. The first two entries of the
              last axis are the coordinates of a keypoint and the 3rd entry is its label.
            * "polygons": a sequence containing numpy arrays; its length is the number of
              instances. Each numpy array represents the polygon coordinates of one instance.
            * "category": categories for some data type. For example, "image_category"
              means the category of the input image and "boxes_category" means the
              categories of bounding boxes.
            * "info": information for images such as image shapes and image path.

            You can also customize your own data types, as long as you implement the
            corresponding _apply_*() methods; otherwise ``NotImplementedError`` will be raised.
    """

    def __init__(self, order=None):
        super().__init__()
        if order is None:
            order = ("image",)
        elif not isinstance(order, collections.abc.Sequence):
            raise ValueError(
                "order should be a sequence, but got order={}".format(order)
            )
        for k in order:
            if k in ("batch",):
                raise ValueError("{} is invalid data type".format(k))
            elif k.endswith("category") or k.endswith("info"):
                # when the key is *category or info, we should do nothing
                # if the corresponding apply methods are not implemented.
                continue
            elif self._get_apply(k) is None:
                raise NotImplementedError("{} is unsupported data type".format(k))
        self.order = order
    def apply_batch(self, inputs: Sequence[Tuple]):
        r"""Apply transform on batch input data."""
        return tuple(self.apply(input) for input in inputs)
    def apply(self, input: Tuple):
        r"""Apply transform on single input data."""
        if not isinstance(input, tuple):
            input = (input,)

        output = []
        for i in range(min(len(input), len(self.order))):
            apply_func = self._get_apply(self.order[i])
            if apply_func is None:
                output.append(input[i])
            else:
                output.append(apply_func(input[i]))
        if len(input) > len(self.order):
            output.extend(input[len(self.order):])

        if len(output) == 1:
            output = output[0]
        else:
            output = tuple(output)
        return output
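# --- Usage sketch (illustrative only, not part of the library) --------------
# Subclassing VisionTransform only requires overriding the _apply_*() methods
# for the data types named in ``order``; dispatch happens via _get_apply().
# ``InvertColor`` below is a hypothetical example transform.
import numpy as np

class InvertColor(VisionTransform):
    """Invert pixel intensities; bounding boxes pass through unchanged."""

    def _apply_image(self, image):
        return 255 - image

    def _apply_boxes(self, boxes):
        return boxes

_t = InvertColor(order=("image", "boxes"))
_img = np.full((4, 4, 3), 200, dtype=np.uint8)
_boxes = np.array([[0.0, 0.0, 2.0, 2.0]], dtype=np.float32)
_out_img, _out_boxes = _t.apply((_img, _boxes))
assert int(_out_img[0, 0, 0]) == 55  # 255 - 200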
class ToMode(VisionTransform):
    r"""Change input data to a target mode.
    For example, most transforms use HWC mode images, while the neural network
    might use CHW mode input tensors.

    Args:
        mode: output mode of input. Default: "CHW"
        order: the same with :class:`VisionTransform`.
    """

    def __init__(self, mode="CHW", *, order=None):
        super().__init__(order)
        assert mode in ["CHW"], "unsupported mode: {}".format(mode)
        self.mode = mode

    def _apply_image(self, image):
        if self.mode == "CHW":
            return np.ascontiguousarray(np.rollaxis(image, 2))
        return image

    def _apply_coords(self, coords):
        return coords

    def _apply_mask(self, mask):
        if self.mode == "CHW":
            return np.ascontiguousarray(np.rollaxis(mask, 2))
        return mask
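# --- Usage sketch (illustrative only): ToMode moves channels to the front ---
import numpy as np

_hwc = np.random.randint(0, 256, size=(32, 48, 3), dtype=np.uint8)
_chw = ToMode("CHW").apply(_hwc)
assert _chw.shape == (3, 32, 48)  # (H, W, C) -> (C, H, W)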
class Compose(VisionTransform):
    r"""Composes several transformations together.

    Args:
        transforms: list of :class:`VisionTransform` to compose.
        batch_compose: whether to keep the same transform order across a batch when shuffling.
        shuffle_indices: indices used for random shuffle, starting at 1.
        order: the same with :class:`VisionTransform`.

    .. seealso:: Refer to :mod:`~.data.transform` module for vision transform APIs.

    Examples:
        >>> import megengine.data.transform as T
        >>> T.Compose([  # doctest: +SKIP
        ...     T.RandomHorizontalFlip(),  # 1st
        ...     T.RandomVerticalFlip(),    # 2nd
        ...     T.CenterCrop(100),         # 3rd
        ...     T.ToMode("CHW"),           # 4th
        ... ],
        ... shuffle_indices=[(1, 2, 3)]
        ... )

    In this case, ``shuffle_indices`` is given, so each input data will be
    transformed out of order:

    .. math::

        \begin{array}{cc}
            [{\color{red}1 \quad 2 \quad 3} \quad 4] & [{\color{red}1 \quad 3 \quad 2} \quad 4] \\
            [{\color{red}2 \quad 1 \quad 3} \quad 4] & [{\color{red}2 \quad 3 \quad 1} \quad 4] \\
            [{\color{red}3 \quad 1 \quad 2} \quad 4] & [{\color{red}3 \quad 2 \quad 1} \quad 4]
        \end{array}

    In another case, if ``[(1, 3), (2, 4)]`` is given, then the 1st and 3rd
    transformations will be randomly shuffled, and the 2nd and 4th transformations
    will also be shuffled:

    .. math::

        \begin{array}{cc}
            [{\color{red}1} \quad {\color{blue}2} \quad {\color{red}3} \quad {\color{blue}4}] &
            [{\color{red}1} \quad {\color{blue}4} \quad {\color{red}3} \quad {\color{blue}2}] \\
            [{\color{red}3} \quad {\color{blue}2} \quad {\color{red}1} \quad {\color{blue}4}] &
            [{\color{red}3} \quad {\color{blue}4} \quad {\color{red}1} \quad {\color{blue}2}]
        \end{array}

    Different colors represent different groups that need to be shuffled internally.

    .. warning::

        Different samples within each batch will also use random transformation orders,
        unless ``batch_compose`` is set to ``True``.
    """

    def __init__(
        self,
        transforms: List[VisionTransform] = [],
        batch_compose: bool = False,
        shuffle_indices: List[Tuple] = None,
        *,
        order=None
    ):
        super().__init__(order)
        self.transforms = transforms
        self._set_order()

        if batch_compose and shuffle_indices is not None:
            raise ValueError(
                "Do not support shuffle when apply transforms along the whole batch"
            )
        self.batch_compose = batch_compose

        if shuffle_indices is not None:
            shuffle_indices = [tuple(x - 1 for x in idx) for idx in shuffle_indices]
        self.shuffle_indices = shuffle_indices

    def _set_order(self):
        for t in self.transforms:
            t.order = self.order
            if isinstance(t, Compose):
                t._set_order()
class TorchTransformCompose(VisionTransform):
    r"""Compose class used for transforms in torchvision; only PIL images are
    supported. Some tensor-based transforms in torchvision are not supported,
    such as Normalize and ToTensor.

    Args:
        transforms: the same with ``Compose``.
        order: the same with :class:`VisionTransform`.
    """

    def __init__(self, transforms, *, order=None):
        super().__init__(order)
        self.transforms = transforms

    def _apply_image(self, image):
        from PIL import Image

        try:
            import accimage
        except ImportError:
            accimage = None

        if image.shape[0] == 3:  # CHW
            image = np.ascontiguousarray(image[[2, 1, 0]])
        elif image.shape[2] == 3:  # HWC
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = Image.fromarray(image.astype(np.uint8))

        for t in self.transforms:
            image = t(image)

        if isinstance(image, Image.Image) or (
            accimage is not None and isinstance(image, accimage.Image)
        ):
            image = np.array(image, dtype=np.uint8)
        if image.shape[0] == 3:  # CHW
            image = np.ascontiguousarray(image[[2, 1, 0]])
        elif image.shape[2] == 3:  # HWC
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        return image
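# --- Usage sketch (illustrative only; assumes torchvision is installed) -----
# Only PIL-based torchvision transforms are supported, per the docstring above.
import numpy as np
from torchvision import transforms as TV

_torch_t = TorchTransformCompose([TV.RandomGrayscale(p=1.0)])
_bgr = np.random.randint(0, 256, size=(16, 16, 3), dtype=np.uint8)  # HWC, BGR
_out = _torch_t.apply(_bgr)  # converted to an RGB PIL image internally, back to BGR here
assert _out.shape == (16, 16, 3)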
class Pad(VisionTransform):
    r"""Pad the input data.

    Args:
        size: padding size of the input image, which could be an integer or a sequence.
            If it is an integer, the input image will be padded in four directions.
            If it is a sequence containing two integers, the bottom and right sides
            of the image will be padded.
            If it is a sequence containing four integers, the top, bottom, left and
            right sides of the image will be padded with the given sizes.
        value: padding value of the image, which could be a sequence of int or float.
            If it is a float value, the dtype of the image will also be cast to float32.
        mask_value: padding value of the segmentation map.
        order: the same with :class:`VisionTransform`.
    """

    def __init__(self, size=0, value=0, mask_value=0, *, order=None):
        super().__init__(order)
        if isinstance(size, int):
            size = (size, size, size, size)
        elif isinstance(size, collections.abc.Sequence) and len(size) == 2:
            size = (0, size[0], 0, size[1])
        elif not (isinstance(size, collections.abc.Sequence) and len(size) == 4):
            raise ValueError(
                "size should be a list/tuple which contains "
                "(top, down, left, right) four pad sizes."
            )
        self.size = size
        self.value = value
        if not isinstance(mask_value, int):
            raise ValueError(
                "mask_value should be a positive integer, "
                "but got mask_value={}".format(mask_value)
            )
        self.mask_value = mask_value

    def _apply_image(self, image):
        return F.pad(image, self.size, self.value)

    def _apply_coords(self, coords):
        coords[:, 0] += self.size[2]
        coords[:, 1] += self.size[0]
        return coords

    def _apply_mask(self, mask):
        return F.pad(mask, self.size, self.mask_value)
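# --- Usage sketch (illustrative only): Pad shifts coordinates by the
# (left, top) padding amounts, matching _apply_coords above.
import numpy as np

_pad = Pad(size=(2, 0, 3, 0), order=("image", "coords"))  # (top, bottom, left, right)
_img = np.zeros((8, 8, 3), dtype=np.uint8)
_coords = np.array([[1.0, 1.0]], dtype=np.float32)
_, _shifted = _pad.apply((_img, _coords))
assert (_shifted == np.array([[4.0, 3.0]])).all()  # x += 3 (left), y += 2 (top)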
class Resize(VisionTransform):
    r"""Resize the input data.

    Args:
        output_size: target size of the image, with (height, width) shape.
        interpolation: interpolation method. All methods are listed below:

            * cv2.INTER_NEAREST – a nearest-neighbor interpolation.
            * cv2.INTER_LINEAR – a bilinear interpolation (used by default).
            * cv2.INTER_AREA – resampling using pixel area relation.
            * cv2.INTER_CUBIC – a bicubic interpolation over a 4×4 pixel neighborhood.
            * cv2.INTER_LANCZOS4 – a Lanczos interpolation over an 8×8 pixel neighborhood.
        order: the same with :class:`VisionTransform`.
    """

    def __init__(self, output_size, interpolation=cv2.INTER_LINEAR, *, order=None):
        super().__init__(order)
        self.output_size = output_size
        self.interpolation = interpolation
class ShortestEdgeResize(VisionTransform):
    r"""Resize the input data with the specified shortest edge.

    Args:
        min_size: target length (or candidate lengths) of the shorter edge.
        max_size: upper bound for the length of the longer edge.
        sample_style: how the shorter-edge length is sampled from ``min_size``;
            one of "range" or "choice".
        interpolation: interpolation method, the same with :class:`Resize`.
        order: the same with :class:`VisionTransform`.
    """

    def __init__(
        self,
        min_size,
        max_size,
        sample_style="range",
        interpolation=cv2.INTER_LINEAR,
        *,
        order=None
    ):
        super().__init__(order)
        if sample_style not in ("range", "choice"):
            raise NotImplementedError(
                "{} is unsupported sample style".format(sample_style)
            )
        self.sample_style = sample_style
        if isinstance(min_size, int):
            min_size = (min_size, min_size)
        self.min_size = min_size
        self.max_size = max_size
        self.interpolation = interpolation
class RandomResize(VisionTransform):
    r"""Resize the input data randomly.

    Args:
        scale_range: range of scaling.
        interpolation: interpolation method, the same with :class:`Resize`.
        order: the same with :class:`VisionTransform`.
    """

    def __init__(self, scale_range, interpolation=cv2.INTER_LINEAR, *, order=None):
        super().__init__(order)
        self.scale_range = scale_range
        self.interpolation = interpolation
class RandomCrop(VisionTransform):
    r"""Crop the input data randomly. Before applying the crop transform,
    the image is padded first. If the target size is still bigger than the
    padded image, the image is further padded to the target size.

    Args:
        output_size: target size of the output image, with (height, width) shape.
        padding_size: the same with `size` in ``Pad``.
        padding_value: the same with `value` in ``Pad``.
        padding_maskvalue: the same with `mask_value` in ``Pad``.
        order: the same with :class:`VisionTransform`.
    """

    def __init__(
        self,
        output_size,
        padding_size=0,
        padding_value=[0, 0, 0],
        padding_maskvalue=0,
        *,
        order=None
    ):
        super().__init__(order)
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
            self.output_size = output_size
        self.pad = Pad(padding_size, padding_value, order=self.order)
        self.padding_value = padding_value
        self.padding_maskvalue = padding_maskvalue
class RandomResizedCrop(VisionTransform):
    r"""Crop the input data to a random size and aspect ratio.
    A crop of random size (default: 0.08 to 1.0 of the original size) and random
    aspect ratio (default: 3/4 to 4/3 of the original aspect ratio) is made.
    After applying the crop transform, the input data will be resized to the given size.

    Args:
        output_size: target size of the output image, with (height, width) shape.
        scale_range: range of the crop size relative to the origin size.
            Default: (0.08, 1.0)
        ratio_range: range of the crop aspect ratio relative to the origin
            aspect ratio. Default: (0.75, 1.33)
        order: the same with :class:`VisionTransform`.
    """

    def __init__(
        self,
        output_size,
        scale_range=(0.08, 1.0),
        ratio_range=(3.0 / 4, 4.0 / 3),
        interpolation=cv2.INTER_LINEAR,
        *,
        order=None
    ):
        super().__init__(order)
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
            self.output_size = output_size
        assert (
            scale_range[0] <= scale_range[1]
        ), "scale_range should be of kind (min, max)"
        assert (
            ratio_range[0] <= ratio_range[1]
        ), "ratio_range should be of kind (min, max)"
        self.scale_range = scale_range
        self.ratio_range = ratio_range
        self.interpolation = interpolation
    def _apply_image(self, image):
        x, y, w, h = self._coord_info
        cropped_img = image[y : y + h, x : x + w]
        return F.resize(cropped_img, self.output_size, self.interpolation)

    def _apply_coords(self, coords):
        x, y, w, h = self._coord_info
        coords[:, 0] = (coords[:, 0] - x) * self.output_size[1] / w
        coords[:, 1] = (coords[:, 1] - y) * self.output_size[0] / h
        return coords

    def _apply_mask(self, mask):
        x, y, w, h = self._coord_info
        cropped_mask = mask[y : y + h, x : x + w]
        return F.resize(cropped_mask, self.output_size, cv2.INTER_NEAREST)

    def _get_coord(self, image, attempts=10):
        height, width, _ = image.shape
        area = height * width

        for _ in range(attempts):
            target_area = np.random.uniform(*self.scale_range) * area
            log_ratio = tuple(math.log(x) for x in self.ratio_range)
            aspect_ratio = math.exp(np.random.uniform(*log_ratio))

            w = int(round(math.sqrt(target_area * aspect_ratio)))
            h = int(round(math.sqrt(target_area / aspect_ratio)))

            if 0 < w <= width and 0 < h <= height:
                x = np.random.randint(0, width - w + 1)
                y = np.random.randint(0, height - h + 1)
                return x, y, w, h

        # Fallback to central crop
        in_ratio = float(width) / float(height)
        if in_ratio < min(self.ratio_range):
            w = width
            h = int(round(w / min(self.ratio_range)))
        elif in_ratio > max(self.ratio_range):
            h = height
            w = int(round(h * max(self.ratio_range)))
        else:  # whole image
            w = width
            h = height
        x = (width - w) // 2
        y = (height - h) // 2
        return x, y, w, h
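# --- Standalone sketch (illustrative only): _get_coord samples the aspect
# ratio log-uniformly, so reciprocal ratios (e.g. 3/4 and 4/3) are equally likely.
import math
import numpy as np

_ratio_range = (3.0 / 4, 4.0 / 3)
_log_ratio = tuple(math.log(x) for x in _ratio_range)
_aspect_ratio = math.exp(np.random.uniform(*_log_ratio))
assert _ratio_range[0] <= _aspect_ratio <= _ratio_range[1]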
class CenterCrop(VisionTransform):
    r"""Crops the given input data at the center.

    Args:
        output_size: target size of the output image, with (height, width) shape.
        order: the same with :class:`VisionTransform`.
    """

    def __init__(self, output_size, *, order=None):
        super().__init__(order)
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
            self.output_size = output_size
    def _apply_image(self, image):
        x, y = self._coord_info
        th, tw = self.output_size
        return image[y : y + th, x : x + tw]

    def _apply_coords(self, coords):
        x, y = self._coord_info
        coords[:, 0] -= x
        coords[:, 1] -= y
        return coords

    def _apply_mask(self, mask):
        x, y = self._coord_info
        th, tw = self.output_size
        return mask[y : y + th, x : x + tw]

    def _get_coord(self, image):
        th, tw = self.output_size
        h, w, _ = image.shape
        assert th <= h and tw <= w, "output size is bigger than image size"
        x = int(round((w - tw) / 2.0))
        y = int(round((h - th) / 2.0))
        return x, y
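# --- Usage sketch (illustrative only; assumes the full CenterCrop from
# megengine.data.transform, where apply() computes _coord_info via _get_coord).
import numpy as np

_crop = CenterCrop(output_size=4)
_img = np.arange(8 * 8 * 3, dtype=np.uint8).reshape(8, 8, 3)
_out = _crop.apply(_img)
assert _out.shape == (4, 4, 3)  # central window: x = y = round((8 - 4) / 2) = 2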
class RandomHorizontalFlip(VisionTransform):
    r"""Horizontally flip the input data randomly with a given probability.

    Args:
        prob: probability of the input data being flipped. Default: 0.5
        order: the same with :class:`VisionTransform`.
    """

    def __init__(self, prob: float = 0.5, *, order=None):
        super().__init__(order)
        self.prob = prob
class RandomVerticalFlip(VisionTransform):
    r"""Vertically flip the input data randomly with a given probability.

    Args:
        prob: probability of the input data being flipped. Default: 0.5
        order: the same with :class:`VisionTransform`.
    """

    def __init__(self, prob: float = 0.5, *, order=None):
        super().__init__(order)
        self.prob = prob
class Normalize(VisionTransform):
    r"""Normalize the input data with mean and standard deviation.
    Given mean: ``(M1,...,Mn)`` and std: ``(S1,...,Sn)`` for ``n`` channels,
    this transform will normalize each channel of the input data.
    ``output[channel] = (input[channel] - mean[channel]) / std[channel]``

    Args:
        mean: sequence of means for each channel.
        std: sequence of standard deviations for each channel.
        order: the same with :class:`VisionTransform`.
    """

    def __init__(self, mean=0.0, std=1.0, *, order=None):
        super().__init__(order)
        self.mean = np.array(mean, dtype=np.float32)
        self.std = np.array(std, dtype=np.float32)

    def _apply_image(self, image):
        return (image - self.mean) / self.std

    def _apply_coords(self, coords):
        return coords

    def _apply_mask(self, mask):
        return mask
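# --- Usage sketch (illustrative only; the statistics below are just example
# per-channel values, not prescribed by this module).
import numpy as np

_norm = Normalize(mean=[103.53, 116.28, 123.675], std=[57.375, 57.12, 58.395])
_img = np.full((2, 2, 3), 128.0, dtype=np.float32)
_out = _norm.apply(_img)  # per channel c: (128 - mean[c]) / std[c], broadcast over H, W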
class GaussianNoise(VisionTransform):
    r"""Add random Gaussian noise to the input data.
    Gaussian noise is generated with the given mean and std.

    Args:
        mean: Gaussian mean used to generate noise.
        std: Gaussian standard deviation used to generate noise.
        order: the same with :class:`VisionTransform`.
    """

    def __init__(self, mean=0.0, std=1.0, *, order=None):
        super().__init__(order)
        self.mean = np.array(mean, dtype=np.float32)
        self.std = np.array(std, dtype=np.float32)

    def _apply_image(self, image):
        dtype = image.dtype
        # noise is drawn with the given mean/std and scaled by 255,
        # i.e. mean/std are interpreted on a [0, 1] intensity scale
        noise = np.random.normal(self.mean, self.std, image.shape) * 255
        image = image + noise.astype(np.float32)
        return np.clip(image, 0, 255).astype(dtype)

    def _apply_coords(self, coords):
        return coords

    def _apply_mask(self, mask):
        return mask
class BrightnessTransform(VisionTransform):
    r"""Adjust the brightness of the input data.

    Args:
        value: how much to adjust the brightness. Can be any non-negative number.
            0 gives the original image.
        order: the same with :class:`VisionTransform`.
    """

    def __init__(self, value, *, order=None):
        super().__init__(order)
        if value < 0:
            raise ValueError("brightness value should be non-negative")
        self.value = value

    def _apply_image(self, image):
        if self.value == 0:
            return image

        dtype = image.dtype
        image = image.astype(np.float32)

        alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value)
        image = image * alpha
        return image.clip(0, 255).astype(dtype)

    def _apply_coords(self, coords):
        return coords

    def _apply_mask(self, mask):
        return mask
class ContrastTransform(VisionTransform):
    r"""Adjust the contrast of the input data.

    Args:
        value: how much to adjust the contrast. Can be any non-negative number.
            0 gives the original image.
        order: the same with :class:`VisionTransform`.
    """

    def __init__(self, value, *, order=None):
        super().__init__(order)
        if value < 0:
            raise ValueError("contrast value should be non-negative")
        self.value = value

    def _apply_image(self, image):
        if self.value == 0:
            return image

        dtype = image.dtype
        image = image.astype(np.float32)

        alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value)
        # blend with the mean gray level: alpha=1 keeps the image,
        # alpha=0 collapses it to a uniform gray
        image = image * alpha + F.to_gray(image).mean() * (1 - alpha)
        return image.clip(0, 255).astype(dtype)

    def _apply_coords(self, coords):
        return coords

    def _apply_mask(self, mask):
        return mask
class SaturationTransform(VisionTransform):
    r"""Adjust the saturation of the input data.

    Args:
        value: how much to adjust the saturation. Can be any non-negative number.
            0 gives the original image.
        order: the same with :class:`VisionTransform`.
    """

    def __init__(self, value, *, order=None):
        super().__init__(order)
        if value < 0:
            raise ValueError("saturation value should be non-negative")
        self.value = value

    def _apply_image(self, image):
        if self.value == 0:
            return image

        dtype = image.dtype
        image = image.astype(np.float32)

        alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value)
        # blend with the per-pixel grayscale image: alpha=0 fully desaturates
        image = image * alpha + F.to_gray(image) * (1 - alpha)
        return image.clip(0, 255).astype(dtype)

    def _apply_coords(self, coords):
        return coords

    def _apply_mask(self, mask):
        return mask
class HueTransform(VisionTransform):
    r"""Adjust the hue of the input data.

    Args:
        value: how much to adjust the hue. Can be any number between 0 and 0.5;
            0 gives the original image.
        order: the same with :class:`VisionTransform`.
    """

    def __init__(self, value, *, order=None):
        super().__init__(order)
        if value < 0 or value > 0.5:
            raise ValueError("hue value should be in [0.0, 0.5]")
        self.value = value

    def _apply_image(self, image):
        if self.value == 0:
            return image

        dtype = image.dtype
        image = image.astype(np.uint8)
        hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV_FULL)
        h, s, v = cv2.split(hsv_image)

        alpha = np.random.uniform(-self.value, self.value)
        h = h.astype(np.uint8)
        # uint8 addition takes care of rotation across boundaries
        with np.errstate(over="ignore"):
            h += np.uint8(alpha * 255)
        hsv_image = cv2.merge([h, s, v])
        return cv2.cvtColor(hsv_image, cv2.COLOR_HSV2BGR_FULL).astype(dtype)

    def _apply_coords(self, coords):
        return coords

    def _apply_mask(self, mask):
        return mask
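# --- Standalone sketch (illustrative only): hue lives on a circle, so uint8
# overflow in the addition above implements rotation modulo 256.
import numpy as np

_h = np.array([250], dtype=np.uint8)
with np.errstate(over="ignore"):
    _h += np.uint8(10)
assert _h[0] == 4  # (250 + 10) % 256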
class ColorJitter(VisionTransform):
    r"""Randomly change the brightness, contrast, saturation and hue of an image.

    Args:
        brightness: how much to jitter brightness.
            Chosen uniformly from [max(0, 1 - brightness), 1 + brightness]
            or the given [min, max]. Should be non-negative numbers.
        contrast: how much to jitter contrast.
            Chosen uniformly from [max(0, 1 - contrast), 1 + contrast]
            or the given [min, max]. Should be non-negative numbers.
        saturation: how much to jitter saturation.
            Chosen uniformly from [max(0, 1 - saturation), 1 + saturation]
            or the given [min, max]. Should be non-negative numbers.
        hue: how much to jitter hue.
            Chosen uniformly from [-hue, hue] or the given [min, max].
            Should have 0 <= hue <= 0.5 or -0.5 <= min <= max <= 0.5.
        order: the same with :class:`VisionTransform`.
    """

    def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, *, order=None):
        super().__init__(order)
        transforms = []
        if brightness != 0:
            transforms.append(BrightnessTransform(brightness))
        if contrast != 0:
            transforms.append(ContrastTransform(contrast))
        if saturation != 0:
            transforms.append(SaturationTransform(saturation))
        if hue != 0:
            transforms.append(HueTransform(hue))
        self.transforms = Compose(
            transforms,
            shuffle_indices=[tuple(range(1, len(transforms) + 1))],
            order=order,
        )
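# --- Usage sketch (illustrative only): enabling all four jitters; the Compose
# built above shuffles the order of the enabled sub-transforms per sample.
_jitter = ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1)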
class Lighting(VisionTransform):
    r"""Apply AlexNet-style "lighting" augmentation to the input data.
    Input images are assumed to have 'RGB' channel order.

    The degree of color jittering is randomly sampled via a normal distribution,
    with standard deviation given by the scale parameter.
    """

    def __init__(self, scale, *, order=None):
        super().__init__(order)
        if scale < 0:
            raise ValueError("lighting scale should be non-negative")
        self.scale = scale
        self.eigvec = np.array(
            [
                [-0.5836, -0.6948, 0.4203],
                [-0.5808, -0.0045, -0.8140],
                [-0.5675, 0.7192, 0.4009],
            ]
        )  # reverse the first dimension for BGR
        self.eigval = np.array([0.2175, 0.0188, 0.0045])

    def _apply_image(self, image):
        if self.scale == 0:
            return image

        dtype = image.dtype
        image = image.astype(np.float32)
        alpha = np.random.normal(scale=self.scale * 255, size=3)
        image = image + self.eigvec.dot(alpha * self.eigval)
        return image.clip(0, 255).astype(dtype)

    def _apply_coords(self, coords):
        return coords

    def _apply_mask(self, mask):
        return mask
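# --- Standalone sketch (illustrative only): the per-channel offset added by
# Lighting is eigvec @ (alpha * eigval), i.e. a random combination of the RGB
# covariance eigenvectors weighted by their eigenvalues.
import numpy as np

_eigvec = np.array([
    [-0.5836, -0.6948, 0.4203],
    [-0.5808, -0.0045, -0.8140],
    [-0.5675, 0.7192, 0.4009],
])
_eigval = np.array([0.2175, 0.0188, 0.0045])
_alpha = np.random.normal(scale=0.1 * 255, size=3)
_offset = _eigvec.dot(_alpha * _eigval)  # shape (3,), added to every pixel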