数据增强总结

101 阅读 0 评论 67 点赞

我是靠谱客的博主“还单身小伙”。这篇文章是我最近在开发中收集整理的，主要总结了常用的数据增强方法，觉得挺不错的，现在分享给大家，希望可以做个参考。

概述

颜色扰动

对比度、亮度、色调、饱和度

def adjust_contrast(image, factor):
    """Scale each value's distance from the image mean by ``factor``.

    The mean is reduced over axis 0 twice, so for an (H, W, C) array it is
    one value per channel. The result goes through ``_clip`` before it is
    returned.
    """
    channel_mean = image.mean(axis=0).mean(axis=0)
    centered = image - channel_mean
    return _clip(centered * factor + channel_mean)

def adjust_brightness(image, delta):
    """Add ``delta * 255`` to every element and return the ``_clip``-ped result."""
    offset = delta * 255
    return _clip(image + offset)

def adjust_hue(image, delta):
    """Shift channel 0 of ``image`` by ``delta * 180``, wrapping modulo 180.

    Channel 0 is overwritten in place and the same array is returned.

    The channel is widened to float64 before the shift: adding an integer
    offset directly to an 8-bit channel would wrap around at 256 before the
    modulo is applied and produce a wrong hue.
    """
    hue = image[..., 0].astype(np.float64)
    image[..., 0] = np.mod(hue + delta * 180, 180)
    return image

def adjust_saturation(image, factor):
    """Multiply channel 1 of ``image`` by ``factor`` and clamp to [0, 255].

    Channel 1 is overwritten in place and the same array is returned.

    The channel is widened to float64 before scaling: multiplying an 8-bit
    channel by an integer factor would wrap around at 256 before the clamp
    could take effect.
    """
    saturation = image[..., 1].astype(np.float64)
    image[..., 1] = np.clip(saturation * factor, 0, 255)
    return image

def _clip(image):
    """Clamp values to [0, 255] and convert the result to ``uint8``."""
    bounded = np.clip(image, 0, 255)
    return bounded.astype(np.uint8)
    
def _uniform(val_range):
    """Draw one ``np.random.uniform`` sample between ``val_range[0]`` and ``val_range[1]``."""
    low = val_range[0]
    high = val_range[1]
    return np.random.uniform(low, high)
    
class ColorDistort:
    """Randomly perturb contrast, brightness, hue and saturation of an image.

    Each ``*_range`` is a ``(low, high)`` pair that is sampled uniformly on
    every call. Passing ``None`` for a range switches that adjustment off.
    """

    def __init__(
            self,
            contrast_range=(0.8, 1.2),
            brightness_range=(-.2, .2),
            hue_range=(-0.1, 0.1),
            saturation_range=(0.8, 1.2)
    ):
        self.saturation_range = saturation_range
        self.hue_range = hue_range
        self.brightness_range = brightness_range
        self.contrast_range = contrast_range

    def __call__(self, image):
        """Apply the enabled adjustments in the order contrast, brightness, hue, saturation.

        Hue and saturation are applied between a BGR->HSV and an HSV->BGR
        conversion; that round trip is skipped when both are disabled.
        """
        if self.contrast_range is not None:
            image = adjust_contrast(image, _uniform(self.contrast_range))

        if self.brightness_range is not None:
            image = adjust_brightness(image, _uniform(self.brightness_range))

        shift_hue = self.hue_range is not None
        scale_saturation = self.saturation_range is not None
        if not (shift_hue or scale_saturation):
            return image

        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        if shift_hue:
            hsv = adjust_hue(hsv, _uniform(self.hue_range))
        if scale_saturation:
            hsv = adjust_saturation(hsv, _uniform(self.saturation_range))
        return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
        
# Usage fragment. NOTE(review): `self` and `fname` are not defined in this
# snippet -- presumably it was lifted from a dataset class; confirm before reuse.
# Build the augmenter with its default ranges.
self.color_augmentor = ColorDistort()
# Read the file as a 3-channel colour image.
image = cv2.imread(fname, cv2.IMREAD_COLOR)
image = image.astype(np.uint8)
# Apply one random colour distortion.
image =self.color_augmentor(image)

图像高斯模糊和heatmap高斯模糊

def blur_heatmap(src, ksize=(3, 3)):
    """Gaussian-blur every channel of ``src`` in place, then min-max normalise it.

    Args:
        src: array indexed as ``src[:, :, i]``; it is modified in place.
        ksize: kernel size handed to ``cv2.GaussianBlur``. The sigma
            argument is 0, which asks OpenCV to derive it from ``ksize``.

    Returns:
        ``src`` itself.

    A channel is rescaled to [0, 1] only when its maximum is positive and
    differs from its minimum. A constant channel is left as blurred, since
    normalising it would divide zero by zero and fill it with NaN.
    """
    for i in range(src.shape[2]):
        channel = cv2.GaussianBlur(src[:, :, i], ksize, 0)
        amin, amax = channel.min(), channel.max()
        if amax > 0 and amax > amin:
            # (value - min) / (max - min)
            channel = (channel - amin) / (amax - amin)
        src[:, :, i] = channel
    return src
    
def blur(src, ksize=(3, 3)):
    """Gaussian-blur every channel of ``src`` in place (sigma 1.5) and return ``src``."""
    channel_count = src.shape[2]
    for channel in range(channel_count):
        plane = src[:, :, channel]
        src[:, :, channel] = cv2.GaussianBlur(plane, ksize, 1.5)
    return src

灰度和通道变换

def gray(src):
    """Overwrite channels 0-2 of ``src`` with its RGB->gray conversion and return ``src``."""
    luminance = cv2.cvtColor(src, cv2.COLOR_RGB2GRAY)
    for channel in (0, 1, 2):
        src[:, :, channel] = luminance
    return src

def swap_change(src):
    """Return a copy of ``src`` whose first three channels are randomly permuted.

    The permutation comes from a single ``random.sample`` call; ``src``
    itself is not modified.
    """
    order = random.sample([0, 1, 2], 3)
    shuffled = src.copy()
    for target, source in enumerate(order):
        shuffled[:, :, target] = src[:, :, source]
    return shuffled

像素扰动

def pixel_jitter(src,p=0.5,max_=5.):
    """With probability ``p`` add uniform noise in [-max_, max_) to every element.

    The input is converted to float32 first. The result, jittered or not, is
    returned as a new ``uint8`` array; jittered values are clamped to
    [0, 255] before the conversion.
    """
    as_float = src.astype(np.float32)
    if not random.uniform(0, 1) < p:
        # No jitter this time: only the float32 -> uint8 round trip applies.
        return as_float.astype(np.uint8)

    rows, cols, channels = as_float.shape[0], as_float.shape[1], as_float.shape[2]
    noise = (np.random.rand(rows, cols, channels) - 0.5) * 2 * max_
    jittered = np.clip(as_float + noise, 0, 255)
    return jittered.astype(np.uint8)

几何变换

多线程离线旋转、翻转

import time
import threadpool
import os
from PIL import Image

# Absolute paths of the images to augment.
# NOTE(review): the file names are listed from "./test" but prefixed with a
# hard-coded absolute directory -- this only works if both refer to the same
# folder; confirm before running on another machine.
name = ["/media/wxy/000F8E4B0002F751/test/"+name_ for name_ in os.listdir("./test")]

def create_read_img(filename):
    """Write five flipped/rotated PNG variants of ``filename`` next to it.

    For an input ``<stem><ext>`` the outputs are ``<stem>_h.png``
    (left-right flip), ``<stem>_w.png`` (top-bottom flip) and
    ``<stem>_90.png`` / ``<stem>_180.png`` / ``<stem>_270.png`` (rotations).
    The processed file name is printed when done.

    The stem is taken with ``os.path.splitext`` so extensions of any length
    (``.jpeg``, ``.tiff``, none) are handled, and the source image is opened
    in a ``with`` block so its file handle is released even if a save fails.
    """
    stem = os.path.splitext(filename)[0]
    variants = (
        ('_h', Image.FLIP_LEFT_RIGHT),
        ('_w', Image.FLIP_TOP_BOTTOM),
        ('_90', Image.ROTATE_90),
        ('_180', Image.ROTATE_180),
        ('_270', Image.ROTATE_270),
    )
    with Image.open(filename) as im:
        for suffix, method in variants:
            im.transpose(method).save(stem + suffix + '.png')
    print(filename)

# Run create_read_img over every path in `name` on a pool of 5 worker
# threads and report the elapsed wall-clock time.
start_time = time.time()
pool = threadpool.ThreadPool(5)
requests = threadpool.makeRequests(create_read_img, name)
for req in requests:
    pool.putRequest(req)
# Block until every queued request has been processed.
pool.wait()
print ('%d second'% (time.time()-start_time))

对图像中的roi区域进行仿射变换

def get_3rd_point(a, b):
    """Return ``b`` plus the vector ``a - b`` rotated by 90 degrees.

    With ``a - b = (dx, dy)`` the offset added to ``b`` is ``(-dy, dx)``,
    built as a float32 array.
    """
    delta = a - b
    perpendicular = np.array([-delta[1], delta[0]], dtype=np.float32)
    return b + perpendicular


def get_dir(src_point, rot_rad):
    """Rotate the 2-D point ``src_point`` by ``rot_rad`` radians about the origin.

    Returns the rotated point as a two-element list ``[x', y']``.
    """
    sin_r = np.sin(rot_rad)
    cos_r = np.cos(rot_rad)
    x, y = src_point[0], src_point[1]
    return [x * cos_r - y * sin_r, x * sin_r + y * cos_r]
    
def get_affine_transform(
        center, scale, rot, output_size,
        shift=np.array([0, 0], dtype=np.float32), inv=0
):
    """Build the 2x3 affine matrix mapping a rotated source box to the output.

    Args:
        center: (x, y) centre of the source box.
        scale: source box size in units of 200 pixels. A scalar is used for
            both axes; a list, tuple or array gives (w, h). Only the first
            entry is used for the reference direction.
        rot: rotation in degrees.
        output_size: (width, height) of the destination image.
        shift: offset of the box centre, as a fraction of the scaled size.
        inv: when truthy, return the destination->source transform instead.

    Returns:
        The matrix from ``cv2.getAffineTransform``.

    The transform is defined by three point pairs: the centre, a point half
    a box-width away along the rotated "up" direction, and a third point
    perpendicular to those two (from ``get_3rd_point``).
    """
    if isinstance(scale, (list, tuple)):
        # A plain sequence cannot be multiplied element-wise by a float.
        scale = np.array(scale, dtype=np.float32)
    elif not isinstance(scale, np.ndarray):
        scale = np.array([scale, scale])

    scale_tmp = scale * 200.0
    src_w = scale_tmp[0]
    dst_w = output_size[0]
    dst_h = output_size[1]

    rot_rad = np.pi * rot / 180
    src_dir = get_dir([0, src_w * -0.5], rot_rad)
    dst_dir = np.array([0, dst_w * -0.5], np.float32)

    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir

    src[2:, :] = get_3rd_point(src[0, :], src[1, :])
    dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))

    return trans
    
# Usage fragment. NOTE(review): image_file, center, scale, scale_factor,
# rotation_factor and `self` come from a scope not shown here.

# Read the image ignoring EXIF orientation, then convert BGR -> RGB.
data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
            )
data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

c = center  # centre point (aliased, so the flips below also update `center`)
s = scale  # box scale

sf = scale_factor  # scale jitter factor
rf = rotation_factor  # rotation jitter factor, in degrees
# Jitter the scale 60% of the time. Otherwise keep it unchanged: falling
# back to 0 here would collapse the source box and give a degenerate transform.
if random.random() <= 0.6:
    s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)
# Jitter the rotation 60% of the time; otherwise do not rotate.
r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
    if random.random() <= 0.6 else 0
# Random horizontal flip, mirroring the centre's x coordinate.
if self.flip and random.random() <= 0.5:
    data_numpy = data_numpy[:, ::-1, :]
    c[0] = data_numpy.shape[1] - c[0] - 1
# Random vertical flip, mirroring the centre's y coordinate.
if self.flip and random.random() <= 0.5:
    data_numpy = data_numpy[::-1, :, :]
    c[1] = data_numpy.shape[0] - c[1] - 1

trans = get_affine_transform(c, s, r, self.image_size)
# Alternative 2x3 matrices that could be passed to warpAffine instead:
#   pure translation:
#     trans = np.float32([[1, 0, 200], [0, 1, 100]])
#   rotation about a centre (arguments: centre, angle, scale factor):
#     trans = cv2.getRotationMatrix2D((cols/2,rows/2),90,1)

# NOTE(review): `input` shadows the builtin; the name is kept because code
# outside this fragment may read it.
input = cv2.warpAffine(
            data_numpy,
            trans,
            (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

目标检测中的数据增强

DSFD人脸

随机大小

def Random_scale_withbbox(image,bboxes,target_shape,jitter=0.5):
    """Randomly crop ``image`` so that at least one box survives, then resize.

    Args:
        image: array of shape (H, W, C).
        bboxes: non-empty sequence of boxes. Columns 0 and 2 are treated as
            x coordinates and columns 1 and 3 as y coordinates (that is how
            the crop offsets and resize factors are applied). This function
            itself only touches columns 0-3.
        target_shape: ``(h, w)``. The resized height is drawn from
            ``[0.5 * h, h]`` and the resized width is capped at ``w``.
        jitter: accepted for interface compatibility; not used.

    Returns:
        ``(resized_crop, boxes)`` where the boxes are those returned by
        ``box_in_img`` for the crop, rescaled to the resized crop.
        NOTE(review): ``box_in_img`` is defined elsewhere; presumably it
        keeps/clips the boxes that fall inside the crop -- confirm.

    Raises:
        ValueError: if ``bboxes`` is empty. No crop can ever contain a box
            in that case, so the sampling loop would never finish.
    """
    if len(bboxes) == 0:
        raise ValueError('Random_scale_withbbox needs at least one bounding box')

    hi, wi, _ = image.shape

    # Re-sample crops until box_in_img reports at least one box inside.
    while True:
        bboxes_ = np.array(bboxes)
        # Crop between 20% and 100% of each side, but never zero pixels.
        crop_h = max(1, int(hi * random.uniform(0.2, 1)))
        crop_w = max(1, int(wi * random.uniform(0.2, 1)))

        start_h = random.randint(0, hi - crop_h)
        start_w = random.randint(0, wi - crop_w)

        croped = image[start_h:start_h + crop_h, start_w:start_w + crop_w, :]

        # Express the boxes in the crop's coordinate frame.
        bboxes_[:, [0, 2]] -= start_w
        bboxes_[:, [1, 3]] -= start_h

        bboxes_fix = box_in_img(croped, bboxes_)
        if len(bboxes_fix) > 0:
            break

    h, w = target_shape
    croped_h, croped_w, _ = croped.shape

    croped_h_w_ratio = croped_h / croped_w

    rescale_h = max(1, int(h * random.uniform(0.5, 1)))

    # Width follows the crop's aspect ratio, distorted by a factor in [0.7, 1.3].
    rescale_w = int(rescale_h / (random.uniform(0.7, 1.3) * croped_h_w_ratio))
    # Keep at least one column: a zero width makes the resize fail.
    rescale_w = np.clip(rescale_w, 1, w)

    image = cv2.resize(croped, (rescale_w, rescale_h))

    bboxes_fix[:, 0] = bboxes_fix[:, 0] * rescale_w / croped_w
    bboxes_fix[:, 1] = bboxes_fix[:, 1] * rescale_h / croped_h
    bboxes_fix[:, 2] = bboxes_fix[:, 2] * rescale_w / croped_w
    bboxes_fix[:, 3] = bboxes_fix[:, 3] * rescale_h / croped_h

    return image, bboxes_fix

随机翻转

def Random_flip(im, boxes):
    """Mirror ``im`` left-right and mirror the x coordinates of ``boxes``.

    Columns 0 and 2 of ``boxes`` are overwritten in place with
    ``width - old column 2`` and ``width - old column 0``. Returns a flipped
    copy of the image together with the same ``boxes`` array.
    """
    flipped = np.fliplr(im).copy()
    _, width, _ = im.shape
    mirrored_left = width - boxes[:, 2]
    mirrored_right = width - boxes[:, 0]
    boxes[:, 0], boxes[:, 2] = mirrored_left, mirrored_right
    return flipped, boxes

随机剪切

class RandomSampleCrop(object):
    """Randomly crop a patch whose IoU with the boxes satisfies a sampled constraint.

    Calling the instance with ``(image, boxes, labels)`` returns
    ``(image, boxes, labels)``:

    Arguments:
        image: array of shape (H, W, C)
        boxes: array of shape [num_boxes, 4] as x1, y1, x2, y2
        labels: array indexable by a boolean mask over the boxes
    Return:
        (img, boxes, labels)
            img: the cropped image (a slice of the input, not a copy), or
                the input itself when the "no crop" option is drawn
            boxes: boxes whose centre lies in the patch, clipped to the
                patch and shifted to its origin
            labels: the labels of those boxes
    """
    def __init__(self):
        self.sample_options = (
            # using entire original input image
            None,
            # sample a patch s.t. MIN jaccard w/ obj in .1,.3,.7,.9
            (0.1, None),
            (0.3, None),
            (0.7, None),
            (0.9, None),
            # randomly sample a patch
            (None, None),
        )

    def __call__(self, image, boxes=None, labels=None):
        """Sample options until one yields a valid patch; see the class docstring."""
        height, width, _ = image.shape
        while True:
            # randomly choose a mode
            mode = random.choice(self.sample_options)
            if mode is None:
                return image, boxes, labels

            min_iou, max_iou = mode
            if min_iou is None:
                min_iou = float('-inf')
            if max_iou is None:
                max_iou = float('inf')

            # max trials (50) before drawing another mode
            for _ in range(50):
                w = random.uniform(0.3 * width, width)
                h = random.uniform(0.3 * height, height)

                # aspect ratio constraint b/t .5 & 2
                if h / w < 0.5 or h / w > 2:
                    continue

                left = random.uniform(0, width - w)
                top = random.uniform(0, height - h)

                # convert to integer rect x1,y1,x2,y2
                rect = np.array([int(left), int(top), int(left+w), int(top+h)])

                # calculate IoU (jaccard overlap) b/t the cropped and gt boxes
                overlap = self.jaccard_numpy(boxes, rect)

                # Reject the patch when EITHER bound is violated. Requiring
                # both to fail at once never rejects anything when one bound
                # is infinite, which is the case for every option above.
                if overlap.min() < min_iou or overlap.max() > max_iou:
                    continue

                # cut the crop from the image
                current_image = image[rect[1]:rect[3], rect[0]:rect[2], :]

                # keep a gt box only if its centre lies in the sampled patch
                centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0

                # centres strictly right of / below the patch's top-left corner
                m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])

                # centres strictly left of / above the patch's bottom-right corner
                m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])

                # both conditions hold
                mask = m1 * m2

                # have any valid boxes? try again if not
                if not mask.any():
                    continue

                # take only matching gt boxes
                current_boxes = boxes[mask, :].copy()

                # take only matching gt labels
                current_labels = labels[mask]

                # clip the top-left corners to the patch ...
                current_boxes[:, :2] = np.maximum(current_boxes[:, :2],
                                                  rect[:2])
                # ... and shift them to the patch origin
                current_boxes[:, :2] -= rect[:2]

                # clip the bottom-right corners to the patch ...
                current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:],
                                                  rect[2:])
                # ... and shift them to the patch origin
                current_boxes[:, 2:] -= rect[:2]

                return current_image, current_boxes, current_labels

    def jaccard_numpy(self,box_a, box_b):
        """Compute the jaccard overlap (intersection over union) of each box
        in ``box_a`` with the single box ``box_b``.
        Args:
            box_a: Multiple bounding boxes, Shape: [num_boxes,4]
            box_b: Single bounding box, Shape: [4]
        Return:
            jaccard overlap: Shape: [num_boxes]
        """
        inter = self.intersect(box_a, box_b)
        area_a = ((box_a[:, 2]-box_a[:, 0]) *
                  (box_a[:, 3]-box_a[:, 1]))  # [num_boxes]
        area_b = ((box_b[2]-box_b[0]) *
                  (box_b[3]-box_b[1]))  # scalar
        union = area_a + area_b - inter
        return inter / union  # [num_boxes]


    def intersect(self,box_a, box_b):
        """Return the intersection area of each box in ``box_a`` with ``box_b``."""
        max_xy = np.minimum(box_a[:, 2:], box_b[2:])
        min_xy = np.maximum(box_a[:, :2], box_b[:2])
        # Non-overlapping boxes give a negative extent; clamp it to zero.
        inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf)
        return inter[:, 0] * inter[:, 1]
        
# Shared IoU-constrained random cropper instance.
dsfd_aug=RandomSampleCrop()

随机剪切（RandomBaiduCrop：先按锚框尺度缩放，再裁剪固定大小的窗口）

class RandomBaiduCrop(object):
    """Rescale the image so a random box matches an anchor size, then crop a square window.

    Calling the instance with ``(image, boxes, labels)`` returns
    ``(image, boxes, labels)``:

    Arguments:
        image: array of shape (H, W, C)
        boxes: array of shape [num_boxes, 4] as x1, y1, x2, y2; it is
            multiplied by the resize ratio IN PLACE
        labels: array indexable by a boolean mask over the boxes
    Return:
        (img, boxes, labels)
            img: the cropped window, or the resized image when no valid
                window was found in 50 tries
            boxes: boxes whose centre lies in the window, shifted to the
                window origin (they are not clipped to the window)
            labels: the labels of those boxes
    """

    def __init__(self, size):

        # NOTE(review): self.mean is not read anywhere in this class; the
        # padding in __call__ uses cfg.DATA.PIXEL_MEAN instead.
        self.mean = np.array([103, 116, 123], dtype=np.float32)
        self.maxSize = 12000  # cap: resized area may not exceed maxSize * maxSize
        self.infDistance = 9999999  # initial "infinite" distance for the nearest-anchor search
        self.size = size  # side length of the square crop window

    def __call__(self, image, boxes=None, labels=None):
        """Resize around a randomly chosen box and crop; see the class docstring."""
        height, width, _ = image.shape
        # NOTE(review): random_counter is never used below.
        random_counter = 0
        # Box areas with the +1 (inclusive pixel) convention.
        boxArea = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
        # argsort = np.argsort(boxArea)
        # rand_idx = random.randint(min(len(argsort),6))
        # print('rand idx',rand_idx)
        # Pick one box at random; its side is the square root of its area.
        rand_idx = random.randint(0,len(boxArea)-1)
        rand_Side = boxArea[rand_idx] ** 0.5
        # rand_Side = min(boxes[rand_idx,2] - boxes[rand_idx,0] + 1, boxes[rand_idx,3] - boxes[rand_idx,1] + 1)
        anchors = [16, 32, 64, 128, 256, 512]
        distance = self.infDistance
        anchor_idx = 5
        # Find the anchor size closest to the chosen box's side.
        for i, anchor in enumerate(anchors):
            if abs(anchor - rand_Side) < distance:
                distance = abs(anchor - rand_Side)
                anchor_idx = i
        # Choose a target size among the anchors up to one step above the nearest.
        target_anchor = random.choice(anchors[0:min(anchor_idx + 1, 5) + 1])
        ratio = float(target_anchor) / rand_Side
        # Extra scale jitter by a factor in [0.5, 2].
        ratio = ratio * (2 ** random.uniform(-1, 1))
        # Limit the resized area to maxSize * maxSize.
        if int(height * ratio * width * ratio) > self.maxSize * self.maxSize:
            ratio = (self.maxSize * self.maxSize / (height * width)) ** 0.5
        interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4]
        interp_method = random.choice(interp_methods)
        image = cv2.resize(image, None, None, fx=ratio, fy=ratio, interpolation=interp_method)
        # Scale the caller's boxes in place to the resized image.
        boxes[:, 0] *= ratio
        boxes[:, 1] *= ratio
        boxes[:, 2] *= ratio
        boxes[:, 3] *= ratio
        height, width, _ = image.shape
        sample_boxes = []
        # Position and size of the chosen box after resizing.
        xmin = boxes[rand_idx, 0]
        ymin = boxes[rand_idx, 1]
        bw = (boxes[rand_idx, 2] - boxes[rand_idx, 0] + 1)
        bh = (boxes[rand_idx, 3] - boxes[rand_idx, 1] + 1)
        w = h = self.size

        # Draw up to 50 candidate windows and collect the valid ones.
        for _ in range(50):
            if w < max(height, width):
                # Window smaller than the image: place it so that it covers the
                # chosen box (or lies inside it when the box is larger).
                if bw <= w:
                    w_off = random.uniform(xmin + bw - w, xmin)
                else:
                    w_off = random.uniform(xmin, xmin + bw - w)
                if bh <= h:
                    h_off = random.uniform(ymin + bh - h, ymin)
                else:
                    h_off = random.uniform(ymin, ymin + bh - h)
            else:
                # Window at least as large as the image: offsets are drawn
                # between (image size - window size) and 0.
                w_off = random.uniform(width - w, 0)
                h_off = random.uniform(height - h, 0)
            w_off = math.floor(w_off)
            h_off = math.floor(h_off)
            # convert to integer rect x1,y1,x2,y2
            rect = np.array([int(w_off), int(h_off), int(w_off + w), int(h_off + h)])
            # NOTE(review): centers is computed here but not used inside this loop.
            centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
            # boxes whose top-left corner is inside the window
            m1 = (rect[0] <= boxes[:, 0]) * (rect[1] <= boxes[:, 1])
            # boxes whose bottom-right corner is inside the window
            m2 = (rect[2] >= boxes[:, 2]) * (rect[3] >= boxes[:, 3])
            # boxes fully contained in the window
            mask = m1 * m2
            overlap = self.jaccard_numpy(boxes, rect)
            # keep the window if it fully contains a box or has IoU > 0.7 with one
            if not mask.any() and not overlap.max() > 0.7:
                continue
            else:
                sample_boxes.append(rect)

        if len(sample_boxes) > 0:
            # Pick one of the valid windows at random.
            choice_idx = random.randint(0,len(sample_boxes)-1)
            choice_box = sample_boxes[choice_idx]
            # print('crop the box :',choice_box)
            # Keep the boxes whose centre lies strictly inside the window.
            centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
            m1 = (choice_box[0] < centers[:, 0]) * (choice_box[1] < centers[:, 1])
            m2 = (choice_box[2] > centers[:, 0]) * (choice_box[3] > centers[:, 1])
            mask = m1 * m2
            current_boxes = boxes[mask, :].copy()
            current_labels = labels[mask]
            # Shift the kept boxes to the window origin.
            current_boxes[:, :2] -= choice_box[:2]
            current_boxes[:, 2:] -= choice_box[:2]
            if choice_box[0] < 0 or choice_box[1] < 0:
                # The window starts above/left of the image: paste the image into
                # a larger canvas filled with cfg.DATA.PIXEL_MEAN, then crop.
                # NOTE(review): `cfg` is a module-level name bound elsewhere.
                new_img_width = width if choice_box[0] >= 0 else width - choice_box[0]
                new_img_height = height if choice_box[1] >= 0 else height - choice_box[1]
                image_pad = np.zeros((new_img_height, new_img_width, 3), dtype=float)+np.array(cfg.DATA.PIXEL_MEAN,dtype=float)
                start_left = 0 if choice_box[0] >= 0 else -choice_box[0]
                start_top = 0 if choice_box[1] >= 0 else -choice_box[1]
                image_pad[start_top:, start_left:, :] = image

                choice_box_w = choice_box[2] - choice_box[0]
                choice_box_h = choice_box[3] - choice_box[1]

                # Window origin expressed in the padded canvas.
                start_left = choice_box[0] if choice_box[0] >= 0 else 0
                start_top = choice_box[1] if choice_box[1] >= 0 else 0
                end_right = start_left + choice_box_w
                end_bottom = start_top + choice_box_h
                current_image = image_pad[start_top:end_bottom, start_left:end_right, :].copy()
                return current_image, current_boxes, current_labels
            # Window origin is inside the image: plain slice (the slice is cut
            # short if the window extends past the right/bottom edge).
            current_image = image[choice_box[1]:choice_box[3], choice_box[0]:choice_box[2], :].copy()
            return current_image, current_boxes, current_labels
        else:
            # No valid window found: return the resized image and scaled boxes.
            return image, boxes, labels
    def jaccard_numpy(self,box_a, box_b):
        """Compute the jaccard overlap (intersection over union) of each box
        in ``box_a`` with the single box ``box_b``.
        Args:
            box_a: Multiple bounding boxes, Shape: [num_boxes,4]
            box_b: Single bounding box, Shape: [4]
        Return:
            jaccard overlap: Shape: [num_boxes]
        """
        inter = self.intersect(box_a, box_b)
        area_a = ((box_a[:, 2]-box_a[:, 0]) *
                  (box_a[:, 3]-box_a[:, 1]))  # [num_boxes]
        area_b = ((box_b[2]-box_b[0]) *
                  (box_b[3]-box_b[1]))  # scalar
        union = area_a + area_b - inter
        return inter / union  # [num_boxes]


    def intersect(self,box_a, box_b):
        """Return the intersection area of each box in ``box_a`` with ``box_b``."""
        max_xy = np.minimum(box_a[:, 2:], box_b[2:])
        min_xy = np.maximum(box_a[:, :2], box_b[:2])
        # Non-overlapping boxes give a negative extent; clamp it to zero.
        inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf)
        return inter[:, 0] * inter[:, 1]

import sys
# Put the current working directory on the import path; presumably that is
# where train_config lives -- confirm.
sys.path.append('.')
from train_config import config as cfg
# Shared cropper instance; the square crop window side is cfg.DATA.hin.
baidu_aug=RandomBaiduCrop(cfg.DATA.hin)

# Usage fragment: pick one crop/scale augmentation per sample.
#   (0.8, 1.0] -> random crop + rescale
#   (0.4, 0.8] -> IoU-constrained crop (dsfd_aug)
#   otherwise  -> anchor-driven crop (baidu_aug)
sample_dice = random.uniform(0, 1)
if 0.8 < sample_dice <= 1:
    image, boxes = Random_scale_withbbox(
        image, boxes, target_shape=[cfg.DATA.hin, cfg.DATA.win], jitter=0.3)
else:
    cropper = dsfd_aug if 0.4 < sample_dice <= 0.8 else baidu_aug
    # Columns 0-3 are the coordinates; the remaining columns are passed to
    # the cropper as labels and re-attached afterwards.
    boxes_ = boxes[:, 0:4]
    klass_ = boxes[:, 4:]
    image, boxes_, klass_ = cropper(image, boxes_, klass_)

    image = image.astype(np.uint8)
    boxes = np.concatenate([boxes_, klass_], axis=1)

# Horizontal flip with probability 0.5.
if random.uniform(0, 1) > 0.5:
    image, boxes = Random_flip(image, boxes)

# Colour distortion with probability 0.5.
if random.uniform(0, 1) > 0.5:
    image =self.color_augmentor(image)