数据增强——基本方法



基本数据增强主要包含如下方式:
1.旋转:将图像绕中心旋转一个随机的小角度;旋转后四角会出现空白区域,可通过在原图上先放大图像,然后剪切图像来去除。
2.平移:先放大图像,然后水平或垂直偏移位置剪切
3.缩放:缩放图像
4.随机遮挡:对图像进行小区域遮挡
5.水平翻转:以过图像中心的竖直轴为对称轴,将左、右两边像素交换
6.颜色抖动:随机调整图像的饱和度、亮度、对比度、锐度等
7.噪声扰动:对图像的每个像素RGB进行随机扰动,常用的噪声模式是椒盐噪声和高斯噪声。


TensorFlow 代码实现:
参数可根据需求进行相应调整。

# -*- coding: utf-8 -*-
"""
# 数据增强实现
"""
import tensorflow as tf
import cv2
import numpy as np
from scipy import misc
import random

def random_rotate_image(image):
    """Rotate an image by a random angle in [-10, 10) degrees.

    The interpolation method is picked uniformly from 'nearest',
    'bilinear', 'cubic' and 'bicubic' (the last two both map to a cubic
    spline). The output keeps the input's shape; areas uncovered by the
    rotation are filled with 0.

    Args:
        image: numpy array of shape (H, W) or (H, W, C). Expected to hold
            pixel values in the 0-255 range (the pipeline passes uint8).

    Returns:
        A uint8 numpy array with the same shape as ``image``.
    """
    # scipy.misc.imrotate was deprecated in SciPy 1.0 and later removed,
    # so rotate with scipy.ndimage instead. Imported locally so this
    # function does not depend on the file-level import list.
    from scipy import ndimage

    spline_orders = {'nearest': 0, 'bilinear': 1, 'cubic': 3, 'bicubic': 3}
    interb = ['nearest', 'bilinear', 'cubic', 'bicubic']
    angle = np.random.uniform(low=-10.0, high=10.0)
    key = random.randint(0, 3)
    # Interpolate in float so cubic-spline overshoot can be clipped
    # explicitly instead of wrapping around in uint8.
    rotated = ndimage.rotate(
        np.asarray(image, dtype=np.float32),
        angle,
        reshape=False,  # keep the original height/width
        order=spline_orders[interb[key]],
        mode='constant',
        cval=0.0
    )
    return np.clip(np.rint(rotated), 0, 255).astype(np.uint8)

def random_occlusion(image, b_ratio=1. / 10):
    """Black out one randomly placed rectangle of the image.

    The rectangle measures ``b_ratio`` of the image height by ``b_ratio``
    of the image width. When the image is large enough, the rectangle is
    kept at least 10 pixels away from every border; for smaller images it
    may be placed anywhere inside the image.

    Args:
        image: numpy array of shape (H, W) or (H, W, C).
        b_ratio: fraction of each side covered by the occlusion, in [0, 1].

    Returns:
        A new uint8 array of the same shape with the rectangle set to 0.
        The input array is not modified.

    Raises:
        ValueError: if ``b_ratio`` is outside [0, 1].
    """
    if not 0.0 <= b_ratio <= 1.0:
        raise ValueError("b_ratio must be in [0, 1], got %r" % (b_ratio,))

    height, width = image.shape[:2]
    occ_h = int(height * b_ratio)
    occ_w = int(width * b_ratio)
    margin = 10

    def _pick_start(extent, occ):
        # random.randint needs integer bounds (float bounds raise
        # TypeError on Python 3.12+), so everything here stays in ints.
        upper = extent - occ - margin
        if upper >= margin:
            return random.randint(margin, upper)
        # Image too small to honour the margin: place anywhere that fits.
        return random.randint(0, max(0, extent - occ))

    top = _pick_start(height, occ_h)
    left = _pick_start(width, occ_w)

    occluded = np.asarray(image).astype(np.uint8)  # astype returns a copy
    occluded[top:top + occ_h, left:left + occ_w] = 0
    return occluded

def data_augumrntation(image):
    """Build the TensorFlow (1.x API) ops that augment a single image.

    Steps, in order: random occlusion, random rotation, random rescale
    (0.9x-1.1x) followed by crop/pad back to 320x250, random 299x235
    crop, random horizontal flip, occasional per-image standardization,
    random brightness, random contrast, additive Gaussian noise, and a
    final rescale ``(x - 127.5) * 0.0078125``.

    NOTE: the rescale size (``new_H``/``new_W``) and the standardization
    switch (``N_key``) are drawn with Python's ``random`` module when this
    function is called, so they are fixed for the graph it builds rather
    than re-drawn on every ``session.run``.

    Args:
        image: uint8 image tensor; it is declared as shape (320, 250, 3)
            after the numpy-based steps.

    Returns:
        A float32 tensor of shape (299, 235, 3).
    """
    image = tf.py_func(random_occlusion, [image], tf.uint8)  # random occlusion
    image = tf.py_func(random_rotate_image, [image], tf.uint8)  # rotation
    ratio = [0.9, 1.1]  # scaling range
    # random.randint requires integer bounds; float bounds raise
    # TypeError on Python 3.12+ (and were deprecated before that).
    new_H = random.randint(int(round(320 * ratio[0])), int(round(320 * ratio[1])))
    new_W = random.randint(int(round(250 * ratio[0])), int(round(250 * ratio[1])))
    print(new_H, new_W)
    # py_func outputs have no static shape, so declare it before resizing.
    image.set_shape((320, 250, 3))
    image = tf.image.resize_images(image, [new_H, new_W])
    image = tf.cast(image, tf.uint8)
    # Crop or zero-pad the rescaled image back to the nominal size.
    image = tf.image.resize_image_with_crop_or_pad(image, 320, 250)  # scaling
    image = tf.random_crop(image, [299, 235, 3])  # random crop
    image = tf.image.random_flip_left_right(image)  # mirror
    N_key = random.randint(0, 10)
    if N_key == 8:  # 1-in-11 chance
        image = tf.image.per_image_standardization(image)  # standardization
    image = tf.cast(image, tf.float32)
    # Each photometric step is clamped back into [0, 255].
    image = tf.minimum(255.0, tf.maximum(0.0, tf.image.random_brightness(image, 25.0)))  # brightness
    image = tf.minimum(255.0, tf.maximum(0.0, tf.image.random_contrast(image, 0.8, 1.2)))  # contrast
    noise = tf.random_normal((299, 235, 3), mean=0.0, stddev=1.0, dtype=tf.float32)
    image = tf.minimum(255.0, tf.maximum(0.0, image + noise))  # random noise
    # Map [0, 255] to roughly [-1, 1]: 0.0078125 == 1 / 128.
    image = tf.subtract(image, 127.5)
    image = tf.multiply(image, 0.0078125)
    return image

if __name__ == '__main__':
    # Demo: load one JPEG, run it through the augmentation graph and show it.
    pic = r"bb.jpg"
    file_contents = tf.read_file(pic)
    # channels=3 forces a 3-channel decode, so the 3-way unstack below
    # also works when the file is a grayscale JPEG.
    image = tf.image.decode_jpeg(file_contents, channels=3,
                                 dct_method="INTEGER_ACCURATE")
    R, G, B = tf.unstack(image, num=3, axis=2)
    image = tf.stack([B, G, R], axis=2)  # channel swap: RGB -> BGR
    image = data_augumrntation(image)

    # The augmented image is float32 after the final rescale; it is shown
    # as-is, without casting back to uint8.
    # Use the session as a context manager so it is closed after the run.
    with tf.Session() as sess:
        img = sess.run(image)
    cv2.imshow('img', img)
    cv2.waitKey()

原图:
这里写图片描述
增强后图像(图像做了归一化操作):


注:博众家之所长,集群英之荟萃。

在这里插入图片描述