PyCaffe Transformer

img

在使用Caffe训练网络的时候,往往会在train的prototxt里对图像有一些预处理操作、例如做resize、对训练数据减去均值等,在实际推理的时候我们还需把输入图像resize到网络输入的大小,必要的时候还需要做图像通道的transpose。在用PyCaffe进行推理的时候,这些操作需要使用caffe的transformer来实现,本文记述prototxt的预处理键值

通过查看Transformer对象的源代码可以知道有哪些方法:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
class Transformer:
"""
Transform input for feeding into a Net.
Note: this is mostly for illustrative purposes and it is likely better
to define your own input preprocessing routine for your needs.
Parameters
----------
net : a Net for which the input should be prepared
"""
def __init__(self, inputs):
self.inputs = inputs # inputs存的是一个name和shape的字典,比如 {'data':(1,3,256,340)}
self.transpose = {} # Transformer中的这些参数全部是字典dict
self.channel_swap = {}
self.raw_scale = {}
self.mean = {}
self.input_scale = {}
def __check_input(self, in_):
if in_ not in self.inputs:
raise Exception('{} is not one of the net inputs: {}'.format(
in_, self.inputs))
def preprocess(self, in_, data):
"""
Format input for Caffe:
- convert to single
- resize to input dimensions (preserving number of channels)
- transpose dimensions to K x H x W
- reorder channels (for instance color to BGR)
- scale raw input (e.g. from [0, 1] to [0, 255] for ImageNet models)
- subtract mean
- scale feature
Parameters
----------
in_ : name of input blob to preprocess for
data : (H' x W' x K) ndarray !!!!输入data必须是单个image,后面有限制
Returns
-------
caffe_in : (K x H x W) ndarray for input to a Net
"""
self.__check_input(in_)
caffe_in = data.astype(np.float32, copy=False)
transpose = self.transpose.get(in_)
channel_swap = self.channel_swap.get(in_)
raw_scale = self.raw_scale.get(in_)
mean = self.mean.get(in_)
input_scale = self.input_scale.get(in_)
in_dims = self.inputs[in_][2:] # in_dims即inputs设定的shape的(H,W)
# 检查输入data的(H,W)是否与in_dims一样,注意这里[:2]限制了输入data只能是单个iamge
if caffe_in.shape[:2] != in_dims:
caffe_in = resize_image(caffe_in, in_dims)
if transpose is not None:
caffe_in = caffe_in.transpose(transpose)
if channel_swap is not None: # 如果没有transpose第一维不是通道,这里会报错吧。。。
caffe_in = caffe_in[channel_swap, :, :]
if raw_scale is not None:
caffe_in *= raw_scale
if mean is not None:
caffe_in -= mean
if input_scale is not None:
caffe_in *= input_scale
return caffe_in
def deprocess(self, in_, data):
"""
Invert Caffe formatting; see preprocess().
"""
self.__check_input(in_)
decaf_in = data.copy().squeeze()
transpose = self.transpose.get(in_)
channel_swap = self.channel_swap.get(in_)
raw_scale = self.raw_scale.get(in_)
mean = self.mean.get(in_)
input_scale = self.input_scale.get(in_)
if input_scale is not None:
decaf_in /= input_scale
if mean is not None:
decaf_in += mean
if raw_scale is not None:
decaf_in /= raw_scale
if channel_swap is not None:
decaf_in = decaf_in[np.argsort(channel_swap), :, :]
if transpose is not None:
decaf_in = decaf_in.transpose(np.argsort(transpose))
return decaf_in
def set_transpose(self, in_, order):
"""
Set the input channel order for e.g. RGB to BGR conversion
as needed for the reference ImageNet model.
Parameters
----------
in_ : which input to assign this channel order
order : the order to transpose the dimensions
"""
self.__check_input(in_)
if len(order) != len(self.inputs[in_]) - 1: #注意,这里只比较设定shape的后3维
raise Exception('Transpose order needs to have the same number of '
'dimensions as the input.')
self.transpose[in_] = order
def set_channel_swap(self, in_, order):
"""
Set the input channel order for e.g. RGB to BGR conversion
as needed for the reference ImageNet model.
N.B. this assumes the channels are the first dimension AFTER transpose.
Parameters
----------
in_ : which input to assign this channel order
order : the order to take the channels.
(2,1,0) maps RGB to BGR for example.
"""
self.__check_input(in_)
if len(order) != self.inputs[in_][1]:
raise Exception('Channel swap needs to have the same number of '
'dimensions as the input channels.')
self.channel_swap[in_] = order
def set_raw_scale(self, in_, scale):
"""
Set the scale of raw features s.t. the input blob = input * scale.
While Python represents images in [0, 1], certain Caffe models
like CaffeNet and AlexNet represent images in [0, 255] so the raw_scale
of these models must be 255.
Parameters
----------
in_ : which input to assign this scale factor
scale : scale coefficient
"""
self.__check_input(in_)
self.raw_scale[in_] = scale
def set_mean(self, in_, mean):
"""
Set the mean to subtract for centering the data.
Parameters
----------
in_ : which input to assign this mean.
mean : mean ndarray (input dimensional or broadcastable)
"""
self.__check_input(in_)
ms = mean.shape
if mean.ndim == 1:
# broadcast channels
if ms[0] != self.inputs[in_][1]:
raise ValueError('Mean channels incompatible with input.')
mean = mean[:, np.newaxis, np.newaxis]
else:
# elementwise mean
if len(ms) == 2:
ms = (1,) + ms
if len(ms) != 3:
raise ValueError('Mean shape invalid')
if ms != self.inputs[in_][1:]:
raise ValueError('Mean shape incompatible with input shape.')
self.mean[in_] = mean
def set_input_scale(self, in_, scale):
"""
Set the scale of preprocessed inputs s.t. the blob = blob * scale.
N.B. input_scale is done AFTER mean subtraction and other preprocessing
while raw_scale is done BEFORE.
Parameters
----------
in_ : which input to assign this scale factor
scale : scale coefficient
"""
self.__check_input(in_)
self.input_scale[in_] = scale

在Python中有很典型的用法:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44

import numpy as np
import sys,os
# 设置当前的工作环境在caffe下
caffe_root = '/home/xxx/caffe/'
# 我们也把caffe/python也添加到当前环境
sys.path.insert(0, caffe_root + 'python')
import caffe
os.chdir(caffe_root)#更换工作目录
# 设置网络结构
net_file=caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt'
# 添加训练之后的参数
caffe_model=caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
# 均值文件
mean_file=caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy'
# 这里对任何一个程序都是通用的,就是处理图片
# 把上面添加的两个变量都作为参数构造一个Net
net = caffe.Net(net_file,caffe_model,caffe.TEST)
# 得到data的形状,这里的图片是默认matplotlib底层加载的
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
# matplotlib加载的image是像素[0-1],图片的数据格式[weight,high,channels],RGB
# caffe加载的图片需要的是[0-255]像素,数据格式[channels,weight,high],BGR,那么就需要转换
# channel 放到前面
transformer.set_transpose('data', (2,0,1))
transformer.set_mean('data', np.load(mean_file).mean(1).mean(1))
# 图片像素放大到[0-255]
transformer.set_raw_scale('data', 255)
# RGB-->BGR 转换
transformer.set_channel_swap('data', (2,1,0))
# 这里才是加载图片
im=caffe.io.load_image(caffe_root+'examples/images/cat.jpg')
# 用上面的transformer.preprocess来处理刚刚加载图片
net.blobs['data'].data[...] = transformer.preprocess('data',im)
#注意,网络开始向前传播啦
out = net.forward()
# 最终的结果: 当前这个图片的属于哪个物体的概率(列表表示)
output_prob = output['prob'][0]
# 找出最大的那个概率
print('predicted class is:', output_prob.argmax())
# 也可以找出前五名的概率
top_inds = output_prob.argsort()[::-1][:5]
print('probabilities and labels:')
for i in top_inds:
print(i,output_prob[i], labels[i])

Resize

注意到,Transformer并没有提供resize的接口,其实在:

1
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})

这里会自动指定输入图像的shape,之后调用preprocess:

1
net.blobs['data'].data[...] = transformer.preprocess('data',im)

会自动做resize。

手动指定mean

一般caffe的数据集会提供mean的binary文件,但有的时候一些train的脚本不会使用到这个文件,举个例子,这是一个针对imagenet的train.prototxt:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
mirror: true
crop_size: 224
mean_value: 104
mean_value: 117
mean_value: 123
}
data_param {
source: "../ilsvrc12_lmdb_shrt_256/ilsvrc12_train_lmdb"
batch_size: 128
backend: LMDB
}
}

这时候该怎么办呢,这部分含义是,对R、G、B通道减去104、117、123,所以需要:

1
2
3
4
5
mean = np.ones([3, 224, 224], dtype=np.float)
mean[0,:,:] = 104
mean[1,:,:] = 117
mean[2,:,:] = 123
transformer.set_mean('data', mean) # subtract the dataset-mean value in each channel

图像的保存

这里,我推荐大家使用opencv、因为PIL.Image这个库我使用了有很多不方便的地方。

1
2
3
4
saveim = transformed_image.transpose((1, 2, 0))
cv.imwrite(filepath, saveim)
image = cv.imread(image)
image = image.transpose((2,0,1))
2021.01 What i Read 2020.12 What i Read

Comments

Your browser is out-of-date!

Update your browser to view this website correctly. Update my browser now

×