Reading the CNN Code in Darknet

  1. LOAD

  2. TEST

  3. TRAIN

This post focuses on the CNN part of the Darknet framework and draws on the annotations of my classmate Jie. Thanks!

LOAD

parser.c / parse_net_options function

void parse_net_options(list *options, network *net)
{
    net->batch = option_find_int(options, "batch",1);               // test: batch=1, subdivisions=1
    net->learning_rate = option_find_float(options, "learning_rate", .001);
    net->momentum = option_find_float(options, "momentum", .9);
    net->decay = option_find_float(options, "decay", .0001);        // yolov3.cfg sets decay=0.0005
    int subdivs = option_find_int(options, "subdivisions",1);       // train: batch=64, subdivisions=16
    net->time_steps = option_find_int_quiet(options, "time_steps",1); // time_steps=1
    net->notruth = option_find_int_quiet(options, "notruth",0);
    net->batch /= subdivs;                                          // test: batch=1; train: batch=64/16=4
    net->batch *= net->time_steps;                                  // time_steps=1, so batch is unchanged
    net->subdivisions = subdivs;
    net->random = option_find_int_quiet(options, "random", 0);

    net->adam = option_find_int_quiet(options, "adam", 0);
    // ... (remaining options elided)
}
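Concretely, with the numbers from the comments above (the stock yolov3.cfg uses batch=64, subdivisions=16 for training; detection/test mode uses batch=1, subdivisions=1):

test:  net->batch = 1/1   * 1 = 1
train: net->batch = 64/16 * 1 = 4

So at train time each forward/backward pass processes 4 images, and 16 such passes (net->subdivisions) together make up one full 64-image batch.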

Loading the weights:

// net      = the network parsed from yolov3.cfg
// filename = yolov3.weights
void load_weights_upto(network *net, char *filename, int start, int cutoff)
{
    // ... (opening the file and reading the header elided)
    int i;
    for(i = start; i < net->n && i < cutoff; ++i){
        layer l = net->layers[i];
        if (l.dontload) continue;
        if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){
            // load convolutional weights (see the sketch below)
        }
        if(l.type == CONNECTED){
        }
        if(l.type == BATCHNORM){
        }
        if(l.type == CRNN){
        }
        if(l.type == RNN){
        }
        if (l.type == LSTM) {
        }
    }
    // ... (closing the file elided)
}

As this shows, each layer type gets its own loader. Since the yolov3 network contains no connected, batchnorm, CRNN, RNN, or LSTM layers, loading yolov3.weights only ever takes the convolutional branch.
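The body elided in the convolutional branch above calls load_convolutional_weights(l, fp) in the Darknet source. A simplified sketch of its read order (ignoring the flip/transpose handling in the real source), to show how the .weights file is laid out:

    // read order on disk: biases, then batch-norm statistics (if any), then kernel weights
    fread(l.biases, sizeof(float), l.n, fp);
    if (l.batch_normalize && (!l.dontloadscales)){
        fread(l.scales, sizeof(float), l.n, fp);
        fread(l.rolling_mean, sizeof(float), l.n, fp);
        fread(l.rolling_variance, sizeof(float), l.n, fp);
    }
    fread(l.weights, sizeof(float), l.nweights, fp);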

TEST

detector.c / test_detector function

list *options = read_data_cfg(datacfg);
// cfgfile = cfg/yolov3.cfg, weightfile = yolov3.weights
// load the network structure and the trained weights
network *net = load_network(cfgfile, weightfile, 0);
// load the image, treated as color by default
image im = load_image_color(input,0,0);
// resize the image to the network input size (letterboxing preserves the aspect ratio)
image sized = letterbox_image(im, net->w, net->h);

float *X = sized.data;
time = what_time_is_it_now();
// run the forward pass
network_predict(net, X);

network.c

// called as network_predict(net, X) from test_detector, where
//   net = load_network(cfgfile, weightfile, 0)   (cfgfile=cfg/yolov3.cfg, weightfile=yolov3.weights)
//   X   = sized.data
float *network_predict(network *net, float *input)
{
    network orig = *net;
    net->input = input;   // the network input is the image, e.g. dog.jpg
    net->truth = 0;
    net->train = 0;       // inference mode
    net->delta = 0;
    forward_network(net);
    float *out = net->output;
    *net = orig;
    return out;
}

void forward_network(network *netp)
{
    network net = *netp;
    int i;
    for(i = 0; i < net.n; ++i){
        net.index = i;
        layer l = net.layers[i];
        if(l.delta){
            fill_cpu(l.outputs * l.batch, 0, l.delta, 1);
        }
        l.forward(l, net);       // dispatch through the layer's function pointer
        net.input = l.output;    // this layer's output becomes the next layer's input
        if(l.truth) {
            net.truth = l.output;
        }
    }
    calc_network_cost(netp);
}

struct layer{
    LAYER_TYPE type;
    ACTIVATION activation;
    COST_TYPE cost_type;
    void (*forward)   (struct layer, struct network);
    void (*backward)  (struct layer, struct network);
    void (*update)    (struct layer, update_args);
    void (*forward_gpu)  (struct layer, struct network);
    void (*backward_gpu) (struct layer, struct network);
    void (*update_gpu)   (struct layer, update_args);
    int batch_normalize;
    // ...
};
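Each concrete layer constructor fills in these function pointers (for example, make_convolutional_layer below sets l.forward = forward_convolutional_layer), which is how forward_network can run any mix of layers without switching on l.type. A minimal standalone sketch of this C-style virtual dispatch, with hypothetical mini_layer / mini_net types invented here purely for illustration:

#include <stdio.h>

struct mini_net;

typedef struct mini_layer {
    // same shape as Darknet's l.forward: takes the layer and the network state
    void (*forward)(struct mini_layer, struct mini_net);
} mini_layer;

typedef struct mini_net {
    int n;
    mini_layer layers[2];
} mini_net;

static void forward_conv(mini_layer l, mini_net net) { (void)l; (void)net; printf("conv forward\n"); }
static void forward_yolo(mini_layer l, mini_net net) { (void)l; (void)net; printf("yolo forward\n"); }

int main(void)
{
    mini_net net = { 2, { { forward_conv }, { forward_yolo } } };
    int i;
    for (i = 0; i < net.n; ++i)
        net.layers[i].forward(net.layers[i], net);   // the same dispatch forward_network uses
    return 0;
}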

network.c / load_network function

network *load_network(char *cfg, char *weights, int clear)
{
    // parse yolov3.cfg: the network structure, the number of layers,
    // and each layer's option types and values
    network *net = parse_network_cfg(cfg);
    if(weights && weights[0] != 0){
        // load the pretrained weights
        load_weights(net, weights);
    }
    if(clear) (*net->seen) = 0;
    return net;
}

parser.c / parse_network_cfg function

Main layer types: the yolov3 cfg file uses only the convolutional, yolo, upsample, route, and shortcut types.

while(n){
    params.index = count;
    fprintf(stderr, "%5d ", count);
    s = (section *)n->val;
    options = s->options;
    layer l = {0};
    LAYER_TYPE lt = string_to_layer_type(s->type);
    // the yolov3 cfg only contains the layer types handled below
    if(lt == CONVOLUTIONAL){
        l = parse_convolutional(options, params);
    }
    else if(lt == YOLO){
        l = parse_yolo(options, params);
    }
    else if(lt == ROUTE){
        l = parse_route(options, params, net);
    }
    else if(lt == UPSAMPLE){
        l = parse_upsample(options, params, net);
    }
    // residual (resnet-style) connection
    else if(lt == SHORTCUT){
        l = parse_shortcut(options, params, net);
    }else{
        fprintf(stderr, "Type not recognized: %s\n", s->type);
    }
    // ... (bookkeeping and advancing n elided)
}

Let's look at each of these layer types in turn:

parser.c / parse_convolutional function

// [convolutional]
// batch_normalize = 1
// filters = 32
// size = 3
// stride = 1
// pad = 1
// activation = leaky
convolutional_layer parse_convolutional(list *options, size_params params)
{
    int n = option_find_int(options, "filters",1);             // number of filters
    int size = option_find_int(options, "size",1);             // kernel size
    int stride = option_find_int(options, "stride",1);         // stride
    int pad = option_find_int_quiet(options, "pad",0);         // whether to zero-pad
    int padding = option_find_int_quiet(options, "padding",0); // explicit padding; unused in yolov3.cfg
    int groups = option_find_int_quiet(options, "groups", 1);
    if(pad) padding = size/2;   // pad=1 gives padding = size/2 = 3/2 = 1

    char *activation_s = option_find_str(options, "activation", "logistic");
    ACTIVATION activation = get_activation(activation_s);      // LEAKY here

    int batch,h,w,c;
    h = params.h;
    w = params.w;
    c = params.c;
    batch = params.batch;
    if(!(h && w && c)) error("Layer before convolutional layer must output image.");
    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
    int binary = option_find_int_quiet(options, "binary", 0);
    int xnor = option_find_int_quiet(options, "xnor", 0);

    convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,groups,size,stride,padding,activation, batch_normalize, binary, xnor, params.net->adam);
    layer.flipped = option_find_int_quiet(options, "flipped", 0);
    layer.dot = option_find_float_quiet(options, "dot", 0);

    return layer;
}

convolutional_layer.c / make_convolutional_layer function

/*
** Inputs: batch            number of images per (sub)batch
**         h                image height (rows)
**         w                image width (columns)
**         c                number of input channels
**         n                number of convolution kernels (filters)
**         size             kernel size
**         stride           stride
**         padding          zero-padding on each side
**         activation       activation function type
**         batch_normalize  whether to apply batch normalization
**         binary           whether to binarize the weights
**         xnor             whether to binarize both weights and input
**         adam             whether to use the Adam optimizer
*/
// called with: batch,h,w,c,n,groups,size,stride,padding,activation, batch_normalize, binary, xnor, params.net->adam
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int groups, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam)
{
    int i;
    // convolutional_layer is a typedef alias for layer
    convolutional_layer l = {0};
    l.type = CONVOLUTIONAL;

    l.groups = groups;   // 1
    l.h = h;
    l.w = w;
    l.c = c;
    l.n = n;             // number of filters
    l.binary = binary;   // whether to binarize the weights
    l.xnor = xnor;       // whether to binarize both weights and input
    l.batch = batch;     // images per subbatch; 1 at test time
    l.stride = stride;
    l.size = size;
    l.pad = padding;
    l.batch_normalize = batch_normalize;
    // total weights in this layer = filters * channels-per-group * kernel area
    l.weights = calloc(c/groups*n*size*size, sizeof(float));
    l.weight_updates = calloc(c/groups*n*size*size, sizeof(float));
    // the b in Wx+b: one bias per filter, matching the n filters
    // (each W has c/groups*size*size elements)
    l.biases = calloc(n, sizeof(float));
    l.bias_updates = calloc(n, sizeof(float));

    // weight and bias counts; each kernel spans all of its input channels,
    // so a kernel is three-dimensional
    l.nweights = c/groups*n*size*size;
    l.nbiases = n;

    // float scale = 1./sqrt(size*size*c);
    // The variance of a randomly initialized neuron's output grows with the number
    // of inputs, so the weight vector is scaled to normalize the output variance to 1.
    // With rand_normal() drawing from a standard normal, the scale is sqrt(1/n);
    // accounting for ReLU it becomes sqrt(2/n), where n is the fan-in, i.e. the
    // size*size*c weights of one kernel (He initialization).
    float scale = sqrt(2./(size*size*c/l.groups));
    //printf("convscale %f\n", scale);
    //scale = .02;
    //for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1, 1);

    for(i = 0; i < l.nweights; ++i) l.weights[i] = scale*rand_normal();
    // compute the output width and height of this layer
    int out_w = convolutional_out_width(l);
    int out_h = convolutional_out_height(l);
    l.out_h = out_h;
    l.out_w = out_w;
    l.out_c = n;
    l.outputs = l.out_h * l.out_w * l.out_c;   // e.g. 416*416*32 for the first layer
    l.inputs = l.w * l.h * l.c;

    l.output = calloc(l.batch*l.outputs, sizeof(float));   // outputs for the whole subbatch
    l.delta = calloc(l.batch*l.outputs, sizeof(float));

    l.forward = forward_convolutional_layer;
    l.backward = backward_convolutional_layer;
    l.update = update_convolutional_layer;

    if(batch_normalize){
        l.scales = calloc(n, sizeof(float));
        l.scale_updates = calloc(n, sizeof(float));
        for(i = 0; i < n; ++i){
            l.scales[i] = 1;
        }

        l.mean = calloc(n, sizeof(float));
        l.variance = calloc(n, sizeof(float));

        l.mean_delta = calloc(n, sizeof(float));
        l.variance_delta = calloc(n, sizeof(float));

        l.rolling_mean = calloc(n, sizeof(float));
        l.rolling_variance = calloc(n, sizeof(float));
        l.x = calloc(l.batch*l.outputs, sizeof(float));
        l.x_norm = calloc(l.batch*l.outputs, sizeof(float));
    }

    l.workspace_size = get_workspace_size(l);
    l.activation = activation;

    fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BFLOPs\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, (2.0 * l.n * l.size*l.size*l.c/l.groups * l.out_h*l.out_w)/1000000000.);

    return l;
}
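The output size comes from the two helpers called above; paraphrasing their definitions in convolutional_layer.c, both reduce to the standard convolution output-size formula:

int convolutional_out_width(convolutional_layer l)
{
    return (l.w + 2*l.pad - l.size) / l.stride + 1;
}

int convolutional_out_height(convolutional_layer l)
{
    return (l.h + 2*l.pad - l.size) / l.stride + 1;
}

For the first yolov3 layer (w = h = 416, size = 3, stride = 1, pad = 1) this gives (416 + 2 - 3)/1 + 1 = 416, so a 3x3/1 convolution with pad=1 preserves the spatial size and l.outputs = 416*416*32.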

First, the forward pass: void forward_convolutional_layer(convolutional_layer l, network net)

void forward_convolutional_layer(convolutional_layer l, network net)
{
    int i, j;

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    if(l.xnor){
        binarize_weights(l.weights, l.n, l.c/l.groups*l.size*l.size, l.binary_weights);
        swap_binary(&l);
        binarize_cpu(net.input, l.c*l.h*l.w*l.batch, l.binary_input);
        net.input = l.binary_input;
    }

    int m = l.n/l.groups;               // number of filters
    int k = l.size*l.size*l.c/l.groups; // weights per filter
    int n = l.out_w*l.out_h;            // output feature-map size
    for(i = 0; i < l.batch; ++i){
        for(j = 0; j < l.groups; ++j){
            float *a = l.weights + j*l.nweights/l.groups;
            float *b = net.workspace;
            float *c = l.output + (i*l.groups + j)*n*m;

            im2col_cpu(net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w,
                    l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b);
            gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
        }
    }

    if(l.batch_normalize){
        forward_batchnorm_layer(l, net);
    } else {
        add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w);
    }

    activate_array(l.output, l.outputs*l.batch, l.activation);
    // static inline float leaky_activate(float x){return (x>0) ? x : .1*x;}     // leaky ReLU: y=x for x>0, y=0.1x otherwise
    // static inline float linear_activate(float x){return x;}                   // identity: y=x
    // static inline float logistic_activate(float x){return 1./(1. + exp(-x));} // sigmoid

    if(l.binary || l.xnor) swap_binary(&l);
}

The gemm function:

/*
** Purpose: matrix computation C = ALPHA * A * B + BETA * C; the result is written to C.
** Inputs:
**   TA,TB  whether A and B need transposing (1 = yes, 0 = no). Transposition is needed
**          whenever the raw dimensions of A and B do not match for multiplication,
**          e.g. A is 3*2 and B is 4*2, so B must be transposed first.
**   M      rows of A and C (if A is transposed, the rows of A' rather than of A)
**   N      columns of B and C (if B is transposed, the columns of B' rather than of B)
**   K      columns of A and rows of B (again, of the transposed matrices where applicable;
**          after any transposition, A and B must satisfy the usual matching rule)
**   A,B,C  input matrices, stored as flat row-major arrays
**   ALPHA  scalar coefficient
**   BETA   scalar coefficient
**   lda    columns of A if not transposed, rows of A' if transposed
**   ldb    columns of B if not transposed, rows of B' if transposed
**   ldc    columns of C
** Cases:
**   TA = 0, TB = 0: computes C = ALPHA * A  * B  + BETA * C; M is the rows of A and C, N the columns
**                   of B and C, K the columns of A and rows of B; lda = columns of A, ldb = columns of B.
**   TA = 1, TB = 0: computes C = ALPHA * A' * B  + BETA * C; M is the rows of A' and C; lda = rows of A'.
**   TA = 0, TB = 1: computes C = ALPHA * A  * B' + BETA * C; N is the columns of B' and C; ldb = rows of B'.
**   TA = 1, TB = 1: computes C = ALPHA * A' * B' + BETA * C; dimensions follow the same pattern.
**   In every case the matrices that actually enter the product must satisfy the row/column matching rule;
**   e.g. A 2*3 with B 3*2 and C 2*2 is the first case, A 3*2 with B 3*2 and C 2*2 is the second, and so on.
** Note: this is a plain C implementation of matrix multiplication, apparently modeled on Caffe's
**       math_functions.cpp. Reference: http://www.voidcn.com/blog/thy_2014/article/p-6149690.html
** Worked example: the subtle point is whether A and B are transposed. Although A, B, C are matrices,
**   they are stored as flat row-major arrays. Suppose A = [1, 2, 3, 2, 2, 1], B = [2, 0, 1, 1, 2, 1],
**   C = [3, 0, 1, 2] (these flat arrays are fixed), with C a 2*2 matrix, i.e. C = [3, 0; 1, 2]. To compute
**   C = ALPHA * A * B + BETA * C under the matching rule, the first factor must be 2*3 and the second 3*2,
**   because A and B each have exactly 6 elements.
**   With gemm_nn() (neither transposed), A = [1, 2, 3; 2, 2, 1] and B = [2, 0; 1, 1; 2, 1], and
**   gemm_nn(2, 2, 3, 1, A, 3, B, 2, C, 2) yields C = [13, 5; 9, 5] (with ALPHA = BETA = 1, as below).
**   With gemm_tn(), A is transposed before the product: transposed it is 2*3, so A itself is 3*2.
**   Thus A = [1, 2; 3, 2; 2, 1], A' = [1, 3, 2; 2, 2, 1], B = [2, 0; 1, 1; 2, 1], and
**   gemm_tn(2, 2, 3, 1, A, 2, B, 2, C, 2) computes A'*B + C. Note how this A differs from the A in
**   gemm_nn(): the flat input array is still [1, 2, 3, 2, 2, 1], but interpreted row-major as a 3*2 matrix.
**   When calling gemm_tn(), M, N, K are 2, 2, 3: always the dimensions of the matrices that actually
**   enter the product, so M = 2 (rows of A'), N = 2 (columns of B), K = 3 (columns of A' = rows of B).
**   lda = 2 because A is transposed, so the rows of A' are passed rather than the columns 3; ldb = 2
**   (columns of B). The result is C = [12, 5; 9, 5].
**   gemm_nt() and gemm_tt() follow the same analysis. These notes were verified against a test file,
**   darknet_test_gemm.c.
** Note: these gemm() functions accumulate into C rather than overwriting it: any values already in C
**   are scaled by BETA and added to, as the formula C = ALPHA * A * B + BETA * C shows.
*/
// called as gemm(0,0,m,n,k,1,a,k,b,n,1,c,n) with
//   int m = l.n/l.groups;               // number of filters
//   int k = l.size*l.size*l.c/l.groups; // weights per filter
//   int n = l.out_w*l.out_h;            // output feature-map size (elements per feature map)
void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA,
        float *A, int lda,
        float *B, int ldb,
        float BETA,
        float *C, int ldc)
{
    //printf("cpu: %d %d %d %d %d %f %d %d %f %d\n",TA, TB, M, N, K, ALPHA, lda, ldb, BETA, ldc);
    int i, j;
    for(i = 0; i < M; ++i){
        for(j = 0; j < N; ++j){
            C[i*ldc + j] *= BETA;
        }
    }
    if(!TA && !TB)
        gemm_nn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc);
    else if(TA && !TB)
        gemm_tn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc);
    else if(!TA && TB)
        gemm_nt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc);
    else
        gemm_tt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc);
}
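The worked example in the comment above is easy to check by hand. A minimal test harness for it, assuming it is compiled and linked against Darknet's gemm.c (which provides gemm_nn):

#include <stdio.h>

void gemm_nn(int M, int N, int K, float ALPHA,
             float *A, int lda, float *B, int ldb, float *C, int ldc);

int main(void)
{
    float A[] = {1, 2, 3, 2, 2, 1};   // 2x3, row-major
    float B[] = {2, 0, 1, 1, 2, 1};   // 3x2, row-major
    float C[] = {3, 0, 1, 2};         // 2x2, row-major; BETA*C with BETA=1 already applied

    gemm_nn(2, 2, 3, 1, A, 3, B, 2, C, 2);
    // expected: C = [13, 5; 9, 5]
    printf("%g %g\n%g %g\n", C[0], C[1], C[2], C[3]);
    return 0;
}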

The gemm_nn function:

// called as gemm(0,0,m,n,k,1,a,k,b,n,1,c,n) with
//   int m = l.n / l.groups;               // number of filters
//   int k = l.size*l.size*l.c / l.groups; // weights per filter
//   int n = l.out_w*l.out_h;              // output feature-map size, 416*416 for the first layer
//   float *a = l.weights + j*l.nweights / l.groups;
//   float *b = net.workspace;             // the im2col buffer
//   float *c = l.output + (i*l.groups + j)*n*m;   // l.outputs = l.out_h * l.out_w * l.out_c
void gemm_nn(int M, int N, int K, float ALPHA, // M: number of filters, N: feature-map size, ALPHA=1
        float *A, int lda,  // A: l.weights; lda = K, the weights per filter
        float *B, int ldb,  // B: the im2col buffer; ldb = N, the feature-map size
        float *C, int ldc)  // C: l.output; ldc = N, the feature-map size
{
    /*
    ** Purpose: called by gemm_cpu() to compute C = ALPHA * A * B + C,
    **          with C stored row-major (all rows concatenated into one array)
    ** Inputs:  A,B,C  input matrices (flat arrays)
    **          ALPHA  scalar coefficient
    **          M      rows of A and C (A is not transposed here, so rows of A itself)
    **          N      columns of B and C (B is not transposed here, so columns of B itself)
    **          K      columns of A and rows of B (neither transposed here)
    **          lda    columns of A (not transposed)
    **          ldb    columns of B (not transposed)
    **          ldc    columns of C
    ** Note 1: a plain C matrix multiply, apparently modeled on Caffe's math_functions.cpp.
    **         Reference: http://www.voidcn.com/blog/thy_2014/article/p-6149690.html
    **         See the gemm_cpu() comment for the full details.
    ** Note 2: this is one of the four cases dispatched by gemm_cpu(), with neither A nor B
    **         transposed; the "nn" in the name stands for "not transposed, not transposed".
    */
    int i,j,k;
    #pragma omp parallel for
    for(i = 0; i < M; ++i){            // outer loop: filters
        for(k = 0; k < K; ++k){        // middle loop: weights of one filter
            register float A_PART = ALPHA*A[i*lda+k];
            for(j = 0; j < N; ++j){    // inner loop: output feature-map pixels
                C[i*ldc+j] += A_PART*B[k*ldb+j];
            }
        }
    }
}

The im2col function:

// called as im2col_cpu(net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w,
//                      l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b);
void im2col_cpu(float* data_im, // the i-th image of the batch
        int channels, int height, int width, // channels = l.c = 3 and h = w = 416 for the input image
        int ksize, int stride, int pad, float* data_col) // ksize=3, stride=1, pad=1; data_col = b, the unrolled output buffer
{
    int c,h,w;
    int height_col = (height + 2*pad - ksize) / stride + 1;
    int width_col = (width + 2*pad - ksize) / stride + 1;

    int channels_col = channels * ksize * ksize; // number of weights in one kernel
    // the outer loop runs once per kernel element; its trip count is the number of rows of data_col
    for (c = 0; c < channels_col; ++c) {
        // Column offset. The kernel is a 2-D matrix stored row-major in a flat array, and the
        // remainder gives the column within the kernel. For a 3x3 kernel on 3 channels: c=0 is
        // clearly column 0, c=5 is column 2, c=9 is column 0 of the kernel on the second channel,
        // and c=26 is column 2 (on the third channel).
        int w_offset = c % ksize;

        // Row offset. The kernel rows are likewise concatenated row-major in the flat array.
        // For a 3x3 kernel on a 3-channel image, one kernel has 27 elements, 9 per channel
        // (identical across channels); every multiple of ksize moves down one kernel row,
        // so h_offset takes values 0, 1, 2, matching rows 1, 2, 3 of the 3x3 kernel.
        int h_offset = (c / ksize) % ksize;

        // Channel offset. channels_col concatenates the kernel across channels: for 3 channels and
        // a 3x3 kernel, every 9 elements switch channels, so c=0..8 gives c_im=0, c=9..17 gives
        // c_im=1, and c=18..26 gives c_im=2.
        int c_im = c / ksize / ksize;

        // the middle loop runs once per output row (height_col)
        for (h = 0; h < height_col; ++h) {
            // The inner loop runs once per output column (width_col). Each row of data_col holds
            // one map of input pixels, itself stored row-major within that row, so data_col ends
            // up with channels_col rows and height_col*width_col columns.
            for (w = 0; w < width_col; ++w) {

                // For a 3x3 kernel h_offset is 0, 1 or 2; h_offset=0 selects every pixel that is
                // multiplied by the kernel's first row, and so on. Adding h*stride shifts the kernel
                // down the image: if convolution starts at (0,0), it first covers pixels
                // (0,0)..(2,2); with stride=2 the next row of outputs starts at image row 2, i.e. (2,0).
                int im_row = h_offset + h * stride;

                // Likewise w_offset is 0, 1 or 2; w_offset=1 selects every pixel multiplied by the
                // kernel's second column. The kernel scans the image row by row, and adding w*stride
                // shifts it right: if the previous output started at pixel (0,0) and stride=2,
                // the next starts at (0,2).
                int im_col = w_offset + w * stride;

                // col_index is the pixel's index in the rearranged image:
                // c * height_col * width_col + h * width_col + w (row-major, all channels
                // concatenated), i.e. channel c, row h, column w.
                int col_index = (c * height_col + h) * width_col + w;
                data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,
                        im_row, im_col, c_im, pad);
            }
        }
    }
}
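Plugging the first yolov3 layer into these formulas (channels=3, height=width=416, ksize=3, stride=1, pad=1):

height_col = width_col = (416 + 2*1 - 3)/1 + 1 = 416
channels_col = 3 * 3 * 3 = 27

so data_col is a 27 x 173056 matrix: exactly the K x N operand B of the gemm call in forward_convolutional_layer above.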

TRAIN

detector.c / train_detector function

        float loss = 0;
        loss = train_network(net, train);
        if (avg_loss < 0) avg_loss = loss;   // avg_loss is initialized to -1
        avg_loss = avg_loss*.9 + loss*.1;    // exponential moving average of the loss

        i = get_current_batch(net);
        printf("%ld: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, i*imgs);
        if(i%100==0){
#ifdef GPU
            if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
            char buff[256];
            sprintf(buff, "%s/%s.backup", backup_directory, base);
            save_weights(net, buff);
        }
        if(i%10000==0 || (i < 1000 && i%100 == 0)){
#ifdef GPU
            if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
            save_weights(net, buff);
        }
        free_data(train);
    }
#ifdef GPU
    if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
    char buff[256];
    sprintf(buff, "%s/%s_final.weights", backup_directory, base);
    save_weights(net, buff);
}

network.c / train_network_datum function (train_network calls this once per subbatch)

float train_network_datum(network *net)
{
    *net->seen += net->batch;
    net->train = 1;
    forward_network(net);
    backward_network(net);
    float error = *net->cost;
    if(((*net->seen)/net->batch)%net->subdivisions == 0) update_network(net);
    return error;
}
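Note what the update condition does: *net->seen counts images, so (*net->seen)/net->batch counts forward/backward passes, and update_network runs only once every net->subdivisions passes. With the training numbers from parse_net_options (cfg batch=64, subdivisions=16, hence net->batch=4):

passes 1..15: seen = 4, 8, ..., 60  -> no update
pass 16:      seen = 64             -> (64/4) % 16 == 0, update_network(net)

Gradients from 16 subbatches of 4 images thus accumulate into one effective 64-image batch update.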

The backward_network function:

void backward_network(network *netp)
{
#ifdef GPU
    if(netp->gpu_index >= 0){
        backward_network_gpu(netp);
        return;
    }
#endif
    network net = *netp;
    int i;
    network orig = net;
    for(i = net.n-1; i >= 0; --i){
        layer l = net.layers[i];
        if(l.stopbackward) break;
        if(i == 0){
            net = orig;
        }else{
            // point net.input/net.delta at the previous layer's buffers,
            // so each layer writes its input gradient into prev.delta
            layer prev = net.layers[i-1];
            net.input = prev.output;
            net.delta = prev.delta;
        }
        net.index = i;
        l.backward(l, net);
    }
}

convolutional_layer.c / backward_convolutional_layer function

void backward_convolutional_layer(convolutional_layer l, network net)
{
    int i, j;
    int m = l.n/l.groups;               // number of filters
    int n = l.size*l.size*l.c/l.groups; // weights per filter
    int k = l.out_w*l.out_h;            // output feature-map size

    // multiply l.delta by the activation gradient
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);

    if(l.batch_normalize){
        backward_batchnorm_layer(l, net);
    } else {
        backward_bias(l.bias_updates, l.delta, l.batch, l.n, k);
    }

    for(i = 0; i < l.batch; ++i){
        for(j = 0; j < l.groups; ++j){
            float *a = l.delta + (i*l.groups + j)*m*k;
            float *b = net.workspace;
            float *c = l.weight_updates + j*l.nweights/l.groups;

            float *im = net.input+(i*l.groups + j)*l.c/l.groups*l.h*l.w;

            // weight gradient: delta times the transposed im2col of the input
            im2col_cpu(im, l.c/l.groups, l.h, l.w,
                    l.size, l.stride, l.pad, b);
            gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);

            if(net.delta){
                // input gradient: transposed weights times delta, scattered back with col2im
                a = l.weights + j*l.nweights/l.groups;
                b = l.delta + (i*l.groups + j)*m*k;
                c = net.workspace;

                gemm(1,0,n,k,m,1,a,n,b,k,0,c,k);

                col2im_cpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride,
                        l.pad, net.delta + (i*l.groups + j)*l.c/l.groups*l.h*l.w);
            }
        }
    }
}
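The two gemm calls reuse the forward-pass shapes (m filters, n weights per filter, k output pixels), just combined differently; writing out the dimensions makes the bookkeeping clear:

gemm(0,1,m,n,k,...):  l.weight_updates (m x n) += l.delta (m x k) * im2col(input)' (k x n)
gemm(1,0,n,k,m,...):  net.workspace (n x k) = l.weights' (n x m) * l.delta (m x k)

col2im_cpu then scatter-adds the n x k workspace back into the image-shaped net.delta, the exact reverse of im2col.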

Where does the cost read by float error = *net->cost; get computed? At the end of the forward pass:


void calc_network_cost(network *netp)
{
    network net = *netp;
    int i;
    float sum = 0;
    int count = 0;
    for(i = 0; i < net.n; ++i){
        if(net.layers[i].cost){
            sum += net.layers[i].cost[0];
            ++count;
        }
    }
    // average over all layers that define a cost (the yolo layers, for yolov3)
    *net.cost = sum/count;
}

The yolo layer

parser.c / parse_yolo function

layer parse_yolo(list *options, size_params params)
{
    int classes = option_find_int(options, "classes", 20);
    int total = option_find_int(options, "num", 1);   // total number of anchors
    int num = total;

    char *a = option_find_str(options, "mask", 0);
    int *mask = parse_yolo_mask(a, &num);   // num becomes the number of masked anchors for this layer
    layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes);
    assert(l.outputs == params.inputs);

    l.max_boxes = option_find_int_quiet(options, "max",90);
    l.jitter = option_find_float(options, "jitter", .2);

    l.ignore_thresh = option_find_float(options, "ignore_thresh", .5);
    l.truth_thresh = option_find_float(options, "truth_thresh", 1);
    l.random = option_find_int_quiet(options, "random", 0);

    char *map_file = option_find_str(options, "map", 0);
    if (map_file) l.map = read_map(map_file);

    a = option_find_str(options, "anchors", 0);
    if(a){
        // parse the comma-separated anchor list into l.biases
        int len = strlen(a);
        int n = 1;
        int i;
        for(i = 0; i < len; ++i){
            if (a[i] == ',') ++n;
        }
        for(i = 0; i < n; ++i){
            float bias = atof(a);
            l.biases[i] = bias;
            a = strchr(a, ',')+1;
        }
    }
    return l;
}
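For reference, this is the kind of [yolo] section parse_yolo consumes; the values below are the first [yolo] block of the stock yolov3.cfg (if your cfg differs, the parsed values differ accordingly):

[yolo]
mask = 6,7,8
anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
classes=80
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1

Here num=9 is total, mask picks 3 of the 9 anchor pairs for this scale (so num becomes 3 after parse_yolo_mask), and the 18 comma-separated anchor values fill l.biases.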