Skip to content

Commit

Permalink
Merge pull request #3 from AlexeyAB/master
Browse files Browse the repository at this point in the history
Merge Changes from Base Repo (AlexeyAB/darknet)
  • Loading branch information
mmaaz60 authored Mar 9, 2020
2 parents d392cbc + b56e8d1 commit 383977f
Show file tree
Hide file tree
Showing 20 changed files with 207 additions and 87 deletions.
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Yolo-v3 and Yolo-v2 for Windows and Linux
### (neural network for object detection) - Tensor Cores can be used on [Linux](https://github.com/AlexeyAB/darknet#how-to-compile-on-linux) and [Windows](https://github.com/AlexeyAB/darknet#how-to-compile-on-windows-using-vcpkg)
### (neural network for object detection) - Tensor Cores can be used on [Linux](https://github.com/AlexeyAB/darknet#how-to-compile-on-linux) and [Windows](https://github.com/AlexeyAB/darknet#how-to-compile-on-windows-using-cmake-gui)

More details: http://pjreddie.com/darknet/yolo/

Expand All @@ -8,7 +8,8 @@ More details: http://pjreddie.com/darknet/yolo/
[![TravisCI](https://travis-ci.org/AlexeyAB/darknet.svg?branch=master)](https://travis-ci.org/AlexeyAB/darknet)
[![AppveyorCI](https://ci.appveyor.com/api/projects/status/594bwb5uoc1fxwiu/branch/master?svg=true)](https://ci.appveyor.com/project/AlexeyAB/darknet/branch/master)
[![Contributors](https://img.shields.io/github/contributors/AlexeyAB/Darknet.svg)](https://github.com/AlexeyAB/darknet/graphs/contributors)
[![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/AlexeyAB/darknet/blob/master/LICENSE)
[![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/AlexeyAB/darknet/blob/master/LICENSE)
[![DOI](https://zenodo.org/badge/75388965.svg)](https://zenodo.org/badge/latestdoi/75388965)


* [Requirements (and how to install dependencies)](#requirements)
Expand Down Expand Up @@ -395,7 +396,7 @@ Training Yolo v3:
* https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L610
* https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L696
* https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L783
* change [`filters=255`] to filters=(classes + 5)x3 in the 3 `[convolutional]` before each `[yolo]` layer
* change [`filters=255`] to filters=(classes + 5)x3 in the 3 `[convolutional]` before each `[yolo]` layer, keep in mind that it only has to be the last `[convolutional]` before each of the `[yolo]` layers.
* https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L603
* https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L689
* https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L776
Expand Down
30 changes: 24 additions & 6 deletions include/darknet.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@

#define SECRET_NUM -1234

typedef enum { UNUSED_DEF_VAL } UNUSED_ENUM_TYPE;

#ifdef GPU

#include <cuda_runtime.h>
Expand All @@ -42,8 +44,8 @@

#ifdef CUDNN
#include <cudnn.h>
#endif
#endif
#endif // CUDNN
#endif // GPU

#ifdef __cplusplus
extern "C" {
Expand Down Expand Up @@ -216,6 +218,7 @@ struct layer {
int batch_normalize;
int shortcut;
int batch;
int dynamic_minibatch;
int forced;
int flipped;
int inputs;
Expand Down Expand Up @@ -495,7 +498,7 @@ struct layer {

size_t workspace_size;

#ifdef GPU
//#ifdef GPU
int *indexes_gpu;

float *z_gpu;
Expand Down Expand Up @@ -610,8 +613,21 @@ struct layer {
cudnnConvolutionBwdDataAlgo_t bd_algo, bd_algo16;
cudnnConvolutionBwdFilterAlgo_t bf_algo, bf_algo16;
cudnnPoolingDescriptor_t poolingDesc;
#else // CUDNN
void* srcTensorDesc, *dstTensorDesc;
void* srcTensorDesc16, *dstTensorDesc16;
void* dsrcTensorDesc, *ddstTensorDesc;
void* dsrcTensorDesc16, *ddstTensorDesc16;
void* normTensorDesc, *normDstTensorDesc, *normDstTensorDescF16;
void* weightDesc, *weightDesc16;
void* dweightDesc, *dweightDesc16;
void* convDesc;
UNUSED_ENUM_TYPE fw_algo, fw_algo16;
UNUSED_ENUM_TYPE bd_algo, bd_algo16;
UNUSED_ENUM_TYPE bf_algo, bf_algo16;
void* poolingDesc;
#endif // CUDNN
#endif // GPU
//#endif // GPU
};


Expand All @@ -625,6 +641,7 @@ typedef struct network {
int n;
int batch;
uint64_t *seen;
int *cur_iteration;
int *t;
float epoch;
int subdivisions;
Expand Down Expand Up @@ -701,7 +718,7 @@ typedef struct network {
float *cost;
float clip;

#ifdef GPU
//#ifdef GPU
//float *input_gpu;
//float *truth_gpu;
float *delta_gpu;
Expand All @@ -722,8 +739,9 @@ typedef struct network {
float *global_delta_gpu;
float *state_delta_gpu;
size_t max_delta_gpu_size;
#endif
//#endif // GPU
int optimized_memory;
int dynamic_minibatch;
size_t workspace_size_limit;
} network;

Expand Down
8 changes: 5 additions & 3 deletions src/batchnorm_layer.c
Original file line number Diff line number Diff line change
Expand Up @@ -258,15 +258,17 @@ void forward_batchnorm_layer_gpu(layer l, network_state state)
fast_mean_gpu(l.output_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.mean_gpu);

//fast_v_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.v_cbn_gpu);
int minibatch_index = state.net.current_subdivision + 1;
float alpha = 0.01;
const int minibatch_index = state.net.current_subdivision + 1;
const int max_minibatch_index = state.net.subdivisions;
//printf("\n minibatch_index = %d, max_minibatch_index = %d \n", minibatch_index, max_minibatch_index);
const float alpha = 0.01;

int inverse_variance = 0;
#ifdef CUDNN
inverse_variance = 1;
#endif // CUDNN

fast_v_cbn_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, minibatch_index, l.m_cbn_avg_gpu, l.v_cbn_avg_gpu, l.variance_gpu,
fast_v_cbn_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, minibatch_index, max_minibatch_index, l.m_cbn_avg_gpu, l.v_cbn_avg_gpu, l.variance_gpu,
alpha, l.rolling_mean_gpu, l.rolling_variance_gpu, inverse_variance, .00001);

normalize_scale_bias_gpu(l.output_gpu, l.mean_gpu, l.variance_gpu, l.scales_gpu, l.biases_gpu, l.batch, l.out_c, l.out_h*l.out_w, inverse_variance, .00001f);
Expand Down
2 changes: 1 addition & 1 deletion src/blas.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *varianc

void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean);
void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance);
void fast_v_cbn_gpu(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, float *m_avg, float *v_avg, float *variance,
void fast_v_cbn_gpu(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, int max_minibatch_index, float *m_avg, float *v_avg, float *variance,
const float alpha, float *rolling_mean_gpu, float *rolling_variance_gpu, int inverse_variance, float epsilon);
void normalize_scale_bias_gpu(float *x, float *mean, float *variance, float *scales, float *biases, int batch, int filters, int spatial, int inverse_variance, float epsilon);
void compare_2_arrays_gpu(float *one, float *two, int size);
Expand Down
13 changes: 8 additions & 5 deletions src/blas_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,7 @@ extern "C" void fast_variance_gpu(float *x, float *mean, int batch, int filters,
}


__global__ void fast_v_cbn_kernel(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, float *m_avg, float *v_avg, float *variance,
__global__ void fast_v_cbn_kernel(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, int max_minibatch_index, float *m_avg, float *v_avg, float *variance,
const float alpha, float *rolling_mean_gpu, float *rolling_variance_gpu, int inverse_variance, float epsilon)
{
const int threads = BLOCK;
Expand Down Expand Up @@ -615,16 +615,19 @@ __global__ void fast_v_cbn_kernel(const float *x, float *mean, int batch, int f
if (inverse_variance) variance[filter] = 1.0f / sqrtf(variance_tmp + epsilon);
else variance[filter] = variance_tmp;

rolling_mean_gpu[filter] = alpha * mean[filter] + (1 - alpha) * rolling_mean_gpu[filter];
//if (max_minibatch_index == minibatch_index)
{
rolling_mean_gpu[filter] = alpha * mean[filter] + (1 - alpha) * rolling_mean_gpu[filter];

rolling_variance_gpu[filter] = alpha * variance_tmp + (1 - alpha) * rolling_variance_gpu[filter];
rolling_variance_gpu[filter] = alpha * variance_tmp + (1 - alpha) * rolling_variance_gpu[filter];
}
}
}

extern "C" void fast_v_cbn_gpu(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, float *m_avg, float *v_avg, float *variance,
extern "C" void fast_v_cbn_gpu(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, int max_minibatch_index, float *m_avg, float *v_avg, float *variance,
const float alpha, float *rolling_mean_gpu, float *rolling_variance_gpu, int inverse_variance, float epsilon)
{
fast_v_cbn_kernel << <filters, BLOCK, 0, get_cuda_stream() >> >(x, mean, batch, filters, spatial, minibatch_index, m_avg, v_avg, variance, alpha, rolling_mean_gpu, rolling_variance_gpu, inverse_variance, epsilon);
fast_v_cbn_kernel << <filters, BLOCK, 0, get_cuda_stream() >> >(x, mean, batch, filters, spatial, minibatch_index, max_minibatch_index, m_avg, v_avg, variance, alpha, rolling_mean_gpu, rolling_variance_gpu, inverse_variance, epsilon);
CHECK_CUDA(cudaPeekAtLastError());
}

Expand Down
6 changes: 3 additions & 3 deletions src/classifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,8 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
calc_topk_for_each = fmax(calc_topk_for_each, 100);
if (i % 10 == 0) {
if (calc_topk) {
fprintf(stderr, "\n (next TOP5 calculation at %d iterations) ", calc_topk_for_each);
if (topk > 0) fprintf(stderr, " Last accuracy TOP5 = %2.2f %% \n", topk * 100);
fprintf(stderr, "\n (next TOP%d calculation at %d iterations) ", topk_data, calc_topk_for_each);
if (topk > 0) fprintf(stderr, " Last accuracy TOP%d = %2.2f %% \n", topk_data, topk * 100);
}

if (net.cudnn_half) {
Expand All @@ -175,7 +175,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
int draw_precision = 0;
if (calc_topk && (i >= calc_topk_for_each || i == net.max_batches)) {
iter_topk = i;
topk = validate_classifier_single(datacfg, cfgfile, weightfile, &net, topk_data); // calc TOP5
topk = validate_classifier_single(datacfg, cfgfile, weightfile, &net, topk_data); // calc TOP-n
printf("\n accuracy %s = %f \n", topk_buff, topk);
draw_precision = 1;
}
Expand Down
6 changes: 3 additions & 3 deletions src/convolutional_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)

//#ifdef CUDNN_HALF
//if (state.use_mixed_precision) {
int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions);
int iteration_num = get_current_iteration(state.net); // (*state.net.seen) / (state.net.batch*state.net.subdivisions);
if (state.index != 0 && state.net.cudnn_half && !l.xnor && (!state.train || iteration_num > 3*state.net.burn_in) &&
(l.c / l.groups) % 8 == 0 && l.n % 8 == 0 && !state.train && l.groups <= 1 && l.size > 1)
{
Expand Down Expand Up @@ -671,7 +671,7 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state
float alpha = 1, beta = 0;

//#ifdef CUDNN_HALF
int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions);
int iteration_num = get_current_iteration(state.net); //(*state.net.seen) / (state.net.batch*state.net.subdivisions);
if (state.index != 0 && state.net.cudnn_half && !l.xnor && (!state.train || iteration_num > 3*state.net.burn_in) &&
(l.c / l.groups) % 8 == 0 && l.n % 8 == 0 && !state.train && l.groups <= 1 && l.size > 1)
{
Expand Down Expand Up @@ -978,7 +978,7 @@ void assisted_activation2_gpu(float alpha, float *output, float *gt_gpu, float *

void assisted_excitation_forward_gpu(convolutional_layer l, network_state state)
{
const int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions);
const int iteration_num = get_current_iteration(state.net); //(*state.net.seen) / (state.net.batch*state.net.subdivisions);

// epoch
//const float epoch = (float)(*state.net.seen) / state.net.train_images_num;
Expand Down
2 changes: 1 addition & 1 deletion src/convolutional_layer.c
Original file line number Diff line number Diff line change
Expand Up @@ -786,7 +786,7 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h)

if (l->activation == SWISH || l->activation == MISH) l->activation_input = (float*)realloc(l->activation_input, total_batch*l->outputs * sizeof(float));
#ifdef GPU
if (old_w < w || old_h < h) {
if (old_w < w || old_h < h || l->dynamic_minibatch) {
if (l->train) {
cuda_free(l->delta_gpu);
l->delta_gpu = cuda_make_array(l->delta, total_batch*l->outputs);
Expand Down
1 change: 1 addition & 0 deletions src/darknet.c
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,7 @@ int main(int argc, char **argv)

#ifndef GPU
gpu_index = -1;
printf(" GPU isn't used \n");
init_cpu();
#else
if(gpu_index >= 0){
Expand Down
Loading

0 comments on commit 383977f

Please sign in to comment.