Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merge Changes from Base Repo #3

Merged
merged 8 commits into from
Mar 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Yolo-v3 and Yolo-v2 for Windows and Linux
### (neural network for object detection) - Tensor Cores can be used on [Linux](https://github.com/AlexeyAB/darknet#how-to-compile-on-linux) and [Windows](https://github.com/AlexeyAB/darknet#how-to-compile-on-windows-using-vcpkg)
### (neural network for object detection) - Tensor Cores can be used on [Linux](https://github.com/AlexeyAB/darknet#how-to-compile-on-linux) and [Windows](https://github.com/AlexeyAB/darknet#how-to-compile-on-windows-using-cmake-gui)

More details: http://pjreddie.com/darknet/yolo/

Expand All @@ -8,7 +8,8 @@ More details: http://pjreddie.com/darknet/yolo/
[![TravisCI](https://travis-ci.org/AlexeyAB/darknet.svg?branch=master)](https://travis-ci.org/AlexeyAB/darknet)
[![AppveyorCI](https://ci.appveyor.com/api/projects/status/594bwb5uoc1fxwiu/branch/master?svg=true)](https://ci.appveyor.com/project/AlexeyAB/darknet/branch/master)
[![Contributors](https://img.shields.io/github/contributors/AlexeyAB/Darknet.svg)](https://github.com/AlexeyAB/darknet/graphs/contributors)
[![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/AlexeyAB/darknet/blob/master/LICENSE)
[![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/AlexeyAB/darknet/blob/master/LICENSE)
[![DOI](https://zenodo.org/badge/75388965.svg)](https://zenodo.org/badge/latestdoi/75388965)


* [Requirements (and how to install dependencies)](#requirements)
Expand Down Expand Up @@ -395,7 +396,7 @@ Training Yolo v3:
* https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L610
* https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L696
* https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L783
* change [`filters=255`] to filters=(classes + 5)x3 in the 3 `[convolutional]` before each `[yolo]` layer
* change [`filters=255`] to filters=(classes + 5)x3 in the 3 `[convolutional]` before each `[yolo]` layer; keep in mind that only the last `[convolutional]` before each of the `[yolo]` layers has to be changed.
* https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L603
* https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L689
* https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L776
Expand Down
30 changes: 24 additions & 6 deletions include/darknet.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@

#define SECRET_NUM -1234

typedef enum { UNUSED_DEF_VAL } UNUSED_ENUM_TYPE;

#ifdef GPU

#include <cuda_runtime.h>
Expand All @@ -42,8 +44,8 @@

#ifdef CUDNN
#include <cudnn.h>
#endif
#endif
#endif // CUDNN
#endif // GPU

#ifdef __cplusplus
extern "C" {
Expand Down Expand Up @@ -216,6 +218,7 @@ struct layer {
int batch_normalize;
int shortcut;
int batch;
int dynamic_minibatch;
int forced;
int flipped;
int inputs;
Expand Down Expand Up @@ -495,7 +498,7 @@ struct layer {

size_t workspace_size;

#ifdef GPU
//#ifdef GPU
int *indexes_gpu;

float *z_gpu;
Expand Down Expand Up @@ -610,8 +613,21 @@ struct layer {
cudnnConvolutionBwdDataAlgo_t bd_algo, bd_algo16;
cudnnConvolutionBwdFilterAlgo_t bf_algo, bf_algo16;
cudnnPoolingDescriptor_t poolingDesc;
#else // CUDNN
void* srcTensorDesc, *dstTensorDesc;
void* srcTensorDesc16, *dstTensorDesc16;
void* dsrcTensorDesc, *ddstTensorDesc;
void* dsrcTensorDesc16, *ddstTensorDesc16;
void* normTensorDesc, *normDstTensorDesc, *normDstTensorDescF16;
void* weightDesc, *weightDesc16;
void* dweightDesc, *dweightDesc16;
void* convDesc;
UNUSED_ENUM_TYPE fw_algo, fw_algo16;
UNUSED_ENUM_TYPE bd_algo, bd_algo16;
UNUSED_ENUM_TYPE bf_algo, bf_algo16;
void* poolingDesc;
#endif // CUDNN
#endif // GPU
//#endif // GPU
};


Expand All @@ -625,6 +641,7 @@ typedef struct network {
int n;
int batch;
uint64_t *seen;
int *cur_iteration;
int *t;
float epoch;
int subdivisions;
Expand Down Expand Up @@ -701,7 +718,7 @@ typedef struct network {
float *cost;
float clip;

#ifdef GPU
//#ifdef GPU
//float *input_gpu;
//float *truth_gpu;
float *delta_gpu;
Expand All @@ -722,8 +739,9 @@ typedef struct network {
float *global_delta_gpu;
float *state_delta_gpu;
size_t max_delta_gpu_size;
#endif
//#endif // GPU
int optimized_memory;
int dynamic_minibatch;
size_t workspace_size_limit;
} network;

Expand Down
8 changes: 5 additions & 3 deletions src/batchnorm_layer.c
Original file line number Diff line number Diff line change
Expand Up @@ -258,15 +258,17 @@ void forward_batchnorm_layer_gpu(layer l, network_state state)
fast_mean_gpu(l.output_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.mean_gpu);

//fast_v_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.v_cbn_gpu);
int minibatch_index = state.net.current_subdivision + 1;
float alpha = 0.01;
const int minibatch_index = state.net.current_subdivision + 1;
const int max_minibatch_index = state.net.subdivisions;
//printf("\n minibatch_index = %d, max_minibatch_index = %d \n", minibatch_index, max_minibatch_index);
const float alpha = 0.01;

int inverse_variance = 0;
#ifdef CUDNN
inverse_variance = 1;
#endif // CUDNN

fast_v_cbn_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, minibatch_index, l.m_cbn_avg_gpu, l.v_cbn_avg_gpu, l.variance_gpu,
fast_v_cbn_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, minibatch_index, max_minibatch_index, l.m_cbn_avg_gpu, l.v_cbn_avg_gpu, l.variance_gpu,
alpha, l.rolling_mean_gpu, l.rolling_variance_gpu, inverse_variance, .00001);

normalize_scale_bias_gpu(l.output_gpu, l.mean_gpu, l.variance_gpu, l.scales_gpu, l.biases_gpu, l.batch, l.out_c, l.out_h*l.out_w, inverse_variance, .00001f);
Expand Down
2 changes: 1 addition & 1 deletion src/blas.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *varianc

void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean);
void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance);
void fast_v_cbn_gpu(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, float *m_avg, float *v_avg, float *variance,
void fast_v_cbn_gpu(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, int max_minibatch_index, float *m_avg, float *v_avg, float *variance,
const float alpha, float *rolling_mean_gpu, float *rolling_variance_gpu, int inverse_variance, float epsilon);
void normalize_scale_bias_gpu(float *x, float *mean, float *variance, float *scales, float *biases, int batch, int filters, int spatial, int inverse_variance, float epsilon);
void compare_2_arrays_gpu(float *one, float *two, int size);
Expand Down
13 changes: 8 additions & 5 deletions src/blas_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,7 @@ extern "C" void fast_variance_gpu(float *x, float *mean, int batch, int filters,
}


__global__ void fast_v_cbn_kernel(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, float *m_avg, float *v_avg, float *variance,
__global__ void fast_v_cbn_kernel(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, int max_minibatch_index, float *m_avg, float *v_avg, float *variance,
const float alpha, float *rolling_mean_gpu, float *rolling_variance_gpu, int inverse_variance, float epsilon)
{
const int threads = BLOCK;
Expand Down Expand Up @@ -615,16 +615,19 @@ __global__ void fast_v_cbn_kernel(const float *x, float *mean, int batch, int f
if (inverse_variance) variance[filter] = 1.0f / sqrtf(variance_tmp + epsilon);
else variance[filter] = variance_tmp;

rolling_mean_gpu[filter] = alpha * mean[filter] + (1 - alpha) * rolling_mean_gpu[filter];
//if (max_minibatch_index == minibatch_index)
{
rolling_mean_gpu[filter] = alpha * mean[filter] + (1 - alpha) * rolling_mean_gpu[filter];

rolling_variance_gpu[filter] = alpha * variance_tmp + (1 - alpha) * rolling_variance_gpu[filter];
rolling_variance_gpu[filter] = alpha * variance_tmp + (1 - alpha) * rolling_variance_gpu[filter];
}
}
}

extern "C" void fast_v_cbn_gpu(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, float *m_avg, float *v_avg, float *variance,
extern "C" void fast_v_cbn_gpu(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, int max_minibatch_index, float *m_avg, float *v_avg, float *variance,
const float alpha, float *rolling_mean_gpu, float *rolling_variance_gpu, int inverse_variance, float epsilon)
{
fast_v_cbn_kernel << <filters, BLOCK, 0, get_cuda_stream() >> >(x, mean, batch, filters, spatial, minibatch_index, m_avg, v_avg, variance, alpha, rolling_mean_gpu, rolling_variance_gpu, inverse_variance, epsilon);
fast_v_cbn_kernel << <filters, BLOCK, 0, get_cuda_stream() >> >(x, mean, batch, filters, spatial, minibatch_index, max_minibatch_index, m_avg, v_avg, variance, alpha, rolling_mean_gpu, rolling_variance_gpu, inverse_variance, epsilon);
CHECK_CUDA(cudaPeekAtLastError());
}

Expand Down
6 changes: 3 additions & 3 deletions src/classifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,8 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
calc_topk_for_each = fmax(calc_topk_for_each, 100);
if (i % 10 == 0) {
if (calc_topk) {
fprintf(stderr, "\n (next TOP5 calculation at %d iterations) ", calc_topk_for_each);
if (topk > 0) fprintf(stderr, " Last accuracy TOP5 = %2.2f %% \n", topk * 100);
fprintf(stderr, "\n (next TOP%d calculation at %d iterations) ", topk_data, calc_topk_for_each);
if (topk > 0) fprintf(stderr, " Last accuracy TOP%d = %2.2f %% \n", topk_data, topk * 100);
}

if (net.cudnn_half) {
Expand All @@ -175,7 +175,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
int draw_precision = 0;
if (calc_topk && (i >= calc_topk_for_each || i == net.max_batches)) {
iter_topk = i;
topk = validate_classifier_single(datacfg, cfgfile, weightfile, &net, topk_data); // calc TOP5
topk = validate_classifier_single(datacfg, cfgfile, weightfile, &net, topk_data); // calc TOP-n
printf("\n accuracy %s = %f \n", topk_buff, topk);
draw_precision = 1;
}
Expand Down
6 changes: 3 additions & 3 deletions src/convolutional_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)

//#ifdef CUDNN_HALF
//if (state.use_mixed_precision) {
int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions);
int iteration_num = get_current_iteration(state.net); // (*state.net.seen) / (state.net.batch*state.net.subdivisions);
if (state.index != 0 && state.net.cudnn_half && !l.xnor && (!state.train || iteration_num > 3*state.net.burn_in) &&
(l.c / l.groups) % 8 == 0 && l.n % 8 == 0 && !state.train && l.groups <= 1 && l.size > 1)
{
Expand Down Expand Up @@ -671,7 +671,7 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state
float alpha = 1, beta = 0;

//#ifdef CUDNN_HALF
int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions);
int iteration_num = get_current_iteration(state.net); //(*state.net.seen) / (state.net.batch*state.net.subdivisions);
if (state.index != 0 && state.net.cudnn_half && !l.xnor && (!state.train || iteration_num > 3*state.net.burn_in) &&
(l.c / l.groups) % 8 == 0 && l.n % 8 == 0 && !state.train && l.groups <= 1 && l.size > 1)
{
Expand Down Expand Up @@ -978,7 +978,7 @@ void assisted_activation2_gpu(float alpha, float *output, float *gt_gpu, float *

void assisted_excitation_forward_gpu(convolutional_layer l, network_state state)
{
const int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions);
const int iteration_num = get_current_iteration(state.net); //(*state.net.seen) / (state.net.batch*state.net.subdivisions);

// epoch
//const float epoch = (float)(*state.net.seen) / state.net.train_images_num;
Expand Down
2 changes: 1 addition & 1 deletion src/convolutional_layer.c
Original file line number Diff line number Diff line change
Expand Up @@ -786,7 +786,7 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h)

if (l->activation == SWISH || l->activation == MISH) l->activation_input = (float*)realloc(l->activation_input, total_batch*l->outputs * sizeof(float));
#ifdef GPU
if (old_w < w || old_h < h) {
if (old_w < w || old_h < h || l->dynamic_minibatch) {
if (l->train) {
cuda_free(l->delta_gpu);
l->delta_gpu = cuda_make_array(l->delta, total_batch*l->outputs);
Expand Down
1 change: 1 addition & 0 deletions src/darknet.c
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,7 @@ int main(int argc, char **argv)

#ifndef GPU
gpu_index = -1;
printf(" GPU isn't used \n");
init_cpu();
#else
if(gpu_index >= 0){
Expand Down
Loading