From b1e850e556c285ed103516b61042e1394b3f5dee Mon Sep 17 00:00:00 2001
From: jbagnatoMacPro <nachobagnato@gmail.com>
Date: Fri, 19 Jun 2020 16:06:19 +0200
Subject: [PATCH] Ejercicio Deteccion de objetos yolo con keras y tensorflow

---
 Ejercicio_Object_Detection.ipynb | 1795 ++++++++++++++++++++++++++++++
 1 file changed, 1795 insertions(+)
 create mode 100644 Ejercicio_Object_Detection.ipynb

diff --git a/Ejercicio_Object_Detection.ipynb b/Ejercicio_Object_Detection.ipynb
new file mode 100644
index 000000000..cda4d17ae
--- /dev/null
+++ b/Ejercicio_Object_Detection.ipynb
@@ -0,0 +1,1795 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Entrena tu Dataset para Detección de Objetos"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Consigue el dataset con las fotos de Lego y sus anotaciones comprando el libro https://leanpub.com/aprendeml/ (puedes descargarlo gratis)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Visión por Ordenador en Machine Learning"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-06-14T21:09:36.457551Z",
+     "start_time": "2020-06-14T21:09:36.451976Z"
+    }
+   },
+   "source": [
+    "Vamos a crear una red neuronal para detectar personajes de Lego en imágenes, cámara o vídeo.\n",
+    "\n",
+    "Puedes leer el artículo completo en el blog www.aprendemachinelearning.com\n",
+    "\n",
+    "Empecemos,"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#pip install tensorflow==1.13.2\n",
+    "#pip install keras==2.0.8\n",
+    "#pip install imgaug==0.2.5\n",
+    "#pip install opencv-python\n",
+    "#pip install h5py\n",
+    "#pip install tqdm\n",
+    "#pip install imutils\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-06-18T17:03:38.758432Z",
+     "start_time": "2020-06-18T17:03:08.179312Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import argparse\n",
+    "import os\n",
+    "import numpy as np\n",
+    "import json\n",
+    "import cv2\n",
+    "import copy\n",
+    "import imgaug as ia\n",
+    "from imgaug import augmenters as iaa\n",
+    "from keras.utils import Sequence\n",
+    "import xml.etree.ElementTree as ET\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Definamos los directorios de las anotaciones XML y de las imágenes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-06-18T21:14:04.460356Z",
+     "start_time": "2020-06-18T21:14:04.455123Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "xml_dir = \"annotation/lego4/\"  # directory containing the annotation XML files\n",
+    "img_dir = \"images/lego4/\"   # directory containing the images\n",
+    "labels = [\"lego\"]  # class names kept when the annotations are read\n",
+    "tamanio = 416  # image side length in pixels used to train the network\n",
+    "mejores_pesos = \"red_lego.h5\"  # NOTE(review): presumably the file for the best weights; confirm in the training cell"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-06-18T21:17:57.774752Z",
+     "start_time": "2020-06-18T21:17:57.758916Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "def leer_annotations(ann_dir, img_dir, labels=[]):\n",
+    "    \"\"\"Parse Pascal-VOC style XML annotations into a list of image dicts.\n",
+    "\n",
+    "    ann_dir: directory holding the .xml files (a trailing separator is optional).\n",
+    "    img_dir: directory joined in front of every <filename> (a trailing separator is optional).\n",
+    "    labels:  class names to keep; an empty list keeps every class.\n",
+    "    Returns (all_imgs, seen_labels): the images that have at least one kept object, each a dict with\n",
+    "    'filename', 'width', 'height' and 'object'; and a dict counting every class name found.\n",
+    "    \"\"\"\n",
+    "    all_imgs = []\n",
+    "    seen_labels = {}\n",
+    "    xml_files = [x for x in sorted(os.listdir(ann_dir)) if x.endswith('.xml')]\n",
+    "\n",
+    "    for ann in xml_files:\n",
+    "        img = {'object':[]}\n",
+    "        # os.path.join instead of string concatenation, so directories work without a trailing '/'\n",
+    "        tree = ET.parse(os.path.join(ann_dir, ann))\n",
+    "\n",
+    "        for elem in tree.iter():\n",
+    "            if 'filename' in elem.tag:\n",
+    "                img['filename'] = os.path.join(img_dir, elem.text)\n",
+    "            if 'width' in elem.tag:\n",
+    "                img['width'] = int(elem.text)\n",
+    "            if 'height' in elem.tag:\n",
+    "                img['height'] = int(elem.text)\n",
+    "            if 'object' in elem.tag or 'part' in elem.tag:\n",
+    "                obj = {}\n",
+    "                for attr in list(elem):\n",
+    "                    if 'name' in attr.tag:\n",
+    "                        obj['name'] = attr.text\n",
+    "                        # count every class seen, including the ones filtered out just below\n",
+    "                        seen_labels[obj['name']] = seen_labels.get(obj['name'], 0) + 1\n",
+    "                        if len(labels) > 0 and obj['name'] not in labels:\n",
+    "                            break\n",
+    "                        # registered before its box is read; the same dict is completed below\n",
+    "                        img['object'] += [obj]\n",
+    "                    if 'bndbox' in attr.tag:\n",
+    "                        for dim in list(attr):\n",
+    "                            # coordinates may be written as floats; round them to whole pixels\n",
+    "                            for key in ('xmin', 'ymin', 'xmax', 'ymax'):\n",
+    "                                if key in dim.tag:\n",
+    "                                    obj[key] = int(round(float(dim.text)))\n",
+    "\n",
+    "        # images without any kept object are dropped\n",
+    "        if len(img['object']) > 0:\n",
+    "            all_imgs += [img]\n",
+    "\n",
+    "    return all_imgs, seen_labels\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Y las cargamos:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-06-18T21:18:00.144781Z",
+     "start_time": "2020-06-18T21:18:00.105268Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "train_imgs, train_labels = leer_annotations(xml_dir, img_dir, labels)  # image dicts and per-class object counts\n",
+    "print('imagenes',len(train_imgs), 'labels',len(train_labels))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Separemos en Train y Validation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-06-18T21:18:01.825926Z",
+     "start_time": "2020-06-18T21:18:01.816814Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "train_valid_split = int(0.8*len(train_imgs))  # first 80% for training, the rest for validation\n",
+    "np.random.shuffle(train_imgs)  # in-place shuffle; NOTE(review): no seed is set in the visible cells, so the split differs per run\n",
+    "valid_imgs = train_imgs[train_valid_split:]\n",
+    "train_imgs = train_imgs[:train_valid_split]  # rebinds train_imgs: re-running this cell alone shrinks the training set again\n",
+    "print('train:',len(train_imgs), 'validate:',len(valid_imgs))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Data Augmentation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Hagamos pequeñas distorsiones a las imágenes de entrada para entrenar con mayor variedad y mejorar la precisión de la red."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-06-18T21:18:05.410047Z",
+     "start_time": "2020-06-18T21:18:05.322797Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "def bbox_iou(box1, box2):\n",
+    "    \"\"\"Intersection-over-union of two boxes exposing xmin/ymin/xmax/ymax; 0.0 when the union is empty.\"\"\"\n",
+    "    # overlap along each axis (_interval_overlap is defined in a later cell of this notebook)\n",
+    "    intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])\n",
+    "    intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])\n",
+    "    intersect = intersect_w * intersect_h\n",
+    "    w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin\n",
+    "    w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin\n",
+    "    union = w1*h1 + w2*h2 - intersect\n",
+    "    # two zero-area boxes have an empty union; report no overlap instead of dividing by zero\n",
+    "    if union <= 0: return 0.0\n",
+    "    return float(intersect) / union\n",
+    "\n",
+    "class BoundBox:\n",
+    "    \"\"\"Axis-aligned box with an optional confidence `c` and a vector of per-class scores `classes`.\"\"\"\n",
+    "\n",
+    "    def __init__(self, xmin, ymin, xmax, ymax, c = None, classes = None):\n",
+    "        # corner coordinates\n",
+    "        self.xmin, self.ymin = xmin, ymin\n",
+    "        self.xmax, self.ymax = xmax, ymax\n",
+    "        # confidence and class scores exactly as supplied by the caller\n",
+    "        self.c, self.classes = c, classes\n",
+    "        # -1 marks the lazily computed values below as not computed yet\n",
+    "        self.label, self.score = -1, -1\n",
+    "\n",
+    "    def get_label(self):\n",
+    "        \"\"\"Index of the largest entry of `classes`; computed on first use and then cached.\"\"\"\n",
+    "        if self.label != -1:\n",
+    "            return self.label\n",
+    "        self.label = np.argmax(self.classes)\n",
+    "        return self.label\n",
+    "\n",
+    "    def get_score(self):\n",
+    "        \"\"\"Entry of `classes` at get_label(); computed on first use and then cached.\"\"\"\n",
+    "        if self.score == -1:\n",
+    "            self.score = self.classes[self.get_label()]\n",
+    "        return self.score\n",
+    "\n",
+    "\n",
+    "class BatchGenerator(Sequence):\n",
+    "    \"\"\"Keras Sequence that yields ([images, true_boxes], targets) batches for YOLOv2 training.\"\"\"\n",
+    "    def __init__(self, images, config, shuffle=True, jitter=True, norm=None):\n",
+    "        \"\"\"images: list of dicts with 'filename' and 'object'; config: dict of sizes, anchors and labels; norm: optional image preprocessing callable.\"\"\"\n",
+    "        self.generator = None\n",
+    "\n",
+    "        self.images = images\n",
+    "        self.config = config\n",
+    "\n",
+    "        self.shuffle = shuffle\n",
+    "        self.jitter  = jitter\n",
+    "        self.norm    = norm\n",
+    "        # config['ANCHORS'] is a flat [w0, h0, w1, h1, ...] list; every pair becomes a box anchored at the origin\n",
+    "        self.anchors = [BoundBox(0, 0, config['ANCHORS'][2*i], config['ANCHORS'][2*i+1]) for i in range(int(len(config['ANCHORS'])//2))]\n",
+    "\n",
+    "        ### augmentors by https://github.com/aleju/imgaug\n",
+    "        sometimes = lambda aug: iaa.Sometimes(0.5, aug)\n",
+    "\n",
+    "        self.aug_pipe = iaa.Sequential(\n",
+    "            [\n",
+    "                sometimes(iaa.Affine()),\n",
+    "                iaa.SomeOf((0, 5),\n",
+    "                    [\n",
+    "                        iaa.OneOf([\n",
+    "                            iaa.GaussianBlur((0, 3.0)), # blur images with a sigma between 0 and 3.0\n",
+    "                            iaa.AverageBlur(k=(2, 7)), # blur image using local means with kernel sizes between 2 and 7\n",
+    "                            iaa.MedianBlur(k=(3, 11)), # blur image using local medians with kernel sizes between 3 and 11\n",
+    "                        ]),\n",
+    "                        iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)), # sharpen images\n",
+    "                        iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5), # add gaussian noise to images\n",
+    "                        iaa.OneOf([\n",
+    "                            iaa.Dropout((0.01, 0.1), per_channel=0.5), # randomly remove up to 10% of the pixels\n",
+    "                        ]),\n",
+    "                        iaa.Add((-10, 10), per_channel=0.5), # change brightness of images (by -10 to 10 of original value)\n",
+    "                        iaa.Multiply((0.5, 1.5), per_channel=0.5), # change brightness of images (50-150% of original value)\n",
+    "                        iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5), # improve or worsen the contrast\n",
+    "                    ],\n",
+    "                    random_order=True\n",
+    "                )\n",
+    "            ],\n",
+    "            random_order=True\n",
+    "        )\n",
+    "\n",
+    "        if shuffle: np.random.shuffle(self.images)\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        return int(np.ceil(float(len(self.images))/self.config['BATCH_SIZE']))\n",
+    "\n",
+    "    def num_classes(self):\n",
+    "        return len(self.config['LABELS'])\n",
+    "\n",
+    "    def size(self):\n",
+    "        return len(self.images)\n",
+    "\n",
+    "    def load_annotation(self, i):\n",
+    "        \"\"\"Return the boxes of image i as [xmin, ymin, xmax, ymax, class_index] rows ([[]] when it has none).\"\"\"\n",
+    "        annots = []\n",
+    "        for obj in self.images[i]['object']:\n",
+    "            annot = [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax'], self.config['LABELS'].index(obj['name'])]\n",
+    "            annots += [annot]\n",
+    "\n",
+    "        if len(annots) == 0: annots = [[]]\n",
+    "\n",
+    "        return np.array(annots)\n",
+    "\n",
+    "    def load_image(self, i):\n",
+    "        return cv2.imread(self.images[i]['filename'])\n",
+    "\n",
+    "    def __getitem__(self, idx):\n",
+    "        \"\"\"Assemble batch `idx` and return ([x_batch, b_batch], y_batch).\"\"\"\n",
+    "        l_bound = idx*self.config['BATCH_SIZE']\n",
+    "        r_bound = (idx+1)*self.config['BATCH_SIZE']\n",
+    "\n",
+    "        if r_bound > len(self.images):\n",
+    "            r_bound = len(self.images)\n",
+    "            # slide the window back to fill the last batch, but never before index 0 (a negative start mis-slices the list and leaves all-zero rows)\n",
+    "            l_bound = max(0, r_bound - self.config['BATCH_SIZE'])\n",
+    "        instance_count = 0\n",
+    "\n",
+    "        x_batch = np.zeros((r_bound - l_bound, self.config['IMAGE_H'], self.config['IMAGE_W'], 3))                         # input images\n",
+    "        b_batch = np.zeros((r_bound - l_bound, 1     , 1     , 1    ,  self.config['TRUE_BOX_BUFFER'], 4))   # buffer of up to TRUE_BOX_BUFFER ground-truth boxes per image\n",
+    "        y_batch = np.zeros((r_bound - l_bound, self.config['GRID_H'],  self.config['GRID_W'], self.config['BOX'], 4+1+len(self.config['LABELS'])))                # desired network output\n",
+    "\n",
+    "        for train_instance in self.images[l_bound:r_bound]:\n",
+    "            # augment input image and fix object's position and size\n",
+    "            img, all_objs = self.aug_image(train_instance, jitter=self.jitter)\n",
+    "\n",
+    "            # construct output from object's x, y, w, h\n",
+    "            true_box_index = 0\n",
+    "\n",
+    "            for obj in all_objs:\n",
+    "                if obj['xmax'] > obj['xmin'] and obj['ymax'] > obj['ymin'] and obj['name'] in self.config['LABELS']:\n",
+    "                    center_x = .5*(obj['xmin'] + obj['xmax'])\n",
+    "                    center_x = center_x / (float(self.config['IMAGE_W']) / self.config['GRID_W'])\n",
+    "                    center_y = .5*(obj['ymin'] + obj['ymax'])\n",
+    "                    center_y = center_y / (float(self.config['IMAGE_H']) / self.config['GRID_H'])\n",
+    "\n",
+    "                    grid_x = int(np.floor(center_x))\n",
+    "                    grid_y = int(np.floor(center_y))\n",
+    "\n",
+    "                    if grid_x < self.config['GRID_W'] and grid_y < self.config['GRID_H']:\n",
+    "                        obj_indx  = self.config['LABELS'].index(obj['name'])\n",
+    "\n",
+    "                        center_w = (obj['xmax'] - obj['xmin']) / (float(self.config['IMAGE_W']) / self.config['GRID_W']) # unit: grid cell\n",
+    "                        center_h = (obj['ymax'] - obj['ymin']) / (float(self.config['IMAGE_H']) / self.config['GRID_H']) # unit: grid cell\n",
+    "\n",
+    "                        box = [center_x, center_y, center_w, center_h]\n",
+    "\n",
+    "                        # find the anchor that best predicts this box\n",
+    "                        best_anchor = -1\n",
+    "                        max_iou     = -1\n",
+    "\n",
+    "                        shifted_box = BoundBox(0,\n",
+    "                                               0,\n",
+    "                                               center_w,\n",
+    "                                               center_h)\n",
+    "\n",
+    "                        for i in range(len(self.anchors)):\n",
+    "                            anchor = self.anchors[i]\n",
+    "                            iou    = bbox_iou(shifted_box, anchor)\n",
+    "\n",
+    "                            if max_iou < iou:\n",
+    "                                best_anchor = i\n",
+    "                                max_iou     = iou\n",
+    "\n",
+    "                        # assign ground truth x, y, w, h, confidence and class probs to y_batch\n",
+    "                        y_batch[instance_count, grid_y, grid_x, best_anchor, 0:4] = box\n",
+    "                        y_batch[instance_count, grid_y, grid_x, best_anchor, 4  ] = 1.\n",
+    "                        y_batch[instance_count, grid_y, grid_x, best_anchor, 5+obj_indx] = 1\n",
+    "\n",
+    "                        # assign the true box to b_batch\n",
+    "                        b_batch[instance_count, 0, 0, 0, true_box_index] = box\n",
+    "\n",
+    "                        true_box_index += 1\n",
+    "                        true_box_index = true_box_index % self.config['TRUE_BOX_BUFFER']\n",
+    "\n",
+    "            # assign input image to x_batch\n",
+    "            if self.norm is not None:\n",
+    "                x_batch[instance_count] = self.norm(img)\n",
+    "            else:\n",
+    "                # plot image and bounding boxes for sanity check\n",
+    "                for obj in all_objs:\n",
+    "                    if obj['xmax'] > obj['xmin'] and obj['ymax'] > obj['ymin']:\n",
+    "                        cv2.rectangle(img[:,:,::-1], (obj['xmin'],obj['ymin']), (obj['xmax'],obj['ymax']), (255,0,0), 3)\n",
+    "                        cv2.putText(img[:,:,::-1], obj['name'], \n",
+    "                                    (obj['xmin']+2, obj['ymin']+12), \n",
+    "                                    0, 1.2e-3 * img.shape[0], \n",
+    "                                    (0,255,0), 2)\n",
+    "\n",
+    "                x_batch[instance_count] = img\n",
+    "\n",
+    "            # increase instance counter in current batch\n",
+    "            instance_count += 1\n",
+    "\n",
+    "\n",
+    "\n",
+    "        return [x_batch, b_batch], y_batch\n",
+    "\n",
+    "    def on_epoch_end(self):\n",
+    "        if self.shuffle: np.random.shuffle(self.images)\n",
+    "\n",
+    "    def aug_image(self, train_instance, jitter):\n",
+    "        \"\"\"Load one image, optionally augment it, resize it to IMAGE_W x IMAGE_H and return (image, adjusted object dicts).\"\"\"\n",
+    "        image_name = train_instance['filename']\n",
+    "        image = cv2.imread(image_name)\n",
+    "\n",
+    "        if image is None: raise FileNotFoundError('Cannot find ' + image_name)  # cv2.imread returns None instead of raising\n",
+    "\n",
+    "        h, w, c = image.shape\n",
+    "        all_objs = copy.deepcopy(train_instance['object'])\n",
+    "\n",
+    "        if jitter:\n",
+    "            ### scale the image\n",
+    "            scale = np.random.uniform() / 10. + 1.\n",
+    "            image = cv2.resize(image, (0,0), fx = scale, fy = scale)\n",
+    "\n",
+    "            ### translate the image\n",
+    "            max_offx = (scale-1.) * w\n",
+    "            max_offy = (scale-1.) * h\n",
+    "            offx = int(np.random.uniform() * max_offx)\n",
+    "            offy = int(np.random.uniform() * max_offy)\n",
+    "\n",
+    "            image = image[offy : (offy + h), offx : (offx + w)]\n",
+    "\n",
+    "            ### flip the image\n",
+    "            flip = np.random.binomial(1, .5)\n",
+    "            if flip > 0.5: image = cv2.flip(image, 1)\n",
+    "\n",
+    "            image = self.aug_pipe.augment_image(image)\n",
+    "\n",
+    "        # resize the image to standard size; cv2.resize takes dsize as (width, height)\n",
+    "        image = cv2.resize(image, (self.config['IMAGE_W'], self.config['IMAGE_H']))\n",
+    "        image = image[:,:,::-1]  # reverse the channel order (cv2.imread returns BGR)\n",
+    "\n",
+    "        # fix object's position and size\n",
+    "        for obj in all_objs:\n",
+    "            for attr in ['xmin', 'xmax']:\n",
+    "                if jitter: obj[attr] = int(obj[attr] * scale - offx)\n",
+    "\n",
+    "                obj[attr] = int(obj[attr] * float(self.config['IMAGE_W']) / w)\n",
+    "                obj[attr] = max(min(obj[attr], self.config['IMAGE_W']), 0)\n",
+    "\n",
+    "            for attr in ['ymin', 'ymax']:\n",
+    "                if jitter: obj[attr] = int(obj[attr] * scale - offy)\n",
+    "\n",
+    "                obj[attr] = int(obj[attr] * float(self.config['IMAGE_H']) / h)\n",
+    "                obj[attr] = max(min(obj[attr], self.config['IMAGE_H']), 0)\n",
+    "\n",
+    "            if jitter and flip > 0.5:\n",
+    "                xmin = obj['xmin']\n",
+    "                obj['xmin'] = self.config['IMAGE_W'] - obj['xmax']\n",
+    "                obj['xmax'] = self.config['IMAGE_W'] - xmin\n",
+    "\n",
+    "        return image, all_objs\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Crear el Modelo YOLOv2"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Descarga el archivo con los pesos de la red full_yolo_backend.h5 desde https://drive.google.com/file/d/1Q9WhhRlqQbA4jgBkCDrynvgquRXZA_f8/view?usp=sharing\n",
+    "y cópialo en este mismo directorio."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-06-18T21:18:08.294971Z",
+     "start_time": "2020-06-18T21:18:08.232388Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from keras.models import Model\n",
+    "import tensorflow as tf\n",
+    "from keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda\n",
+    "from keras.layers.advanced_activations import LeakyReLU\n",
+    "from keras.layers.merge import concatenate\n",
+    "from keras.optimizers import SGD, Adam, RMSprop\n",
+    "from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard\n",
+    "\n",
+    "\n",
+    "FULL_YOLO_BACKEND_PATH  = \"full_yolo_backend.h5\"   # should be hosted on a server\n",
+    "\n",
+    "class BaseFeatureExtractor(object):\n",
+    "    \"\"\"Abstract base for backbones that wrap a Keras model stored in `self.feature_extractor`.\"\"\"\n",
+    "\n",
+    "    # to be defined in each subclass; the methods below expect it to set self.feature_extractor\n",
+    "    def __init__(self, input_size):\n",
+    "        raise NotImplementedError(\"error message\")\n",
+    "\n",
+    "    # to be defined in each subclass: image preprocessing\n",
+    "    def normalize(self, image):\n",
+    "        raise NotImplementedError(\"error message\")       \n",
+    "    # entries [1:3] of the model's last output shape; presumably (grid_h, grid_w) for channels-last data\n",
+    "    def get_output_shape(self):\n",
+    "        return self.feature_extractor.get_output_shape_at(-1)[1:3]\n",
+    "    # apply the wrapped model to `input_image`\n",
+    "    def extract(self, input_image):\n",
+    "        return self.feature_extractor(input_image)\n",
+    "\n",
+    "class FullYoloFeature(BaseFeatureExtractor):\n",
+    "    \"\"\"YOLOv2 backbone: 22 conv blocks with one skip connection; weights are loaded from FULL_YOLO_BACKEND_PATH.\"\"\"\n",
+    "    def __init__(self, input_size):\n",
+    "        input_image = Input(shape=(input_size, input_size, 3))\n",
+    "\n",
+    "        # the function to implement the reorganization layer (thanks to github.com/allanzelener/YAD2K)\n",
+    "        def space_to_depth_x2(x):\n",
+    "            return tf.space_to_depth(x, block_size=2)\n",
+    "\n",
+    "        # Layer 1\n",
+    "        x = Conv2D(32, (3,3), strides=(1,1), padding='same', name='conv_1', use_bias=False)(input_image)\n",
+    "        x = BatchNormalization(name='norm_1')(x)\n",
+    "        x = LeakyReLU(alpha=0.1)(x)\n",
+    "        x = MaxPooling2D(pool_size=(2, 2))(x)\n",
+    "\n",
+    "        # Layer 2\n",
+    "        x = Conv2D(64, (3,3), strides=(1,1), padding='same', name='conv_2', use_bias=False)(x)\n",
+    "        x = BatchNormalization(name='norm_2')(x)\n",
+    "        x = LeakyReLU(alpha=0.1)(x)\n",
+    "        x = MaxPooling2D(pool_size=(2, 2))(x)\n",
+    "\n",
+    "        # Layer 3\n",
+    "        x = Conv2D(128, (3,3), strides=(1,1), padding='same', name='conv_3', use_bias=False)(x)\n",
+    "        x = BatchNormalization(name='norm_3')(x)\n",
+    "        x = LeakyReLU(alpha=0.1)(x)\n",
+    "\n",
+    "        # Layer 4\n",
+    "        x = Conv2D(64, (1,1), strides=(1,1), padding='same', name='conv_4', use_bias=False)(x)\n",
+    "        x = BatchNormalization(name='norm_4')(x)\n",
+    "        x = LeakyReLU(alpha=0.1)(x)\n",
+    "\n",
+    "        # Layer 5\n",
+    "        x = Conv2D(128, (3,3), strides=(1,1), padding='same', name='conv_5', use_bias=False)(x)\n",
+    "        x = BatchNormalization(name='norm_5')(x)\n",
+    "        x = LeakyReLU(alpha=0.1)(x)\n",
+    "        x = MaxPooling2D(pool_size=(2, 2))(x)\n",
+    "\n",
+    "        # Layer 6\n",
+    "        x = Conv2D(256, (3,3), strides=(1,1), padding='same', name='conv_6', use_bias=False)(x)\n",
+    "        x = BatchNormalization(name='norm_6')(x)\n",
+    "        x = LeakyReLU(alpha=0.1)(x)\n",
+    "\n",
+    "        # Layer 7\n",
+    "        x = Conv2D(128, (1,1), strides=(1,1), padding='same', name='conv_7', use_bias=False)(x)\n",
+    "        x = BatchNormalization(name='norm_7')(x)\n",
+    "        x = LeakyReLU(alpha=0.1)(x)\n",
+    "\n",
+    "        # Layer 8\n",
+    "        x = Conv2D(256, (3,3), strides=(1,1), padding='same', name='conv_8', use_bias=False)(x)\n",
+    "        x = BatchNormalization(name='norm_8')(x)\n",
+    "        x = LeakyReLU(alpha=0.1)(x)\n",
+    "        x = MaxPooling2D(pool_size=(2, 2))(x)\n",
+    "\n",
+    "        # Layer 9\n",
+    "        x = Conv2D(512, (3,3), strides=(1,1), padding='same', name='conv_9', use_bias=False)(x)\n",
+    "        x = BatchNormalization(name='norm_9')(x)\n",
+    "        x = LeakyReLU(alpha=0.1)(x)\n",
+    "\n",
+    "        # Layer 10\n",
+    "        x = Conv2D(256, (1,1), strides=(1,1), padding='same', name='conv_10', use_bias=False)(x)\n",
+    "        x = BatchNormalization(name='norm_10')(x)\n",
+    "        x = LeakyReLU(alpha=0.1)(x)\n",
+    "\n",
+    "        # Layer 11\n",
+    "        x = Conv2D(512, (3,3), strides=(1,1), padding='same', name='conv_11', use_bias=False)(x)\n",
+    "        x = BatchNormalization(name='norm_11')(x)\n",
+    "        x = LeakyReLU(alpha=0.1)(x)\n",
+    "\n",
+    "        # Layer 12\n",
+    "        x = Conv2D(256, (1,1), strides=(1,1), padding='same', name='conv_12', use_bias=False)(x)\n",
+    "        x = BatchNormalization(name='norm_12')(x)\n",
+    "        x = LeakyReLU(alpha=0.1)(x)\n",
+    "\n",
+    "        # Layer 13\n",
+    "        x = Conv2D(512, (3,3), strides=(1,1), padding='same', name='conv_13', use_bias=False)(x)\n",
+    "        x = BatchNormalization(name='norm_13')(x)\n",
+    "        x = LeakyReLU(alpha=0.1)(x)\n",
+    "        # remember this activation; after conv_21 it is concatenated with the Layer 20 output\n",
+    "        skip_connection = x\n",
+    "\n",
+    "        x = MaxPooling2D(pool_size=(2, 2))(x)\n",
+    "\n",
+    "        # Layer 14\n",
+    "        x = Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_14', use_bias=False)(x)\n",
+    "        x = BatchNormalization(name='norm_14')(x)\n",
+    "        x = LeakyReLU(alpha=0.1)(x)\n",
+    "\n",
+    "        # Layer 15\n",
+    "        x = Conv2D(512, (1,1), strides=(1,1), padding='same', name='conv_15', use_bias=False)(x)\n",
+    "        x = BatchNormalization(name='norm_15')(x)\n",
+    "        x = LeakyReLU(alpha=0.1)(x)\n",
+    "\n",
+    "        # Layer 16\n",
+    "        x = Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_16', use_bias=False)(x)\n",
+    "        x = BatchNormalization(name='norm_16')(x)\n",
+    "        x = LeakyReLU(alpha=0.1)(x)\n",
+    "\n",
+    "        # Layer 17\n",
+    "        x = Conv2D(512, (1,1), strides=(1,1), padding='same', name='conv_17', use_bias=False)(x)\n",
+    "        x = BatchNormalization(name='norm_17')(x)\n",
+    "        x = LeakyReLU(alpha=0.1)(x)\n",
+    "\n",
+    "        # Layer 18\n",
+    "        x = Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_18', use_bias=False)(x)\n",
+    "        x = BatchNormalization(name='norm_18')(x)\n",
+    "        x = LeakyReLU(alpha=0.1)(x)\n",
+    "\n",
+    "        # Layer 19\n",
+    "        x = Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_19', use_bias=False)(x)\n",
+    "        x = BatchNormalization(name='norm_19')(x)\n",
+    "        x = LeakyReLU(alpha=0.1)(x)\n",
+    "\n",
+    "        # Layer 20\n",
+    "        x = Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_20', use_bias=False)(x)\n",
+    "        x = BatchNormalization(name='norm_20')(x)\n",
+    "        x = LeakyReLU(alpha=0.1)(x)\n",
+    "\n",
+    "        # Layer 21\n",
+    "        skip_connection = Conv2D(64, (1,1), strides=(1,1), padding='same', name='conv_21', use_bias=False)(skip_connection)\n",
+    "        skip_connection = BatchNormalization(name='norm_21')(skip_connection)\n",
+    "        skip_connection = LeakyReLU(alpha=0.1)(skip_connection)\n",
+    "        skip_connection = Lambda(space_to_depth_x2)(skip_connection)\n",
+    "        # merge the space_to_depth skip branch with the main branch\n",
+    "        x = concatenate([skip_connection, x])\n",
+    "\n",
+    "        # Layer 22\n",
+    "        x = Conv2D(1024, (3,3), strides=(1,1), padding='same', name='conv_22', use_bias=False)(x)\n",
+    "        x = BatchNormalization(name='norm_22')(x)\n",
+    "        x = LeakyReLU(alpha=0.1)(x)\n",
+    "        # wrap the graph in a Model and load its weights from the file at FULL_YOLO_BACKEND_PATH\n",
+    "        self.feature_extractor = Model(input_image, x)\n",
+    "        self.feature_extractor.load_weights(FULL_YOLO_BACKEND_PATH)\n",
+    "    # divide pixel values by 255\n",
+    "    def normalize(self, image):\n",
+    "        return image / 255.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-06-18T21:18:09.266689Z",
+     "start_time": "2020-06-18T21:18:09.236772Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# funciones que necesitaremos\n",
+    "\n",
+    "def _sigmoid(x):\n",
+    "    return 1. / (np.exp(-x) + 1.)  # logistic function 1 / (1 + e^-x), elementwise on arrays\n",
+    "\n",
+    "def _softmax(x, axis=-1, t=-100.):\n",
+    "    \"\"\"Numerically stable softmax of `x` along `axis`; `t` is accepted for compatibility and no longer used.\"\"\"\n",
+    "    # shift by the maximum along `axis` only, so independent rows never influence each other\n",
+    "    # (the previous global max plus x/min*t rescaling acted as a temperature and changed the result)\n",
+    "    x = x - np.max(x, axis=axis, keepdims=True)\n",
+    "    e_x = np.exp(x)\n",
+    "    # the largest entry along `axis` is exp(0) == 1, so the denominator is never zero\n",
+    "    denom = e_x.sum(axis, keepdims=True)\n",
+    "    return e_x / denom\n",
+    "\n",
+    "def _interval_overlap(interval_a, interval_b):\n",
+    "    \"\"\"Length of the overlap of two 1-D intervals given as (start, end) pairs; 0 when they are disjoint.\"\"\"\n",
+    "    a_lo, a_hi = interval_a\n",
+    "    b_lo, b_hi = interval_b\n",
+    "    right = min(a_hi, b_hi)\n",
+    "    # case 1: b starts before a\n",
+    "    if b_lo < a_lo:\n",
+    "        # disjoint when b also ends before a starts\n",
+    "        return 0 if b_hi < a_lo else right - a_lo\n",
+    "    # case 2: a starts first, or both start together\n",
+    "    if a_hi < b_lo:\n",
+    "        # a ends before b starts: disjoint\n",
+    "        return 0\n",
+    "    return right - b_lo\n",
+    "\n",
+    "def compute_overlap(a, b):\n",
+    "    \"\"\"\n",
+    "    Pairwise intersection-over-union between two sets of boxes (columns 0..3 are x1, y1, x2, y2).\n",
+    "    Code originally from https://github.com/rbgirshick/py-faster-rcnn.\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    a: (N, 4) ndarray of float\n",
+    "    b: (K, 4) ndarray of float\n",
+    "    Returns\n",
+    "    -------\n",
+    "    overlaps: (N, K) ndarray, overlaps[i, j] is the overlap of a[i] with b[j]\n",
+    "    \"\"\"\n",
+    "\n",
+    "    # columns of `a` reshaped to (N, 1) so they broadcast against the (K,) columns of `b`\n",
+    "    ax1, ay1, ax2, ay2 = (a[:, k][:, np.newaxis] for k in range(4))\n",
+    "    bx1, by1, bx2, by2 = (b[:, k] for k in range(4))\n",
+    "    # intersection extents, clamped at zero for pairs that do not overlap\n",
+    "    inter_w = np.maximum(np.minimum(ax2, bx2) - np.maximum(ax1, bx1), 0)\n",
+    "    inter_h = np.maximum(np.minimum(ay2, by2) - np.maximum(ay1, by1), 0)\n",
+    "    intersection = inter_w * inter_h\n",
+    "    # area of every box in each set\n",
+    "    area_a = (ax2 - ax1) * (ay2 - ay1)\n",
+    "    area_b = (bx2 - bx1) * (by2 - by1)\n",
+    "    # union, floored at machine epsilon so the division is always defined\n",
+    "    union = np.maximum(area_a + area_b - intersection, np.finfo(float).eps)\n",
+    "    return intersection / union\n",
+    "    \n",
+    "def compute_ap(recall, precision):\n",
+    "    \"\"\" Average precision: area under the precision envelope of a precision/recall curve.\n",
+    "    Code originally from https://github.com/rbgirshick/py-faster-rcnn.\n",
+    "\n",
+    "    # Arguments\n",
+    "        recall:    The recall curve (list).\n",
+    "        precision: The precision curve (list).\n",
+    "    # Returns\n",
+    "        The average precision as computed in py-faster-rcnn.\n",
+    "    \"\"\"\n",
+    "    # pad both curves with sentinels: recall runs from 0 to 1, precision is 0 at both ends\n",
+    "    padded_recall = np.concatenate(([0.], recall, [1.]))\n",
+    "    padded_precision = np.concatenate(([0.], precision, [0.]))\n",
+    "\n",
+    "    # precision envelope: each entry becomes the maximum of itself and everything to its right\n",
+    "    # (running maximum over the reversed array, reversed back)\n",
+    "    envelope = np.maximum.accumulate(padded_precision[::-1])[::-1]\n",
+    "\n",
+    "    # indices where recall changes value; only those steps contribute area\n",
+    "    recall_prev, recall_next = padded_recall[:-1], padded_recall[1:]\n",
+    "    steps = np.where(recall_next != recall_prev)[0]\n",
+    "\n",
+    "    # sum of (recall step width) * (envelope precision at the right edge of the step)\n",
+    "    widths = padded_recall[steps + 1] - padded_recall[steps]\n",
+    "    heights = envelope[steps + 1]\n",
+    "    return np.sum(widths * heights)\n",
+    "\n",
+    "def decode_netout(netout, anchors, nb_class, obj_threshold=0.3, nms_threshold=0.3):\n",
+    "    \"\"\"Turn the raw network output for one image into a list of boxes.\n",
+    "\n",
+    "    NOTE: `netout` is modified in place (its confidence and class entries are\n",
+    "    overwritten), so decoding the same array twice gives different results.\n",
+    "\n",
+    "    # Arguments\n",
+    "        netout:        array indexed as [row, col, box, value]; along the last axis\n",
+    "                       0:4 are x, y, w, h, 4 is the confidence and 5: are the class scores.\n",
+    "        anchors:       flat sequence; anchors[2*b] and anchors[2*b+1] scale the width\n",
+    "                       and height of box b.\n",
+    "        nb_class:      number of classes the suppression loop runs over.\n",
+    "        obj_threshold: class scores that do not exceed this value are zeroed, and boxes\n",
+    "                       whose get_score() does not exceed it are dropped.\n",
+    "        nms_threshold: bbox_iou value at or above which the lower-ranked of two boxes\n",
+    "                       loses its score for the class being processed.\n",
+    "    # Returns\n",
+    "        List of BoundBox objects built as (xmin, ymin, xmax, ymax, confidence, classes),\n",
+    "        with coordinates expressed as fractions of the image width / height.\n",
+    "    \"\"\"\n",
+    "    grid_h, grid_w, nb_box = netout.shape[:3]\n",
+    "\n",
+    "    boxes = []\n",
+    "    \n",
+    "    # decode the output by the network:\n",
+    "    # class scores become confidence * _softmax(class values), then weak scores are zeroed\n",
+    "    netout[..., 4]  = _sigmoid(netout[..., 4])\n",
+    "    netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])\n",
+    "    netout[..., 5:] *= netout[..., 5:] > obj_threshold\n",
+    "    \n",
+    "    for row in range(grid_h):\n",
+    "        for col in range(grid_w):\n",
+    "            for b in range(nb_box):\n",
+    "                # entries 5 onwards are the per-class scores (already scaled by the confidence)\n",
+    "                classes = netout[row,col,b,5:]\n",
+    "                \n",
+    "                # keep the box only if at least one class score survived the threshold\n",
+    "                if np.sum(classes) > 0:\n",
+    "                    # first 4 elements are x, y, w, and h\n",
+    "                    x, y, w, h = netout[row,col,b,:4]\n",
+    "\n",
+    "                    x = (col + _sigmoid(x)) / grid_w # center position, unit: image width\n",
+    "                    y = (row + _sigmoid(y)) / grid_h # center position, unit: image height\n",
+    "                    w = anchors[2 * b + 0] * np.exp(w) / grid_w # unit: image width\n",
+    "                    h = anchors[2 * b + 1] * np.exp(h) / grid_h # unit: image height\n",
+    "                    confidence = netout[row,col,b,4]\n",
+    "                    \n",
+    "                    # convert centre/size to corner coordinates\n",
+    "                    box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, confidence, classes)\n",
+    "                    \n",
+    "                    boxes.append(box)\n",
+    "\n",
+    "    # suppress non-maximal boxes, one class at a time\n",
+    "    for c in range(nb_class):\n",
+    "        # box indices ordered from the highest to the lowest score for class c\n",
+    "        sorted_indices = list(reversed(np.argsort([box.classes[c] for box in boxes])))\n",
+    "\n",
+    "        for i in range(len(sorted_indices)):\n",
+    "            index_i = sorted_indices[i]\n",
+    "            \n",
+    "            # a box already suppressed for this class cannot suppress others\n",
+    "            if boxes[index_i].classes[c] == 0: \n",
+    "                continue\n",
+    "            else:\n",
+    "                for j in range(i+1, len(sorted_indices)):\n",
+    "                    index_j = sorted_indices[j]\n",
+    "                    \n",
+    "                    if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_threshold:\n",
+    "                        boxes[index_j].classes[c] = 0\n",
+    "                        \n",
+    "    # remove the boxes which are less likely than a obj_threshold\n",
+    "    boxes = [box for box in boxes if box.get_score() > obj_threshold]\n",
+    "    \n",
+    "    return boxes "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-06-18T21:18:10.604188Z",
+     "start_time": "2020-06-18T21:18:10.503708Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "class YOLO(object):\n",
+    "    def __init__(self,\n",
+    "                 input_size,\n",
+    "                 labels,\n",
+    "                 max_box_per_image,\n",
+    "                 anchors):\n",
+    "        \"\"\"Build the detection network: feature extractor plus a 1x1 convolutional detection layer.\n",
+    "\n",
+    "        # Arguments\n",
+    "            input_size:        side length of the square input image, in pixels.\n",
+    "            labels:            iterable of class names; its length sets the number of classes.\n",
+    "            max_box_per_image: size of the ground-truth box buffer carried by the `true_boxes` input.\n",
+    "            anchors:           flat sequence of anchor dimensions, two values per anchor box.\n",
+    "        \"\"\"\n",
+    "        self.input_size = input_size\n",
+    "\n",
+    "        self.labels   = list(labels)\n",
+    "        self.nb_class = len(self.labels)\n",
+    "        self.nb_box   = len(anchors)//2\n",
+    "        self.class_wt = np.ones(self.nb_class, dtype='float32')\n",
+    "        self.anchors  = anchors\n",
+    "\n",
+    "        self.max_box_per_image = max_box_per_image\n",
+    "\n",
+    "        ##########################\n",
+    "        # Make the model\n",
+    "        ##########################\n",
+    "\n",
+    "        # make the feature extractor layers\n",
+    "        input_image     = Input(shape=(self.input_size, self.input_size, 3))\n",
+    "        # the ground-truth boxes are a second model input so that custom_loss can read them\n",
+    "        self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image , 4))\n",
+    "\n",
+    "        self.feature_extractor = FullYoloFeature(self.input_size)\n",
+    "\n",
+    "        print(self.feature_extractor.get_output_shape())\n",
+    "        self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()\n",
+    "        features = self.feature_extractor.extract(input_image)\n",
+    "\n",
+    "        # make the object detection layer: for every grid cell and anchor box it emits\n",
+    "        # 4 box coordinates, 1 confidence value and nb_class class scores\n",
+    "        detection_layer_name = 'DetectionLayer'\n",
+    "        output = Conv2D(self.nb_box * (4 + 1 + self.nb_class),\n",
+    "                        (1,1), strides=(1,1),\n",
+    "                        padding='same',\n",
+    "                        name=detection_layer_name,\n",
+    "                        kernel_initializer='lecun_normal')(features)\n",
+    "        output = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output)\n",
+    "        # identity on the detections; it only exists to wire true_boxes into the graph\n",
+    "        output = Lambda(lambda args: args[0])([output, self.true_boxes])\n",
+    "\n",
+    "        self.model = Model([input_image, self.true_boxes], output)\n",
+    "\n",
+    "        # initialize the weights of the detection layer; the layer is looked up by name\n",
+    "        # because a positional index silently picks another layer if the graph changes\n",
+    "        layer = self.model.get_layer(name=detection_layer_name)\n",
+    "        weights = layer.get_weights()\n",
+    "\n",
+    "        # small random values, scaled down by the number of grid cells\n",
+    "        new_kernel = np.random.normal(size=weights[0].shape)/(self.grid_h*self.grid_w)\n",
+    "        new_bias   = np.random.normal(size=weights[1].shape)/(self.grid_h*self.grid_w)\n",
+    "\n",
+    "        layer.set_weights([new_kernel, new_bias])\n",
+    "\n",
+    "        # print a summary of the whole model\n",
+    "        self.model.summary()\n",
+    "\n",
+    "    def custom_loss(self, y_true, y_pred):\n",
+    "        \"\"\"YOLO training loss: coordinate, confidence and class terms.\n",
+    "\n",
+    "        The last axis of `y_true` and `y_pred` is read as\n",
+    "        [x, y, w, h, confidence, class values...].\n",
+    "\n",
+    "        Reads attributes that train() assigns before compiling the model:\n",
+    "        batch_size, coord_scale, object_scale, no_object_scale, class_scale,\n",
+    "        warmup_batches and debug.\n",
+    "\n",
+    "        While the `seen` counter is at most `warmup_batches`, predictors without a\n",
+    "        ground-truth box get the cell centre and the anchor size as targets, every\n",
+    "        predictor contributes to the coordinate loss, and a constant 10 is added.\n",
+    "\n",
+    "        # Arguments\n",
+    "            y_true: ground-truth tensor.\n",
+    "            y_pred: raw network output with the same layout.\n",
+    "        # Returns\n",
+    "            Scalar loss tensor (sum of the xy, wh, confidence and class terms).\n",
+    "        \"\"\"\n",
+    "        mask_shape = tf.shape(y_true)[:4]\n",
+    "        \n",
+    "        # cell_x / cell_y hold the column / row index of every grid cell;\n",
+    "        # cell_grid repeats the (x, y) pair for each batch item and anchor box\n",
+    "        cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(self.grid_w), [self.grid_h]), (1, self.grid_h, self.grid_w, 1, 1)))\n",
+    "        cell_y = tf.transpose(cell_x, (0,2,1,3,4))\n",
+    "\n",
+    "        cell_grid = tf.tile(tf.concat([cell_x,cell_y], -1), [self.batch_size, 1, 1, self.nb_box, 1])\n",
+    "        \n",
+    "        # coord_mask and class_mask are reassigned further down; only conf_mask is built up from these zeros\n",
+    "        coord_mask = tf.zeros(mask_shape)\n",
+    "        conf_mask  = tf.zeros(mask_shape)\n",
+    "        class_mask = tf.zeros(mask_shape)\n",
+    "        \n",
+    "        # seen counts loss evaluations (drives the warm-up); total_recall is only used when self.debug is set\n",
+    "        seen = tf.Variable(0.)\n",
+    "        total_recall = tf.Variable(0.)\n",
+    "        \n",
+    "        \"\"\"\n",
+    "        Adjust prediction\n",
+    "        \"\"\"\n",
+    "        ### adjust x and y: sigmoid offset inside the cell plus the cell index\n",
+    "        pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid\n",
+    "        \n",
+    "        ### adjust w and h: exponential scaling of the anchor size\n",
+    "        pred_box_wh = tf.exp(y_pred[..., 2:4]) * np.reshape(self.anchors, [1,1,1,self.nb_box,2])\n",
+    "        \n",
+    "        ### adjust confidence\n",
+    "        pred_box_conf = tf.sigmoid(y_pred[..., 4])\n",
+    "        \n",
+    "        ### adjust class probabilities (kept as logits for the cross-entropy below)\n",
+    "        pred_box_class = y_pred[..., 5:]\n",
+    "        \n",
+    "        \"\"\"\n",
+    "        Adjust ground truth\n",
+    "        \"\"\"\n",
+    "        ### adjust x and y\n",
+    "        true_box_xy = y_true[..., 0:2] # relative position to the containing cell\n",
+    "        \n",
+    "        ### adjust w and h\n",
+    "        true_box_wh = y_true[..., 2:4] # number of cells across, horizontally and vertically\n",
+    "        \n",
+    "        ### adjust confidence: the target is the IOU between each prediction and its own ground-truth box\n",
+    "        true_wh_half = true_box_wh / 2.\n",
+    "        true_mins    = true_box_xy - true_wh_half\n",
+    "        true_maxes   = true_box_xy + true_wh_half\n",
+    "        \n",
+    "        pred_wh_half = pred_box_wh / 2.\n",
+    "        pred_mins    = pred_box_xy - pred_wh_half\n",
+    "        pred_maxes   = pred_box_xy + pred_wh_half       \n",
+    "        \n",
+    "        intersect_mins  = tf.maximum(pred_mins,  true_mins)\n",
+    "        intersect_maxes = tf.minimum(pred_maxes, true_maxes)\n",
+    "        intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)\n",
+    "        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]\n",
+    "        \n",
+    "        true_areas = true_box_wh[..., 0] * true_box_wh[..., 1]\n",
+    "        pred_areas = pred_box_wh[..., 0] * pred_box_wh[..., 1]\n",
+    "\n",
+    "        union_areas = pred_areas + true_areas - intersect_areas\n",
+    "        iou_scores  = tf.truediv(intersect_areas, union_areas)\n",
+    "        \n",
+    "        true_box_conf = iou_scores * y_true[..., 4]\n",
+    "        \n",
+    "        ### adjust class probabilities: index of the highest ground-truth class entry\n",
+    "        true_box_class = tf.argmax(y_true[..., 5:], -1)\n",
+    "        \n",
+    "        \"\"\"\n",
+    "        Determine the masks\n",
+    "        \"\"\"\n",
+    "        ### coordinate mask: simply the position of the ground truth boxes (the predictors)\n",
+    "        coord_mask = tf.expand_dims(y_true[..., 4], axis=-1) * self.coord_scale\n",
+    "        \n",
+    "        ### confidence mask: penalize predictors + penalize boxes with low IOU\n",
+    "        # penalize the confidence of the boxes, which have IOU with some ground truth box < 0.6\n",
+    "        # (every prediction is compared against all boxes in the self.true_boxes buffer)\n",
+    "        true_xy = self.true_boxes[..., 0:2]\n",
+    "        true_wh = self.true_boxes[..., 2:4]\n",
+    "        \n",
+    "        true_wh_half = true_wh / 2.\n",
+    "        true_mins    = true_xy - true_wh_half\n",
+    "        true_maxes   = true_xy + true_wh_half\n",
+    "        \n",
+    "        # extra axis 4 so the predictions broadcast against the buffer of true boxes\n",
+    "        pred_xy = tf.expand_dims(pred_box_xy, 4)\n",
+    "        pred_wh = tf.expand_dims(pred_box_wh, 4)\n",
+    "        \n",
+    "        pred_wh_half = pred_wh / 2.\n",
+    "        pred_mins    = pred_xy - pred_wh_half\n",
+    "        pred_maxes   = pred_xy + pred_wh_half    \n",
+    "        \n",
+    "        intersect_mins  = tf.maximum(pred_mins,  true_mins)\n",
+    "        intersect_maxes = tf.minimum(pred_maxes, true_maxes)\n",
+    "        intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)\n",
+    "        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]\n",
+    "        \n",
+    "        true_areas = true_wh[..., 0] * true_wh[..., 1]\n",
+    "        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]\n",
+    "\n",
+    "        union_areas = pred_areas + true_areas - intersect_areas\n",
+    "        iou_scores  = tf.truediv(intersect_areas, union_areas)\n",
+    "\n",
+    "        best_ious = tf.reduce_max(iou_scores, axis=4)\n",
+    "        conf_mask = conf_mask + tf.to_float(best_ious < 0.6) * (1 - y_true[..., 4]) * self.no_object_scale\n",
+    "        \n",
+    "        # penalize the confidence of the boxes, which are responsible for corresponding ground truth box\n",
+    "        conf_mask = conf_mask + y_true[..., 4] * self.object_scale\n",
+    "        \n",
+    "        ### class mask: simply the position of the ground truth boxes (the predictors)\n",
+    "        class_mask = y_true[..., 4] * tf.gather(self.class_wt, true_box_class) * self.class_scale       \n",
+    "        \n",
+    "        \"\"\"\n",
+    "        Warm-up training\n",
+    "        \"\"\"\n",
+    "        # 1.0 where the predictor has no ground-truth box assigned\n",
+    "        no_boxes_mask = tf.to_float(coord_mask < self.coord_scale/2.)\n",
+    "        seen = tf.assign_add(seen, 1.)\n",
+    "        \n",
+    "        # during warm-up, empty predictors are pulled towards the cell centre and the anchor size\n",
+    "        true_box_xy, true_box_wh, coord_mask = tf.cond(tf.less(seen, self.warmup_batches+1), \n",
+    "                              lambda: [true_box_xy + (0.5 + cell_grid) * no_boxes_mask, \n",
+    "                                       true_box_wh + tf.ones_like(true_box_wh) * \\\n",
+    "                                       np.reshape(self.anchors, [1,1,1,self.nb_box,2]) * \\\n",
+    "                                       no_boxes_mask, \n",
+    "                                       tf.ones_like(coord_mask)],\n",
+    "                              lambda: [true_box_xy, \n",
+    "                                       true_box_wh,\n",
+    "                                       coord_mask])\n",
+    "        \n",
+    "        \"\"\"\n",
+    "        Finalize the loss\n",
+    "        \"\"\"\n",
+    "        # number of active entries per mask; each loss term is normalised by it (1e-6 avoids 0/0)\n",
+    "        nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0))\n",
+    "        nb_conf_box  = tf.reduce_sum(tf.to_float(conf_mask  > 0.0))\n",
+    "        nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0))\n",
+    "        \n",
+    "        loss_xy    = tf.reduce_sum(tf.square(true_box_xy-pred_box_xy)     * coord_mask) / (nb_coord_box + 1e-6) / 2.\n",
+    "        loss_wh    = tf.reduce_sum(tf.square(true_box_wh-pred_box_wh)     * coord_mask) / (nb_coord_box + 1e-6) / 2.\n",
+    "        loss_conf  = tf.reduce_sum(tf.square(true_box_conf-pred_box_conf) * conf_mask)  / (nb_conf_box  + 1e-6) / 2.\n",
+    "        loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class)\n",
+    "        loss_class = tf.reduce_sum(loss_class * class_mask) / (nb_class_box + 1e-6)\n",
+    "        \n",
+    "        # the constant 10 is added only while warming up\n",
+    "        loss = tf.cond(tf.less(seen, self.warmup_batches+1), \n",
+    "                      lambda: loss_xy + loss_wh + loss_conf + loss_class + 10,\n",
+    "                      lambda: loss_xy + loss_wh + loss_conf + loss_class)\n",
+    "        \n",
+    "        if self.debug:\n",
+    "            # recall estimate: predictors with a true IOU target > 0.5 whose predicted confidence is > 0.3\n",
+    "            nb_true_box = tf.reduce_sum(y_true[..., 4])\n",
+    "            nb_pred_box = tf.reduce_sum(tf.to_float(true_box_conf > 0.5) * tf.to_float(pred_box_conf > 0.3))\n",
+    "            \n",
+    "            current_recall = nb_pred_box/(nb_true_box + 1e-6)\n",
+    "            total_recall = tf.assign_add(total_recall, current_recall) \n",
+    "\n",
+    "            # tf.Print passes loss through unchanged and logs the listed tensors when it is evaluated\n",
+    "            loss = tf.Print(loss, [loss_xy], message='Loss XY \\t', summarize=1000)\n",
+    "            loss = tf.Print(loss, [loss_wh], message='Loss WH \\t', summarize=1000)\n",
+    "            loss = tf.Print(loss, [loss_conf], message='Loss Conf \\t', summarize=1000)\n",
+    "            loss = tf.Print(loss, [loss_class], message='Loss Class \\t', summarize=1000)\n",
+    "            loss = tf.Print(loss, [loss], message='Total Loss \\t', summarize=1000)\n",
+    "            loss = tf.Print(loss, [current_recall], message='Current Recall \\t', summarize=1000)\n",
+    "            loss = tf.Print(loss, [total_recall/seen], message='Average Recall \\t', summarize=1000)\n",
+    "        \n",
+    "        return loss\n",
+    "\n",
+    "    def load_weights(self, weight_path):\n",
+    "        \"\"\"Load the weights stored at `weight_path` into the underlying Keras model.\"\"\"\n",
+    "        self.model.load_weights(weight_path)\n",
+    "\n",
+    "    def train(self, train_imgs,     # the list of images to train the model\n",
+    "                    valid_imgs,     # the list of images used to validate the model\n",
+    "                    train_times,    # the number of time to repeat the training set, often used for small datasets\n",
+    "                    valid_times,    # the number of times to repeat the validation set, often used for small datasets\n",
+    "                    nb_epochs,      # number of epoches\n",
+    "                    learning_rate,  # the learning rate\n",
+    "                    batch_size,     # the size of the batch\n",
+    "                    warmup_epochs,  # number of initial epochs during which custom_loss applies its warm-up targets\n",
+    "                    object_scale,   # loss weight for the confidence of predictors that hold an object\n",
+    "                    no_object_scale, # loss weight for the confidence of predictors without an object\n",
+    "                    coord_scale,    # loss weight for the box coordinates\n",
+    "                    class_scale,    # loss weight for the class term\n",
+    "                    saved_weights_name='best_weights.h5', # file the checkpoint callback writes the best weights to\n",
+    "                    debug=False):     # when True, custom_loss logs its terms and Keras uses verbose=2\n",
+    "        \"\"\"Compile and fit the model, then print the per-class AP and the mAP on the validation set.\n",
+    "\n",
+    "        Stores the batch size, the loss weights, the warm-up length and the debug flag\n",
+    "        on the instance because custom_loss reads them from `self`.\n",
+    "\n",
+    "        Training runs for `warmup_epochs + nb_epochs` epochs at most; it stops early when\n",
+    "        val_loss has not improved by 0.001 for 3 epochs, and only the weights with the best\n",
+    "        val_loss are saved.\n",
+    "        \"\"\"\n",
+    "\n",
+    "        self.batch_size = batch_size\n",
+    "\n",
+    "        self.object_scale    = object_scale\n",
+    "        self.no_object_scale = no_object_scale\n",
+    "        self.coord_scale     = coord_scale\n",
+    "        self.class_scale     = class_scale\n",
+    "\n",
+    "        self.debug = debug\n",
+    "\n",
+    "        ############################################\n",
+    "        # Make train and validation generators\n",
+    "        ############################################\n",
+    "\n",
+    "        generator_config = {\n",
+    "            'IMAGE_H'         : self.input_size, \n",
+    "            'IMAGE_W'         : self.input_size,\n",
+    "            'GRID_H'          : self.grid_h,  \n",
+    "            'GRID_W'          : self.grid_w,\n",
+    "            'BOX'             : self.nb_box,\n",
+    "            'LABELS'          : self.labels,\n",
+    "            'CLASS'           : len(self.labels),\n",
+    "            'ANCHORS'         : self.anchors,\n",
+    "            'BATCH_SIZE'      : self.batch_size,\n",
+    "            'TRUE_BOX_BUFFER' : self.max_box_per_image,\n",
+    "        }    \n",
+    "\n",
+    "        train_generator = BatchGenerator(train_imgs, \n",
+    "                                     generator_config, \n",
+    "                                     norm=self.feature_extractor.normalize)\n",
+    "        valid_generator = BatchGenerator(valid_imgs, \n",
+    "                                     generator_config, \n",
+    "                                     norm=self.feature_extractor.normalize,\n",
+    "                                     jitter=False)   \n",
+    "                                     \n",
+    "        # the warm-up length counts training and validation batches alike\n",
+    "        self.warmup_batches  = warmup_epochs * (train_times*len(train_generator) + valid_times*len(valid_generator))   \n",
+    "\n",
+    "        ############################################\n",
+    "        # Compile the model\n",
+    "        ############################################\n",
+    "\n",
+    "        optimizer = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)\n",
+    "        self.model.compile(loss=self.custom_loss, optimizer=optimizer)\n",
+    "\n",
+    "        ############################################\n",
+    "        # Make a few callbacks\n",
+    "        ############################################\n",
+    "\n",
+    "        early_stop = EarlyStopping(monitor='val_loss', \n",
+    "                           min_delta=0.001, \n",
+    "                           patience=3, \n",
+    "                           mode='min', \n",
+    "                           verbose=1)\n",
+    "        checkpoint = ModelCheckpoint(saved_weights_name, \n",
+    "                                     monitor='val_loss', \n",
+    "                                     verbose=1, \n",
+    "                                     save_best_only=True, \n",
+    "                                     mode='min', \n",
+    "                                     period=1)\n",
+    "        # TensorBoard logs are written to the 'logs' folder in the user's home directory\n",
+    "        tensorboard = TensorBoard(log_dir=os.path.expanduser('~/logs/'), \n",
+    "                                  histogram_freq=0, \n",
+    "                                  #write_batch_performance=True,\n",
+    "                                  write_graph=True, \n",
+    "                                  write_images=False)\n",
+    "\n",
+    "        ############################################\n",
+    "        # Start the training process\n",
+    "        ############################################        \n",
+    "\n",
+    "        self.model.fit_generator(generator        = train_generator, \n",
+    "                                 steps_per_epoch  = len(train_generator) * train_times, \n",
+    "                                 epochs           = warmup_epochs + nb_epochs, \n",
+    "                                 verbose          = 2 if debug else 1,\n",
+    "                                 validation_data  = valid_generator,\n",
+    "                                 validation_steps = len(valid_generator) * valid_times,\n",
+    "                                 callbacks        = [early_stop, checkpoint, tensorboard], \n",
+    "                                 workers          = 3,\n",
+    "                                 max_queue_size   = 8)\n",
+    "\n",
+    "        ############################################\n",
+    "        # Compute mAP on the validation set\n",
+    "        ############################################\n",
+    "        average_precisions = self.evaluate(valid_generator)     \n",
+    "\n",
+    "        # print evaluation (the keys of average_precisions are class indices into self.labels)\n",
+    "        for label, average_precision in average_precisions.items():\n",
+    "            print(self.labels[label], '{:.4f}'.format(average_precision))\n",
+    "        print('mAP: {:.4f}'.format(sum(average_precisions.values()) / len(average_precisions)))         \n",
+    "\n",
+    "    def evaluate(self, \n",
+    "                 generator, \n",
+    "                 iou_threshold=0.3,\n",
+    "                 score_threshold=0.3,\n",
+    "                 max_detections=100,\n",
+    "                 save_path=None):\n",
+    "        \"\"\" Compute the average precision of every class over a dataset.\n",
+    "        code originally from https://github.com/fizyr/keras-retinanet\n",
+    "\n",
+    "        # Arguments\n",
+    "            generator       : The generator that represents the dataset to evaluate.\n",
+    "            iou_threshold   : Minimum overlap with a not-yet-matched annotation for a detection to count as a true positive.\n",
+    "            score_threshold : Accepted for compatibility; not used by this implementation.\n",
+    "            max_detections  : Accepted for compatibility; not used by this implementation.\n",
+    "            save_path       : Accepted for compatibility; not used by this implementation.\n",
+    "        # Returns\n",
+    "            A dict mapping each class index to its average precision.\n",
+    "        \"\"\"    \n",
+    "        # gather all detections and annotations, indexed as [image][class]\n",
+    "        all_detections     = [[None for i in range(generator.num_classes())] for j in range(generator.size())]\n",
+    "        all_annotations    = [[None for i in range(generator.num_classes())] for j in range(generator.size())]\n",
+    "\n",
+    "        for i in range(generator.size()):\n",
+    "            raw_image = generator.load_image(i)\n",
+    "            raw_height, raw_width, raw_channels = raw_image.shape\n",
+    "\n",
+    "            # make the boxes and the labels\n",
+    "            pred_boxes  = self.predict(raw_image)\n",
+    "\n",
+    "            \n",
+    "            # NOTE(review): reads the score / label attributes directly; presumably they are\n",
+    "            # filled in by the get_score() call inside decode_netout -- confirm against BoundBox\n",
+    "            score = np.array([box.score for box in pred_boxes])\n",
+    "            pred_labels = np.array([box.label for box in pred_boxes])        \n",
+    "            \n",
+    "            # scale the box corners by the image size and append the score as a fifth column\n",
+    "            if len(pred_boxes) > 0:\n",
+    "                pred_boxes = np.array([[box.xmin*raw_width, box.ymin*raw_height, box.xmax*raw_width, box.ymax*raw_height, box.score] for box in pred_boxes])\n",
+    "            else:\n",
+    "                pred_boxes = np.array([[]])  \n",
+    "            \n",
+    "            # sort the boxes and the labels according to scores\n",
+    "            score_sort = np.argsort(-score)\n",
+    "            pred_labels = pred_labels[score_sort]\n",
+    "            pred_boxes  = pred_boxes[score_sort]\n",
+    "            \n",
+    "            # copy detections to all_detections\n",
+    "            for label in range(generator.num_classes()):\n",
+    "                all_detections[i][label] = pred_boxes[pred_labels == label, :]\n",
+    "                \n",
+    "            annotations = generator.load_annotation(i)\n",
+    "            \n",
+    "            # copy annotations to all_annotations (column 4 is the class, columns 0..3 the box)\n",
+    "            for label in range(generator.num_classes()):\n",
+    "                all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy()\n",
+    "                \n",
+    "        # compute mAP by comparing all detections and all annotations\n",
+    "        average_precisions = {}\n",
+    "        \n",
+    "        for label in range(generator.num_classes()):\n",
+    "            false_positives = np.zeros((0,))\n",
+    "            true_positives  = np.zeros((0,))\n",
+    "            scores          = np.zeros((0,))\n",
+    "            num_annotations = 0.0\n",
+    "\n",
+    "            for i in range(generator.size()):\n",
+    "                detections           = all_detections[i][label]\n",
+    "                annotations          = all_annotations[i][label]\n",
+    "                num_annotations     += annotations.shape[0]\n",
+    "                detected_annotations = []\n",
+    "\n",
+    "                for d in detections:\n",
+    "                    scores = np.append(scores, d[4])\n",
+    "\n",
+    "                    # nothing to match against: every detection is a false positive\n",
+    "                    if annotations.shape[0] == 0:\n",
+    "                        false_positives = np.append(false_positives, 1)\n",
+    "                        true_positives  = np.append(true_positives, 0)\n",
+    "                        continue\n",
+    "\n",
+    "                    overlaps            = compute_overlap(np.expand_dims(d, axis=0), annotations)\n",
+    "                    assigned_annotation = np.argmax(overlaps, axis=1)\n",
+    "                    max_overlap         = overlaps[0, assigned_annotation]\n",
+    "\n",
+    "                    # an annotation can be matched only once; later detections of it count as false positives\n",
+    "                    if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:\n",
+    "                        false_positives = np.append(false_positives, 0)\n",
+    "                        true_positives  = np.append(true_positives, 1)\n",
+    "                        detected_annotations.append(assigned_annotation)\n",
+    "                    else:\n",
+    "                        false_positives = np.append(false_positives, 1)\n",
+    "                        true_positives  = np.append(true_positives, 0)\n",
+    "\n",
+    "            # no annotations -> AP for this class is 0 (is this correct?)\n",
+    "            if num_annotations == 0:\n",
+    "                average_precisions[label] = 0\n",
+    "                continue\n",
+    "\n",
+    "            # sort by score\n",
+    "            indices         = np.argsort(-scores)\n",
+    "            false_positives = false_positives[indices]\n",
+    "            true_positives  = true_positives[indices]\n",
+    "\n",
+    "            # compute false positives and true positives\n",
+    "            false_positives = np.cumsum(false_positives)\n",
+    "            true_positives  = np.cumsum(true_positives)\n",
+    "\n",
+    "            # compute recall and precision\n",
+    "            recall    = true_positives / num_annotations\n",
+    "            precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)\n",
+    "\n",
+    "            # compute average precision\n",
+    "            average_precision  = compute_ap(recall, precision)  \n",
+    "            average_precisions[label] = average_precision\n",
+    "\n",
+    "        return average_precisions    \n",
+    "\n",
+    "    def predict(self, image):\n",
+    "        \"\"\"Run the detector on a single image and return the decoded boxes.\n",
+    "\n",
+    "        # Arguments\n",
+    "            image: 3-D image array (height, width, channels).\n",
+    "        # Returns\n",
+    "            The list of boxes produced by decode_netout.\n",
+    "        \"\"\"\n",
+    "        # the unpacking doubles as a check that the input is a 3-D array\n",
+    "        _height, _width, _channels = image.shape\n",
+    "\n",
+    "        side = self.input_size\n",
+    "        normalized = self.feature_extractor.normalize(cv2.resize(image, (side, side)))\n",
+    "\n",
+    "        # reverse the channel axis and add a leading batch axis of size 1\n",
+    "        batch = np.expand_dims(normalized[:, :, ::-1], 0)\n",
+    "        # the second model input (ground-truth boxes) is only read by the training loss; feed zeros\n",
+    "        placeholder_boxes = np.zeros((1, 1, 1, 1, self.max_box_per_image, 4))\n",
+    "\n",
+    "        raw_output = self.model.predict([batch, placeholder_boxes])[0]\n",
+    "\n",
+    "        return decode_netout(raw_output, self.anchors, self.nb_class)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Crear las anclas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-06-18T21:18:13.677787Z",
+     "start_time": "2020-06-18T21:18:13.625911Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import random\n",
+    "\n",
+    "num_anchors = 5\n",
+    "\n",
+    "def IOU(ann, centroids):\n",
+    "    \"\"\"IOU between one (width, height) box and every centroid, with the boxes aligned at a common corner.\n",
+    "\n",
+    "    Returns an ndarray of shape (k,), one similarity per centroid.\n",
+    "    \"\"\"\n",
+    "    w, h = ann\n",
+    "\n",
+    "    def _aligned_iou(c_w, c_h):\n",
+    "        if c_w >= w:\n",
+    "            if c_h >= h:\n",
+    "                # the centroid contains the box\n",
+    "                return w*h/(c_w*c_h)\n",
+    "            # centroid is wider but shorter\n",
+    "            return w*c_h/(w*h + (c_w-w)*c_h)\n",
+    "        if c_h >= h:\n",
+    "            # centroid is narrower but taller\n",
+    "            return c_w*h/(w*h + c_w*(c_h-h))\n",
+    "        # the box contains the centroid\n",
+    "        return (c_w*c_h)/(w*h)\n",
+    "\n",
+    "    return np.array([_aligned_iou(c_w, c_h) for c_w, c_h in centroids])\n",
+    "\n",
+    "def avg_IOU(anns, centroids):\n",
+    "    \"\"\"Mean, over all annotations, of each annotation's best IOU with any centroid.\"\"\"\n",
+    "    count, _ = anns.shape\n",
+    "    total = 0.\n",
+    "\n",
+    "    for ann in anns:\n",
+    "        total += max(IOU(ann, centroids))\n",
+    "\n",
+    "    return total/count\n",
+    "\n",
+    "def print_anchors(centroids):\n",
+    "    \"\"\"Print the anchors, sorted by width, as 'anchors: [w,h, w,h, ...]'.\n",
+    "\n",
+    "    # Arguments\n",
+    "        centroids: 2-D array whose columns 0 and 1 are the anchor width and height.\n",
+    "    \"\"\"\n",
+    "    by_width = np.argsort(centroids[:, 0])\n",
+    "\n",
+    "    # every value is indexed as a scalar, so '%0.2f' never has to convert a size-1 array\n",
+    "    pairs = ['%0.2f,%0.2f' % (centroids[i, 0], centroids[i, 1]) for i in by_width]\n",
+    "\n",
+    "    # join() puts the separator only between pairs, so the last anchor needs no special case\n",
+    "    print('anchors: [' + ', '.join(pairs) + ']')\n",
+    "\n",
+    "def run_kmeans(ann_dims, anchor_num):\n",
+    "    \"\"\"Cluster box sizes with k-means, using 1 - IOU as the distance.\n",
+    "\n",
+    "    The starting centroids are drawn with the `random` module, so the result\n",
+    "    changes between runs unless `random.seed` is called first.\n",
+    "\n",
+    "    # Arguments\n",
+    "        ann_dims:   (N, 2) array with one (width, height) pair per annotated box.\n",
+    "        anchor_num: number of clusters (anchors) to produce.\n",
+    "    # Returns\n",
+    "        (anchor_num, 2) array with one (width, height) centroid per cluster.\n",
+    "    \"\"\"\n",
+    "    ann_num = ann_dims.shape[0]\n",
+    "    anchor_dim = ann_dims.shape[1]\n",
+    "    prev_assignments = np.ones(ann_num)*(-1)\n",
+    "    iteration = 0\n",
+    "    old_distances = np.zeros((ann_num, anchor_num))\n",
+    "\n",
+    "    # start from randomly chosen annotations (indexing with a list copies, so ann_dims is never modified)\n",
+    "    indices = [random.randrange(ann_num) for i in range(anchor_num)]\n",
+    "    centroids = ann_dims[indices]\n",
+    "\n",
+    "    while True:\n",
+    "        distances = []\n",
+    "        iteration += 1\n",
+    "        for i in range(ann_num):\n",
+    "            d = 1 - IOU(ann_dims[i], centroids)\n",
+    "            distances.append(d)\n",
+    "        distances = np.array(distances) # distances.shape = (ann_num, anchor_num)\n",
+    "\n",
+    "        print(\"iteration {}: dists = {}\".format(iteration, np.sum(np.abs(old_distances-distances))))\n",
+    "\n",
+    "        #assign samples to centroids\n",
+    "        assignments = np.argmin(distances,axis=1)\n",
+    "\n",
+    "        # converged once no sample changes cluster\n",
+    "        if (assignments == prev_assignments).all() :\n",
+    "            return centroids\n",
+    "\n",
+    "        #calculate new centroids as the mean size of each cluster\n",
+    "        # (builtin float replaces the np.float alias, which NumPy 1.24 removed)\n",
+    "        centroid_sums=np.zeros((anchor_num, anchor_dim), float)\n",
+    "        for i in range(ann_num):\n",
+    "            centroid_sums[assignments[i]]+=ann_dims[i]\n",
+    "        for j in range(anchor_num):\n",
+    "            # the 1e-6 keeps the division defined when a cluster received no samples\n",
+    "            centroids[j] = centroid_sums[j]/(np.sum(assignments==j) + 1e-6)\n",
+    "\n",
+    "        prev_assignments = assignments.copy()\n",
+    "        old_distances = distances.copy()\n",
+    "\n",
+    "# number of grid cells per side: the input size divided by 32\n",
+    "grid_w = grid_h = tamanio/32\n",
+    "\n",
+    "# collect the size of every annotated box, expressed in grid-cell units\n",
+    "annotation_dims = []\n",
+    "for image in train_imgs:\n",
+    "    # size of one grid cell measured in this image's pixels\n",
+    "    cell_w, cell_h = image['width']/grid_w, image['height']/grid_h\n",
+    "    for obj in image['object']:\n",
+    "        box_w = float(obj['xmax']) - float(obj['xmin'])\n",
+    "        box_h = float(obj['ymax']) - float(obj['ymin'])\n",
+    "        annotation_dims.append((float(box_w/cell_w), float(box_h/cell_h)))\n",
+    "\n",
+    "# cluster the box sizes with k-means to obtain the anchors\n",
+    "annotation_dims = np.array(annotation_dims)\n",
+    "centroids = run_kmeans(annotation_dims, num_anchors)\n",
+    "\n",
+    "# report the clustering quality and print the anchors (nothing is written to disk here)\n",
+    "print('\\naverage IOU for', num_anchors, 'anchors:', '%0.2f' % avg_IOU(annotation_dims, centroids))\n",
+    "print_anchors(centroids)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-06-18T21:18:14.640275Z",
+     "start_time": "2020-06-18T21:18:14.603606Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# approximate values for the lego dataset: [1.90,3.02, 3.05,5.06, 4.35,2.91, 4.66,7.49, 7.24,10.12]\n",
+    "# flatten the centroids into one list: [w0, h0, w1, h1, ...]\n",
+    "anchors = []\n",
+    "for centroid in centroids:\n",
+    "    anchors.extend((centroid[0], centroid[1]))\n",
+    "anchors"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Y... ¡a entrenar la Red Neuronal!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-06-18T21:18:31.392052Z",
+     "start_time": "2020-06-18T21:18:18.684936Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(13, 13)\n",
+      "____________________________________________________________________________________________________\n",
+      "Layer (type)                     Output Shape          Param #     Connected to                     \n",
+      "====================================================================================================\n",
+      "input_7 (InputLayer)             (None, 416, 416, 3)   0                                            \n",
+      "____________________________________________________________________________________________________\n",
+      "model_5 (Model)                  (None, 13, 13, 1024)  50547936    input_7[0][0]                    \n",
+      "____________________________________________________________________________________________________\n",
+      "DetectionLayer (Conv2D)          (None, 13, 13, 30)    30750       model_5[1][0]                    \n",
+      "____________________________________________________________________________________________________\n",
+      "reshape_3 (Reshape)              (None, 13, 13, 5, 6)  0           DetectionLayer[0][0]             \n",
+      "____________________________________________________________________________________________________\n",
+      "input_8 (InputLayer)             (None, 1, 1, 1, 5, 4) 0                                            \n",
+      "____________________________________________________________________________________________________\n",
+      "lambda_6 (Lambda)                (None, 13, 13, 5, 6)  0           reshape_3[0][0]                  \n",
+      "                                                                   input_8[0][0]                    \n",
+      "====================================================================================================\n",
+      "Total params: 50,578,686\n",
+      "Trainable params: 50,558,014\n",
+      "Non-trainable params: 20,672\n",
+      "____________________________________________________________________________________________________\n"
+     ]
+    }
+   ],
+   "source": [
+    "# instantiate the model with the anchors computed in the previous cell\n",
+    "yolo = YOLO(input_size=tamanio,\n",
+    "            labels=labels,\n",
+    "            max_box_per_image=5,\n",
+    "            anchors=anchors)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "ATENCIÓN: al entrenar en mi ordenador (CPU de 4 núcleos y 8 GB de RAM) \n",
+    "\n",
+    "con 6 epochs\n",
+    "\n",
+    "puede tomar unas 7 horas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-06-18T21:27:38.931042Z",
+     "start_time": "2020-06-18T21:19:36.249340Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# NOTE(review): in the visible part of the notebook mejores_pesos is only assigned in the\n",
+    "# prediction cell further down; confirm it is also defined before this cell on a fresh kernel\n",
+    "yolo.train(train_imgs=train_imgs,\n",
+    "           valid_imgs=valid_imgs,\n",
+    "           train_times=6,\n",
+    "           valid_times=1,\n",
+    "           nb_epochs=6,\n",
+    "           learning_rate=1e-4,\n",
+    "           batch_size=8,\n",
+    "           warmup_epochs=2,\n",
+    "           object_scale=5, no_object_scale=1,\n",
+    "           coord_scale=1, class_scale=1,\n",
+    "           saved_weights_name=mejores_pesos,\n",
+    "           debug=True)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Predicción de una imagen que la red no ha visto nunca"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<img src=\"images/test/lego_girl.png\" width=416 height=416 />"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-06-18T18:39:23.469061Z",
+     "start_time": "2020-06-18T18:39:23.458089Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "def draw_boxes(image, boxes, labels):\n",
+    "    \"\"\"Draw each box and its 'label score' caption on image and return the image.\n",
+    "\n",
+    "    boxes: objects exposing xmin, ymin, xmax, ymax, get_label() and get_score().\n",
+    "    \"\"\"\n",
+    "    image_h, image_w, _ = image.shape\n",
+    "\n",
+    "    for box in boxes:\n",
+    "        # box coordinates are multiplied by the image size to obtain pixel positions\n",
+    "        top_left = (int(box.xmin*image_w), int(box.ymin*image_h))\n",
+    "        bottom_right = (int(box.xmax*image_w), int(box.ymax*image_h))\n",
+    "        cv2.rectangle(image, top_left, bottom_right, (0,255,0), 3)\n",
+    "\n",
+    "        # caption goes 13 px above the top-left corner; font scale is 1e-3 of the image height\n",
+    "        caption = labels[box.get_label()] + ' ' + str(box.get_score())\n",
+    "        cv2.putText(image, caption, (top_left[0], top_left[1] - 13),\n",
+    "                    cv2.FONT_HERSHEY_SIMPLEX, 1e-3 * image_h, (0,255,0), 2)\n",
+    "    return image\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-06-18T18:40:04.330466Z",
+     "start_time": "2020-06-18T18:39:52.375770Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(13, 13)\n",
+      "____________________________________________________________________________________________________\n",
+      "Layer (type)                     Output Shape          Param #     Connected to                     \n",
+      "====================================================================================================\n",
+      "input_4 (InputLayer)             (None, 416, 416, 3)   0                                            \n",
+      "____________________________________________________________________________________________________\n",
+      "model_3 (Model)                  (None, 13, 13, 1024)  50547936    input_4[0][0]                    \n",
+      "____________________________________________________________________________________________________\n",
+      "DetectionLayer (Conv2D)          (None, 13, 13, 30)    30750       model_3[1][0]                    \n",
+      "____________________________________________________________________________________________________\n",
+      "reshape_2 (Reshape)              (None, 13, 13, 5, 6)  0           DetectionLayer[0][0]             \n",
+      "____________________________________________________________________________________________________\n",
+      "input_5 (InputLayer)             (None, 1, 1, 1, 5, 4) 0                                            \n",
+      "____________________________________________________________________________________________________\n",
+      "lambda_4 (Lambda)                (None, 13, 13, 5, 6)  0           reshape_2[0][0]                  \n",
+      "                                                                   input_5[0][0]                    \n",
+      "====================================================================================================\n",
+      "Total params: 50,578,686\n",
+      "Trainable params: 50,558,014\n",
+      "Non-trainable params: 20,672\n",
+      "____________________________________________________________________________________________________\n",
+      "Detectados 1\n"
+     ]
+    }
+   ],
+   "source": [
+    "mejores_pesos = \"red_lego.h5\"\n",
+    "image_path = \"images/test/lego_girl.png\"\n",
+    "\n",
+    "# rebuild the network with the same arguments as the training model and load the saved weights\n",
+    "mi_yolo = YOLO(input_size=tamanio, labels=labels,\n",
+    "               max_box_per_image=5, anchors=anchors)\n",
+    "mi_yolo.load_weights(mejores_pesos)\n",
+    "\n",
+    "# cv2.imread returns None instead of raising when the file cannot be read\n",
+    "image = cv2.imread(image_path)\n",
+    "if image is None:\n",
+    "    raise FileNotFoundError('cannot read image: ' + image_path)\n",
+    "boxes = mi_yolo.predict(image)\n",
+    "image = draw_boxes(image, boxes, labels)\n",
+    "\n",
+    "print('Detectados', len(boxes))\n",
+    "# save the annotated copy next to the original, with '_detected' inserted before the extension\n",
+    "cv2.imwrite(image_path[:-4] + '_detected' + image_path[-4:], image)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<img src=\"images/test/lego_girl_detected.png\" width=416 height=416 />"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Y otro ejemplo sobre el archivo lego_misc.png:"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-06-14T23:30:01.947364Z",
+     "start_time": "2020-06-14T23:30:01.938602Z"
+    }
+   },
+   "source": [
+    "<img src=\"images/test/lego_misc_detected.png\" width=416 height=416 />"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# En un video mp4"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-06-18T19:24:19.281673Z",
+     "start_time": "2020-06-18T19:14:22.501179Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 791/791 [09:56<00:00,  1.33it/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "from tqdm import tqdm\n",
+    "\n",
+    "video_path = 'images/test/lego_movie.mp4'\n",
+    "video_out = video_path[:-4] + '_detected' + video_path[-4:]\n",
+    "video_reader = cv2.VideoCapture(video_path)\n",
+    "nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))\n",
+    "frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))\n",
+    "frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))\n",
+    "\n",
+    "# NOTE(review): the output is written at a fixed 50 fps; confirm it matches the source video\n",
+    "video_writer = cv2.VideoWriter(video_out, cv2.VideoWriter_fourcc(*'MPEG'),\n",
+    "                               50.0, (frame_w, frame_h))\n",
+    "try:\n",
+    "    for i in tqdm(range(nb_frames)):\n",
+    "        ok, image = video_reader.read()\n",
+    "        if not ok:\n",
+    "            # read() reports failure through its first return value; stop instead of predicting on None\n",
+    "            break\n",
+    "        boxes = mi_yolo.predict(image)\n",
+    "        image = draw_boxes(image, boxes, labels)\n",
+    "        video_writer.write(np.uint8(image))\n",
+    "finally:\n",
+    "    # release the capture and the writer even if a frame fails\n",
+    "    video_reader.release()\n",
+    "    video_writer.release()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Desde tu cámara Web"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## (Para salir, presiona \"q\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-06-18T16:16:57.933534Z",
+     "start_time": "2020-06-18T16:16:41.446306Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "win_name = 'Lego detection'\n",
+    "cv2.namedWindow(win_name)\n",
+    "video_reader = cv2.VideoCapture(0)\n",
+    "try:\n",
+    "    while True:\n",
+    "        ok, image = video_reader.read()\n",
+    "        if not ok:\n",
+    "            # no frame was delivered (e.g. the camera is missing or busy); stop instead of crashing\n",
+    "            break\n",
+    "        boxes = mi_yolo.predict(image)\n",
+    "        image = draw_boxes(image, boxes, labels)\n",
+    "        cv2.imshow(win_name, image)\n",
+    "        # leave the loop when 'q' is pressed\n",
+    "        if (cv2.waitKey(1) & 0xFF) == ord('q'):\n",
+    "            break\n",
+    "finally:\n",
+    "    # always close the window and free the camera, even if prediction raises\n",
+    "    cv2.destroyAllWindows()\n",
+    "    video_reader.release()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "El artículo completo en www.aprendemachinelearning.com"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}