Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
[MXNET-1249] Fix Object Detector Performance with GPU (#13522)
Browse files Browse the repository at this point in the history
* Reduce post processing time

* fix ssd

* fix the CI

* add comments
  • Loading branch information
lanking520 committed Dec 5, 2018
1 parent cb0db29 commit 1c8972c
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import org.slf4j.LoggerFactory

import scala.io
import scala.collection.mutable.ListBuffer
import scala.collection.parallel.mutable.ParArray

trait ClassifierBase {

Expand Down Expand Up @@ -110,16 +111,21 @@ class Classifier(modelPathPrefix: String,
: IndexedSeq[IndexedSeq[(String, Float)]] = {

// considering only the first output
val predictResultND: NDArray = predictor.predictWithNDArray(input)(0)

val predictResult: ListBuffer[Array[Float]] = ListBuffer[Array[Float]]()
// Copy NDArray to CPU to avoid frequent GPU to CPU copying
val predictResultND: NDArray =
predictor.predictWithNDArray(input)(0).asInContext(Context.cpu())
// Parallel Execution with ParArray for better performance
val predictResultPar: ParArray[Array[Float]] =
new ParArray[Array[Float]](predictResultND.shape(0))

// iterating over the individual items (batch size is in axis 0)
for (i <- 0 until predictResultND.shape(0)) {
(0 until predictResultND.shape(0)).toVector.par.foreach( i => {
val r = predictResultND.at(i)
predictResult += r.toArray
predictResultPar(i) = r.toArray
r.dispose()
}
})

val predictResult = predictResultPar.toArray

var result: ListBuffer[IndexedSeq[(String, Float)]] =
ListBuffer.empty[IndexedSeq[(String, Float)]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ package org.apache.mxnet.infer

// scalastyle:off
import java.awt.image.BufferedImage

import scala.collection.parallel.mutable.ParArray
// scalastyle:on
import org.apache.mxnet.NDArray
import org.apache.mxnet.DataDesc
Expand Down Expand Up @@ -94,39 +96,39 @@ class ObjectDetector(modelPathPrefix: String,
/**
 * Runs the predictor on `input`, takes the first output, and reformats each slice along
 * axis 0 with sortAndReformat.
 *
 * NOTE(review): this listing looks like a rendered diff with the +/- markers stripped.
 * The ListBuffer / `for` / `batchResult +=` lines appear to be the removed version and the
 * asInContext / ParArray / `.par.foreach` / `batchResult(i) =` lines the added version;
 * confirm against the real file before editing.
 *
 * @param input NDArrays handed to predictor.predictWithNDArray
 * @param topK  forwarded unchanged to sortAndReformat for every slice
 * @return one IndexedSeq of (String, Array[Float]) pairs per index along axis 0 of the
 *         first prediction output
 */
def objectDetectWithNDArray(input: IndexedSeq[NDArray], topK: Option[Int])
: IndexedSeq[IndexedSeq[(String, Array[Float])]] = {

val predictResult = predictor.predictWithNDArray(input)(0)
var batchResult = ListBuffer[IndexedSeq[(String, Array[Float])]]()
for (i <- 0 until predictResult.shape(0)) {
// Copy NDArray to CPU to avoid frequent GPU to CPU copying
val predictResult = predictor.predictWithNDArray(input)(0).asInContext(Context.cpu())
// Parallel Execution with ParArray for better performance
var batchResult = new ParArray[IndexedSeq[(String, Array[Float])]](predictResult.shape(0))
(0 until predictResult.shape(0)).toArray.par.foreach( i => {
// r is the i-th slice of the prediction output along axis 0
val r = predictResult.at(i)
batchResult += sortAndReformat(r, topK)
// Writing by index keeps results in input order regardless of which task finishes first
batchResult(i) = sortAndReformat(r, topK)
// The slice is disposed once its reformatted result has been stored
handler.execute(r.dispose())
}
})
// The full prediction output is disposed after every slice has been processed
handler.execute(predictResult.dispose())
batchResult.toIndexedSeq
}

private[infer] def sortAndReformat(predictResultND: NDArray, topK: Option[Int])
: IndexedSeq[(String, Array[Float])] = {
val predictResult: ListBuffer[Array[Float]] = ListBuffer[Array[Float]]()
val accuracy: ListBuffer[Float] = ListBuffer[Float]()

// iterating over all the predictions
val length = predictResultND.shape(0)

for (i <- 0 until length) {
val predictResult = (0 until length).toArray.par.flatMap( i => {
val r = predictResultND.at(i)
val tempArr = r.toArray
if (tempArr(0) != -1.0) {
predictResult += tempArr
accuracy += tempArr(1)
val res = if (tempArr(0) != -1.0) {
Array[Array[Float]](tempArr)
} else {
// Skip rows whose first value is -1
Array[Array[Float]]()
}
handler.execute(r.dispose())
}
res
}).toArray
var result = IndexedSeq[(String, Array[Float])]()
if (topK.isDefined) {
var sortedIndices = accuracy.zipWithIndex.sortBy(-_._1).map(_._2)
var sortedIndices = predictResult.zipWithIndex.sortBy(-_._1(1)).map(_._2)
sortedIndices = sortedIndices.take(topK.get)
// takeRight(5) would provide the output as Array[Accuracy, Xmin, Ymin, Xmax, Ymax]
result = sortedIndices.map(idx
Expand All @@ -136,7 +138,6 @@ class ObjectDetector(modelPathPrefix: String,
result = predictResult.map(ele
=> (synset(ele(0).toInt), ele.takeRight(5))).toIndexedSeq
}

result
}

Expand Down

0 comments on commit 1c8972c

Please sign in to comment.