moreh-dev · hieule88 · Apr 11, 2024 · Apr 11, 2024 · Apr 12, 2024 · Apr 12, 2024
diff --git a/.githooks/pre-commit b/.githooks/pre-commit
@@ -40,4 +40,3 @@ do
         "$format" -i -style=file "$file"
     fi
 done
-
diff --git a/docs/reference/index.rst b/docs/reference/index.rst
@@ -32,3 +32,4 @@ The MIOpen API library is structured as follows:
   * :doc:`GroupNorm <../doxygen/html/group__groupnorm>` (experimental)
   * :doc:`Cat <../doxygen/html/group__cat>` (experimental)
   * :doc:`Argmax<./argmax>` (experimental)
+  * :doc:`NLLLoss<../doxygen/html/group__nllloss>` (experimental)
diff --git a/driver/CMakeLists.txt b/driver/CMakeLists.txt
@@ -51,6 +51,7 @@ add_executable(MIOpenDriver
     dm_softmax.cpp
     dm_sum.cpp
     dm_tensorop.cpp
+    dm_nllloss.cpp
     main.cpp
     registry_driver_maker.cpp
     rocrand_wrapper.cpp)

diff --git a/driver/dm_nllloss.cpp b/driver/dm_nllloss.cpp
@@ -0,0 +1,56 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#include "registry_driver_maker.hpp"
+#include "nllloss_driver.hpp"
+
+// Creates the NLLLoss driver selected by the base argument.
+//
+// "nllloss", "nlllossfp16" and "nlllossbfp16" select the float, float16 and
+// bfloat16 instantiations of NLLLossDriver respectively; the second template
+// argument is float in all three cases.
+//
+// Returns a driver allocated with new, or nullptr when base_arg is none of the
+// names above.
+static Driver* makeDriver(const std::string& base_arg)
+{
+    Driver* driver = nullptr;
+
+    if(base_arg == "nllloss")
+    {
+        driver = new NLLLossDriver<float, float>();
+    }
+    else if(base_arg == "nlllossfp16")
+    {
+        driver = new NLLLossDriver<float16, float>();
+    }
+    else if(base_arg == "nlllossbfp16")
+    {
+        driver = new NLLLossDriver<bfloat16, float>();
+    }
+
+    return driver;
+}
+
+REGISTER_DRIVER_MAKER(makeDriver);
diff --git a/driver/driver.hpp b/driver/driver.hpp
@@ -151,7 +151,7 @@ inline void PadBufferSize(size_t& sz, int datatype_sz)
            "pool[fp16], lrn[fp16], "
            "activ[fp16], softmax[fp16], bnorm[fp16], rnn[fp16], gemm[fp16], ctc, dropout[fp16], "
            "tensorop[fp16], reduce[fp16|fp64], layernorm[bfp16|fp16], sum[bfp16|fp16], "
-           "argmax[bfp16|fp16], groupnorm[bfp16|fp16], cat[bfp16|fp16]\n");
+           "argmax[bfp16|fp16], groupnorm[bfp16|fp16], cat[bfp16|fp16], nllloss[bfp16|fp16]\n");
     exit(0); // NOLINT (concurrency-mt-unsafe)
 }
 
@@ -176,7 +176,8 @@ inline std::string ParseBaseArg(int argc, char* argv[])
        arg != "layernormfp16" && arg != "layernormbfp16" && arg != "sum" && arg != "sumfp16" &&
        arg != "sumbfp16" && arg != "argmax" && arg != "argmaxfp16" && arg != "argmaxbfp16" &&
        arg != "groupnorm" && arg != "groupnormfp16" && arg != "groupnormbfp16" && arg != "cat" &&
-       arg != "catfp16" && arg != "catbfp16" && arg != "--version")
+       arg != "catfp16" && arg != "catbfp16" && arg != "nllloss" && arg != "nlllossfp16" && 
+       arg != "nlllossbfp16" && arg != "--version")
     {
         printf("FAILED: Invalid Base Input Argument\n");
         Usage();

diff --git a/driver/mloNLLLossHost.hpp b/driver/mloNLLLossHost.hpp
@@ -0,0 +1,96 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#ifndef MLO_NLLLOSSHOST_H_
+#define MLO_NLLLOSSHOST_H_
+
+#include <miopen/tensor.hpp>
+
+// Host-side reference for the unreduced NLLLoss forward pass.
+//
+// The lengths of inputDesc are read as (N, C, D1, D2). Only the lengths are
+// used: input is indexed as a packed N*C*D1*D2 buffer, target and outputhost
+// as packed N*D1*D2 buffers, and weight by class index. For each (n, d1, d2):
+//
+//   t = target[n, d1, d2]
+//   outputhost[n, d1, d2] = -weight[t] * input[n, t, d1, d2]
+//
+// Positions whose target is negative, equals ignore_index, or is >= C are
+// written as 0.
+//
+// Returns 0 on success, or -1 without writing anything when inputDesc is not
+// 4-D.
+template <typename Tgpu, typename Tcheck>
+int32_t mloNLLLossForwardRunHost(miopenTensorDescriptor_t inputDesc,
+                                 Tgpu* input,
+                                 int32_t* target,
+                                 Tgpu* weight,
+                                 Tcheck* outputhost,
+                                 int32_t ignore_index)
+{
+    const auto& dims = miopen::deref(inputDesc).GetLengths();
+
+    // The indexing below hard-codes four dimensions; reading dims[2] / dims[3]
+    // of a lower-rank descriptor would run past the end of the lengths vector.
+    if(dims.size() != 4)
+    {
+        return -1;
+    }
+
+    const size_t N  = dims[0];
+    const size_t C  = dims[1];
+    const size_t D1 = dims[2];
+    const size_t D2 = dims[3];
+
+    for(size_t n = 0; n < N; n++)
+    {
+        for(size_t d1 = 0; d1 < D1; d1++)
+        {
+            for(size_t d2 = 0; d2 < D2; d2++)
+            {
+                const size_t spatial_offset = d1 * D2 + d2;
+                const size_t output_index   = n * D1 * D2 + spatial_offset;
+                const int32_t t             = target[output_index];
+
+                // Validate the class before it takes part in any index arithmetic.
+                if(t < 0 || t == ignore_index || static_cast<size_t>(t) >= C)
+                {
+                    outputhost[output_index] = static_cast<Tcheck>(0);
+                    continue;
+                }
+
+                const size_t cls         = static_cast<size_t>(t);
+                const size_t input_index = (n * C + cls) * D1 * D2 + spatial_offset;
+
+                outputhost[output_index] = static_cast<Tcheck>(-1) *
+                                           static_cast<Tcheck>(weight[cls]) *
+                                           static_cast<Tcheck>(input[input_index]);
+            }
+        }
+    }
+
+    return 0;
+}
+#endif // MLO_NLLLOSSHOST_H_