diff --git a/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h b/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h
new file mode 100644
index 000000000000..2525f6abd850
--- /dev/null
+++ b/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h
@@ -0,0 +1,28 @@
+//===- GPUToROCDLPass.h - Convert GPU kernel to ROCDL dialect ----*- C++ -*-===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+#ifndef MLIR_CONVERSION_GPUTOROCDL_GPUTOROCDLPASS_H_
+#define MLIR_CONVERSION_GPUTOROCDL_GPUTOROCDLPASS_H_
+
+namespace mlir {
+struct FunctionPassBase;
+
+/// Creates a function pass that lowers GPU index ops to ROCDL counterparts.
+FunctionPassBase *createLowerGpuOpsToROCDLOpsPass();
+
+} // namespace mlir
+
+#endif // MLIR_CONVERSION_GPUTOROCDL_GPUTOROCDLPASS_H_
diff --git a/include/mlir/LLVMIR/CMakeLists.txt b/include/mlir/LLVMIR/CMakeLists.txt
index 1d7d06bc25c1..3e5a0346ed60 100644
--- a/include/mlir/LLVMIR/CMakeLists.txt
+++ b/include/mlir/LLVMIR/CMakeLists.txt
@@ -8,9 +8,16 @@ set(LLVM_TARGET_DEFINITIONS NVVMOps.td)
 mlir_tablegen(NVVMOps.h.inc -gen-op-decls)
 mlir_tablegen(NVVMOps.cpp.inc -gen-op-defs)
 add_public_tablegen_target(MLIRNVVMOpsIncGen)
+set(LLVM_TARGET_DEFINITIONS ROCDLOps.td)
+mlir_tablegen(ROCDLOps.h.inc -gen-op-decls)
+mlir_tablegen(ROCDLOps.cpp.inc -gen-op-defs)
+add_public_tablegen_target(MLIRROCDLOpsIncGen)
 set(LLVM_TARGET_DEFINITIONS LLVMOps.td)
 mlir_tablegen(LLVMConversions.inc -gen-llvmir-conversions)
 add_public_tablegen_target(MLIRLLVMConversionsIncGen)
 set(LLVM_TARGET_DEFINITIONS NVVMOps.td)
 mlir_tablegen(NVVMConversions.inc -gen-llvmir-conversions)
 add_public_tablegen_target(MLIRNVVMConversionsIncGen)
+set(LLVM_TARGET_DEFINITIONS ROCDLOps.td)
+mlir_tablegen(ROCDLConversions.inc -gen-llvmir-conversions)
+add_public_tablegen_target(MLIRROCDLConversionsIncGen)
diff --git a/include/mlir/LLVMIR/ROCDLDialect.h b/include/mlir/LLVMIR/ROCDLDialect.h
new file mode 100644
index 000000000000..499f88f1c27e
--- /dev/null
+++ b/include/mlir/LLVMIR/ROCDLDialect.h
@@ -0,0 +1,43 @@
+//===- ROCDLDialect.h - MLIR ROCDL IR dialect ---------------------*- C++ -*-===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file defines the ROCDL IR dialect in MLIR, containing ROCDL operations and
+// ROCDL specific extensions to the LLVM type system.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_LLVMIR_ROCDLDIALECT_H_
+#define MLIR_LLVMIR_ROCDLDIALECT_H_
+
+#include "mlir/IR/Dialect.h"
+#include "mlir/IR/OpDefinition.h"
+namespace mlir {
+namespace ROCDL {
+// Op class declarations generated by TableGen from ROCDLOps.td.
+#define GET_OP_CLASSES
+#include "mlir/LLVMIR/ROCDLOps.h.inc"
+
+/// The `rocdl` dialect; its constructor registers the generated ROCDL ops.
+class ROCDLDialect : public Dialect {
+public:
+  explicit ROCDLDialect(MLIRContext *context);
+};
+
+} // namespace ROCDL
+} // namespace mlir
+
+#endif /* MLIR_LLVMIR_ROCDLDIALECT_H_ */
diff --git a/include/mlir/LLVMIR/ROCDLOps.td b/include/mlir/LLVMIR/ROCDLOps.td
new file mode 100644
index 000000000000..a49f00e1b94b
--- /dev/null
+++ b/include/mlir/LLVMIR/ROCDLOps.td
@@ -0,0 +1,70 @@
+//===-- ROCDLOps.td - ROCDL IR dialect op definition file ----*- tablegen -*-===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This is the ROCDL IR operation definition file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifdef ROCDLIR_OPS
+#else
+#define ROCDLIR_OPS
+
+include "mlir/LLVMIR/LLVMOpBase.td"
+
+def ROCDL_Dialect : Dialect {
+  let name = "rocdl";
+  let cppNamespace = "ROCDL";
+}
+
+class ROCDL_Op<string mnemonic, list<OpTrait> traits = []> :
+  LLVM_OpBase<ROCDL_Dialect, mnemonic, traits> {
+}
+// Op without operands, translated to a call of `llvm.amdgcn.<mnemonic>`.
+class ROCDL_SpecialRegisterOp<string mnemonic,
+    list<OpTrait> traits = []> :
+  ROCDL_Op<mnemonic, !listconcat(traits, [NoSideEffect])>,
+  Results<(outs LLVM_Type:$res)>, Arguments<(ins)> {
+  string llvmBuilder = "$res = createIntrinsicCall(builder,"
+    # "llvm::Intrinsic::amdgcn_" # !subst(".","_", mnemonic) # ");";
+  let parser = [{ return parseROCDLOp(parser, result); }];
+  let printer = [{ printROCDLOp(p, this->getOperation()); }];
+}
+
+def ROCDL_ThreadIdXOp : ROCDL_SpecialRegisterOp<"workitem.id.x">;
+def ROCDL_ThreadIdYOp : ROCDL_SpecialRegisterOp<"workitem.id.y">;
+def ROCDL_ThreadIdZOp : ROCDL_SpecialRegisterOp<"workitem.id.z">;
+def ROCDL_BlockIdXOp : ROCDL_SpecialRegisterOp<"workgroup.id.x">;
+def ROCDL_BlockIdYOp : ROCDL_SpecialRegisterOp<"workgroup.id.y">;
+def ROCDL_BlockIdZOp : ROCDL_SpecialRegisterOp<"workgroup.id.z">;
+// Op without operands, translated to the call `device_function(parameter)`.
+class ROCDL_DeviceFunctionOp<string mnemonic, string device_function, int parameter,
+    list<OpTrait> traits = []> :
+  ROCDL_Op<mnemonic, !listconcat(traits, [NoSideEffect])>,
+  Results<(outs LLVM_Type:$res)>, Arguments<(ins)> {
+  string llvmBuilder = "$res = createDeviceFunctionCall(builder, \""# device_function # "\", " # parameter # ");";
+  let parser = [{ return parseROCDLOp(parser, result); }];
+  let printer = [{ printROCDLOp(p, this->getOperation()); }];
+}
+
+def ROCDL_BlockDimXOp : ROCDL_DeviceFunctionOp<"workgroup.dim.x", "__ockl_get_local_size", 0>;
+def ROCDL_BlockDimYOp : ROCDL_DeviceFunctionOp<"workgroup.dim.y", "__ockl_get_local_size", 1>;
+def ROCDL_BlockDimZOp : ROCDL_DeviceFunctionOp<"workgroup.dim.z", "__ockl_get_local_size", 2>;
+def ROCDL_GridDimXOp : ROCDL_DeviceFunctionOp<"grid.dim.x", "__ockl_get_global_size", 0>;
+def ROCDL_GridDimYOp : ROCDL_DeviceFunctionOp<"grid.dim.y", "__ockl_get_global_size", 1>;
+def ROCDL_GridDimZOp : ROCDL_DeviceFunctionOp<"grid.dim.z", "__ockl_get_global_size", 2>;
+
+#endif // ROCDLIR_OPS
diff --git a/include/mlir/Target/ROCDLIR.h b/include/mlir/Target/ROCDLIR.h
new file mode 100644
index 000000000000..a7be2ebefc19
--- /dev/null
+++ b/include/mlir/Target/ROCDLIR.h
@@ -0,0 +1,44 @@
+//===- ROCDLIR.h - MLIR to LLVM + ROCDL IR conversion -------------*- C++ -*-===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file declares the entry point for the MLIR to LLVM + ROCDL IR conversion.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_TARGET_ROCDLIR_H
+#define MLIR_TARGET_ROCDLIR_H
+
+#include <memory>
+
+// Forward-declare LLVM classes.
+namespace llvm {
+class Module;
+} // namespace llvm
+
+namespace mlir {
+class ModuleOp;
+
+/// Convert the given MLIR module into ROCDL IR: LLVM IR in which functions
+/// marked as GPU kernels use the AMDGPU_KERNEL calling convention.  Requires
+/// the LLVM IR dialect to be registered; the LLVM context is taken from it.
+/// In case of error, report it to the error handler registered with the MLIR
+/// context, if any (obtained from the MLIR module), and return `nullptr`.
+std::unique_ptr<llvm::Module> translateModuleToROCDLIR(ModuleOp m);
+
+} // namespace mlir
+
+#endif // MLIR_TARGET_ROCDLIR_H
diff --git a/lib/Conversion/CMakeLists.txt b/lib/Conversion/CMakeLists.txt
index 1ddd103f28e8..c715c8ff4a1e 100644
--- a/lib/Conversion/CMakeLists.txt
+++ b/lib/Conversion/CMakeLists.txt
@@ -2,6 +2,7 @@ add_subdirectory(LoopsToGPU)
 add_subdirectory(ControlFlowToCFG)
 add_subdirectory(GPUToCUDA)
 add_subdirectory(GPUToNVVM)
+add_subdirectory(GPUToROCDL)
 add_subdirectory(GPUToSPIRV)
 add_subdirectory(StandardToLLVM)
 add_subdirectory(StandardToSPIRV)
diff --git a/lib/Conversion/GPUToROCDL/CMakeLists.txt b/lib/Conversion/GPUToROCDL/CMakeLists.txt
new file mode 100644
index 000000000000..3c97e5ca86ba
--- /dev/null
+++ b/lib/Conversion/GPUToROCDL/CMakeLists.txt
@@ -0,0 +1,10 @@
+add_llvm_library(MLIRGPUtoROCDLTransforms
+  LowerGpuOpsToROCDLOps.cpp
+  )
+target_link_libraries(MLIRGPUtoROCDLTransforms
+  LLVMSupport
+  MLIRGPU
+  MLIRLLVMIR
+  MLIRROCDLIR
+  MLIRPass
+  )
diff --git a/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
new file mode 100644
index 000000000000..83ac0939ccf8
--- /dev/null
+++ b/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -0,0 +1,142 @@
+//===- LowerGpuOpsToROCDLOps.cpp - MLIR GPU to ROCDL lowering passes --------===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file implements a pass to generate ROCDLIR operations for higher-level
+// GPU operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/GPU/GPUDialect.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/StandardTypes.h"
+#include "mlir/LLVMIR/LLVMDialect.h"
+#include "mlir/LLVMIR/ROCDLDialect.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassRegistry.h"
+
+#include "llvm/ADT/StringSwitch.h"
+
+namespace mlir {
+namespace {
+
+// A pass that replaces all occurrences of GPU operations with their
+// corresponding ROCDL equivalent.
+//
+// This pass does not handle launching of kernels. Instead, it is meant to be
+// used on the body region of a launch or the body region of a kernel
+// function.
+class LowerGpuOpsToROCDLOpsPass
+    : public FunctionPass<LowerGpuOpsToROCDLOpsPass> {
+private:
+  enum dimension { X = 0, Y = 1, Z = 2, invalid };
+
+  // Maps the "x"/"y"/"z" dimension string of `op` to X/Y/Z, else `invalid`.
+  template <typename T> dimension dimensionToIndex(T op) {
+    return llvm::StringSwitch<dimension>(op.dimension())
+        .Case("x", X)
+        .Case("y", Y)
+        .Case("z", Z)
+        .Default(invalid);
+  }
+
+  // Helper that replaces Op with XOp, YOp, or ZOp depending on the dimension
+  // that Op operates on.  Op is assumed to return an `std.index` value and
+  // XOp, YOp and ZOp are assumed to return an `llvm.i32` value.  Depending on
+  // `indexBitwidth`, sign-extend or truncate the resulting value to match the
+  // bitwidth expected by the consumers of the value.
+  template <typename XOp, typename YOp, typename ZOp, class Op>
+  void replaceWithIntrinsicOrDeviceFunction(Op operation,
+                                            LLVM::LLVMDialect *dialect,
+                                            unsigned indexBitwidth) {
+    assert(operation.getType().isIndex() &&
+           "expected an operation returning index");
+    OpBuilder builder(operation);
+    auto loc = operation.getLoc();
+    Value *newOp = nullptr;
+    switch (dimensionToIndex(operation)) {
+    case X:
+      newOp = builder.create<XOp>(loc, LLVM::LLVMType::getInt32Ty(dialect));
+      break;
+    case Y:
+      newOp = builder.create<YOp>(loc, LLVM::LLVMType::getInt32Ty(dialect));
+      break;
+    case Z:
+      newOp = builder.create<ZOp>(loc, LLVM::LLVMType::getInt32Ty(dialect));
+      break;
+    default:
+      operation.emitError("Illegal dimension: " + operation.dimension());
+      signalPassFailure();
+      return;
+    }
+
+    if (indexBitwidth > 32) {
+      newOp = builder.create<LLVM::SExtOp>(
+          loc, LLVM::LLVMType::getIntNTy(dialect, indexBitwidth), newOp);
+    } else if (indexBitwidth < 32) {
+      newOp = builder.create<LLVM::TruncOp>(
+          loc, LLVM::LLVMType::getIntNTy(dialect, indexBitwidth), newOp);
+    }
+    operation.replaceAllUsesWith(newOp);
+    operation.erase();
+  }
+
+public:
+  void runOnFunction() override {
+    LLVM::LLVMDialect *llvmDialect =
+        getContext().getRegisteredDialect<LLVM::LLVMDialect>();
+    if (!llvmDialect) {
+      getFunction().emitError("LLVM dialect is not registered");
+      return signalPassFailure();
+    }
+    unsigned indexBitwidth =
+        llvmDialect->getLLVMModule().getDataLayout().getPointerSizeInBits();
+    // Each handler may erase `opInst`, hence a single else-if chain.
+    getFunction().walk([&](Operation *opInst) {
+      if (auto threadId = dyn_cast<gpu::ThreadId>(opInst)) {
+        replaceWithIntrinsicOrDeviceFunction<
+            ROCDL::ThreadIdXOp, ROCDL::ThreadIdYOp, ROCDL::ThreadIdZOp>(
+            threadId, llvmDialect, indexBitwidth);
+      } else if (auto blockId = dyn_cast<gpu::BlockId>(opInst)) {
+        replaceWithIntrinsicOrDeviceFunction<
+            ROCDL::BlockIdXOp, ROCDL::BlockIdYOp, ROCDL::BlockIdZOp>(
+            blockId, llvmDialect, indexBitwidth);
+      } else if (auto blockDim = dyn_cast<gpu::BlockDim>(opInst)) {
+        // BlockDimX/Y/Z are implemented as device functions on ROCDL.
+        replaceWithIntrinsicOrDeviceFunction<
+            ROCDL::BlockDimXOp, ROCDL::BlockDimYOp, ROCDL::BlockDimZOp>(
+            blockDim, llvmDialect, indexBitwidth);
+      } else if (auto gridDim = dyn_cast<gpu::GridDim>(opInst)) {
+        // GridDimX/Y/Z are implemented as device functions on ROCDL.
+        replaceWithIntrinsicOrDeviceFunction<
+            ROCDL::GridDimXOp, ROCDL::GridDimYOp, ROCDL::GridDimZOp>(
+            gridDim, llvmDialect, indexBitwidth);
+      }
+    });
+  }
+};
+
+} // anonymous namespace
+
+FunctionPassBase *createLowerGpuOpsToROCDLOpsPass() {
+  return new LowerGpuOpsToROCDLOpsPass();
+}
+
+static PassRegistration<LowerGpuOpsToROCDLOpsPass>
+    pass("lower-gpu-ops-to-rocdl-ops",
+         "Generate ROCDL operations for gpu operations");
+
+} // namespace mlir
diff --git a/lib/LLVMIR/CMakeLists.txt b/lib/LLVMIR/CMakeLists.txt
index 5e21850dbac5..546bfda69dcf 100644
--- a/lib/LLVMIR/CMakeLists.txt
+++ b/lib/LLVMIR/CMakeLists.txt
@@ -15,3 +15,12 @@ add_llvm_library(MLIRNVVMIR
   )
 add_dependencies(MLIRNVVMIR MLIRNVVMOpsIncGen MLIRNVVMConversionsIncGen LLVMAsmParser LLVMCore LLVMSupport)
 target_link_libraries(MLIRNVVMIR LLVMAsmParser LLVMCore LLVMSupport)
+
+add_llvm_library(MLIRROCDLIR
+  IR/ROCDLDialect.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/LLVMIR
+  )
+add_dependencies(MLIRROCDLIR MLIRROCDLOpsIncGen MLIRROCDLConversionsIncGen LLVMAsmParser LLVMCore LLVMSupport)
+target_link_libraries(MLIRROCDLIR LLVMAsmParser LLVMCore LLVMSupport)
diff --git a/lib/LLVMIR/IR/ROCDLDialect.cpp b/lib/LLVMIR/IR/ROCDLDialect.cpp
new file mode 100644
index 000000000000..80f0001c1fff
--- /dev/null
+++ b/lib/LLVMIR/IR/ROCDLDialect.cpp
@@ -0,0 +1,86 @@
+//===- ROCDLDialect.cpp - ROCDL IR Ops and Dialect registration -------------===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file defines the operation details for the ROCDL IR dialect in MLIR.
+// It also registers the dialect.
+//
+// The ROCDL dialect only contains GPU specific additions on top of the general
+// LLVM dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/LLVMIR/ROCDLDialect.h"
+
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/IR/StandardTypes.h"
+#include "mlir/LLVMIR/LLVMDialect.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/SourceMgr.h"
+
+namespace mlir {
+namespace ROCDL {
+
+//===----------------------------------------------------------------------===//
+// Printing/parsing for ROCDL ops
+//===----------------------------------------------------------------------===//
+
+static void printROCDLOp(OpAsmPrinter *p, Operation *op) {
+  // Custom form: `<op-name> : <result-type>`.
+  *p << op->getName() << " : ";
+  if (op->getNumResults() != 1)
+    *p << "###invalid type###";
+  else
+    *p << op->getResult(0)->getType();
+}
+
+// <operation> ::= `rocdl.XYZ` attribute-dict? `:` type
+static ParseResult parseROCDLOp(OpAsmParser *parser, OperationState *result) {
+  if (parser->parseOptionalAttributeDict(result->attributes))
+    return failure();
+  Type resultType;
+  if (parser->parseColonType(resultType))
+    return failure();
+  result->addTypes(resultType);
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// ROCDLDialect initialization, type parsing, and registration.
+//===----------------------------------------------------------------------===//
+
+ROCDLDialect::ROCDLDialect(MLIRContext *context) : Dialect("rocdl", context) {
+  // With GET_OP_LIST defined, the generated include supplies the op classes.
+  addOperations<
+#define GET_OP_LIST
+#include "mlir/LLVMIR/ROCDLOps.cpp.inc"
+      >();
+  // Support unknown operations because not all ROCDL operations are registered.
+  allowUnknownOperations();
+}
+
+#define GET_OP_CLASSES
+#include "mlir/LLVMIR/ROCDLOps.cpp.inc"
+
+static DialectRegistration<ROCDLDialect> rocdlDialect;
+
+} // namespace ROCDL
+} // namespace mlir
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index 9f49b813336b..111e2f673137 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -28,3 +28,17 @@ target_link_libraries(MLIRTargetNVVMIR
   MLIRNVVMIR
   MLIRTargetLLVMIRModuleTranslation
   )
+add_llvm_library(MLIRTargetROCDLIR
+  LLVMIR/ConvertToROCDLIR.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Target/LLVMIR
+  DEPENDS
+  intrinsics_gen
+  )
+target_link_libraries(MLIRTargetROCDLIR
+  MLIRGPU
+  MLIRIR
+  MLIRROCDLIR
+  MLIRTargetLLVMIRModuleTranslation
+  )
diff --git a/lib/Target/LLVMIR/ConvertToROCDLIR.cpp b/lib/Target/LLVMIR/ConvertToROCDLIR.cpp
new file mode 100644
index 000000000000..568dc19cd3d0
--- /dev/null
+++ b/lib/Target/LLVMIR/ConvertToROCDLIR.cpp
@@ -0,0 +1,117 @@
+//===- ConvertToROCDLIR.cpp - MLIR to LLVM IR conversion -------------------===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file implements a translation between the MLIR LLVM + ROCDL dialects and
+// LLVM IR with ROCDL intrinsics and metadata.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Target/ROCDLIR.h"
+
+#include "mlir/GPU/GPUDialect.h"
+#include "mlir/IR/Function.h"
+#include "mlir/IR/Module.h"
+#include "mlir/LLVMIR/ROCDLDialect.h"
+#include "mlir/Support/FileUtilities.h"
+#include "mlir/Target/LLVMIR/ModuleTranslation.h"
+#include "mlir/Translation.h"
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ToolOutputFile.h"
+
+using namespace mlir;
+
+namespace {
+static llvm::Value *createIntrinsicCall(llvm::IRBuilder<> &builder,
+                                        llvm::Intrinsic::ID intrinsic) {
+  // Emits an argument-less call to `intrinsic` in the module being built.
+  llvm::Module *m = builder.GetInsertBlock()->getModule();
+  return builder.CreateCall(llvm::Intrinsic::getDeclaration(m, intrinsic, {}));
+}
+
+// ROCM TODO: review interface
+// Emits a call `i32 fn_name(i32 parameter)`, declaring `fn_name` if needed.
+static llvm::Value *createDeviceFunctionCall(llvm::IRBuilder<> &builder,
+                                             StringRef fn_name, int parameter) {
+  llvm::Module *module = builder.GetInsertBlock()->getModule();
+  llvm::Type *i32 = llvm::Type::getInt32Ty(module->getContext());
+  // Use the FunctionCallee directly: the callee may not be an llvm::Function.
+  llvm::FunctionCallee fn = module->getOrInsertFunction(
+      fn_name, llvm::FunctionType::get(i32, i32, /*isVarArg=*/false));
+  // A named local keeps the operand alive for the ArrayRef; building the
+  // ArrayRef from the temporary itself would leave it dangling.
+  llvm::Value *arg = llvm::ConstantInt::get(i32, parameter);
+  return builder.CreateCall(fn, llvm::ArrayRef<llvm::Value *>(arg));
+}
+
+// LLVM module translation extended with the TableGen-generated ROCDL op cases.
+class ModuleTranslation : public LLVM::ModuleTranslation {
+public:
+  explicit ModuleTranslation(ModuleOp module)
+      : LLVM::ModuleTranslation(module) {}
+  ~ModuleTranslation() override {}
+
+protected:
+  bool convertOperation(Operation &opInst,
+                        llvm::IRBuilder<> &builder) override {
+    // Generated from ROCDLOps.td; presumably returns early for ROCDL ops.
+#include "mlir/LLVMIR/ROCDLConversions.inc"
+    // Anything not handled above goes to the generic LLVM dialect translation.
+    return LLVM::ModuleTranslation::convertOperation(opInst, builder);
+  }
+};
+} // namespace
+
+std::unique_ptr<llvm::Module> mlir::translateModuleToROCDLIR(ModuleOp m) {
+  auto llvmModule =
+      LLVM::ModuleTranslation::translateModule<ModuleTranslation>(m);
+  if (!llvmModule) // Translation failed; hand the null module to the caller.
+    return nullptr;
+
+  // Give every GPU kernel the AMDGPU_KERNEL calling convention and the
+  // "amdgpu-flat-work-group-size" attribute with the range 1 to 1024.
+  for (FuncOp func : m.getOps<FuncOp>()) {
+    if (!func.getAttrOfType<UnitAttr>(gpu::GPUDialect::getKernelFuncAttrName()))
+      continue;
+    // A kernel without a translated counterpart has nothing to annotate.
+    if (auto *llvmFunc = llvmModule->getFunction(func.getName())) {
+      llvmFunc->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
+      llvmFunc->addFnAttr("amdgpu-flat-work-group-size", "1, 1024");
+    }
+  }
+  return llvmModule;
+}
+
+static TranslateFromMLIRRegistration
+    registration("mlir-to-rocdlir",
+                 [](ModuleOp module, llvm::StringRef outputFilename) {
+                   // Translate before opening the output file.
+                   std::unique_ptr<llvm::Module> rocdlModule;
+                   if (module)
+                     rocdlModule = mlir::translateModuleToROCDLIR(module);
+                   if (!rocdlModule)
+                     return failure();
+
+                   auto output = openOutputFile(outputFilename);
+                   if (!output)
+                     return failure();
+                   // Print the module as textual LLVM IR and keep the file.
+                   rocdlModule->print(output->os(), nullptr);
+                   output->keep();
+                   return success();
+                 });
diff --git a/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
new file mode 100644
index 000000000000..4b5177e3f733
--- /dev/null
+++ b/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
@@ -0,0 +1,35 @@
+// RUN: mlir-opt %s -lower-gpu-ops-to-rocdl-ops | FileCheck %s
+
+// CHECK-LABEL: func @gpu_index_ops()
+func @gpu_index_ops()
+    attributes { gpu.kernel } {
+  // CHECK: = rocdl.workitem.id.x : !llvm.i32
+  %tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index)
+  // CHECK: = rocdl.workitem.id.y : !llvm.i32
+  %tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index)
+  // CHECK: = rocdl.workitem.id.z : !llvm.i32
+  %tIdZ = "gpu.thread_id"() {dimension = "z"} : () -> (index)
+
+  // CHECK: = rocdl.workgroup.id.x : !llvm.i32
+  %bIdX = "gpu.block_id"() {dimension = "x"} : () -> (index)
+  // CHECK: = rocdl.workgroup.id.y : !llvm.i32
+  %bIdY = "gpu.block_id"() {dimension = "y"} : () -> (index)
+  // CHECK: = rocdl.workgroup.id.z : !llvm.i32
+  %bIdZ = "gpu.block_id"() {dimension = "z"} : () -> (index)
+
+  // CHECK: = rocdl.workgroup.dim.x : !llvm.i32
+  %bDimX = "gpu.block_dim"() {dimension = "x"} : () -> (index)
+  // CHECK: = rocdl.workgroup.dim.y : !llvm.i32
+  %bDimY = "gpu.block_dim"() {dimension = "y"} : () -> (index)
+  // CHECK: = rocdl.workgroup.dim.z : !llvm.i32
+  %bDimZ = "gpu.block_dim"() {dimension = "z"} : () -> (index)
+
+  // CHECK: = rocdl.grid.dim.x : !llvm.i32
+  %gDimX = "gpu.grid_dim"() {dimension = "x"} : () -> (index)
+  // CHECK: = rocdl.grid.dim.y : !llvm.i32
+  %gDimY = "gpu.grid_dim"() {dimension = "y"} : () -> (index)
+  // CHECK: = rocdl.grid.dim.z : !llvm.i32
+  %gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index)
+
+  std.return
+}
diff --git a/test/LLVMIR/rocdl.mlir b/test/LLVMIR/rocdl.mlir
new file mode 100644
index 000000000000..82f82ea99f1a
--- /dev/null
+++ b/test/LLVMIR/rocdl.mlir
@@ -0,0 +1,29 @@
+// RUN: mlir-opt %s | FileCheck %s
+
+func @rocdl_special_regs() -> !llvm.i32 {
+  // CHECK: %0 = rocdl.workitem.id.x : !llvm.i32
+  %0 = rocdl.workitem.id.x : !llvm.i32
+  // CHECK: %1 = rocdl.workitem.id.y : !llvm.i32
+  %1 = rocdl.workitem.id.y : !llvm.i32
+  // CHECK: %2 = rocdl.workitem.id.z : !llvm.i32
+  %2 = rocdl.workitem.id.z : !llvm.i32
+  // CHECK: %3 = rocdl.workgroup.id.x : !llvm.i32
+  %3 = rocdl.workgroup.id.x : !llvm.i32
+  // CHECK: %4 = rocdl.workgroup.id.y : !llvm.i32
+  %4 = rocdl.workgroup.id.y : !llvm.i32
+  // CHECK: %5 = rocdl.workgroup.id.z : !llvm.i32
+  %5 = rocdl.workgroup.id.z : !llvm.i32
+  // CHECK: %6 = rocdl.workgroup.dim.x : !llvm.i32
+  %6 = rocdl.workgroup.dim.x : !llvm.i32
+  // CHECK: %7 = rocdl.workgroup.dim.y : !llvm.i32
+  %7 = rocdl.workgroup.dim.y : !llvm.i32
+  // CHECK: %8 = rocdl.workgroup.dim.z : !llvm.i32
+  %8 = rocdl.workgroup.dim.z : !llvm.i32
+  // CHECK: %9 = rocdl.grid.dim.x : !llvm.i32
+  %9 = rocdl.grid.dim.x : !llvm.i32
+  // CHECK: %10 = rocdl.grid.dim.y : !llvm.i32
+  %10 = rocdl.grid.dim.y : !llvm.i32
+  // CHECK: %11 = rocdl.grid.dim.z : !llvm.i32
+  %11 = rocdl.grid.dim.z : !llvm.i32
+  llvm.return %0 : !llvm.i32
+}
diff --git a/test/Target/rocdlir.mlir b/test/Target/rocdlir.mlir
new file mode 100644
index 000000000000..65b97d0aa689
--- /dev/null
+++ b/test/Target/rocdlir.mlir
@@ -0,0 +1,35 @@
+// RUN: mlir-translate -mlir-to-rocdlir %s | FileCheck %s
+
+func @rocdl_special_regs() -> !llvm.i32 {
+  // CHECK: %1 = call i32 @llvm.amdgcn.workitem.id.x()
+  %1 = rocdl.workitem.id.x : !llvm.i32
+  // CHECK: %2 = call i32 @llvm.amdgcn.workitem.id.y()
+  %2 = rocdl.workitem.id.y : !llvm.i32
+  // CHECK: %3 = call i32 @llvm.amdgcn.workitem.id.z()
+  %3 = rocdl.workitem.id.z : !llvm.i32
+  // CHECK: %4 = call i32 @llvm.amdgcn.workgroup.id.x()
+  %4 = rocdl.workgroup.id.x : !llvm.i32
+  // CHECK: %5 = call i32 @llvm.amdgcn.workgroup.id.y()
+  %5 = rocdl.workgroup.id.y : !llvm.i32
+  // CHECK: %6 = call i32 @llvm.amdgcn.workgroup.id.z()
+  %6 = rocdl.workgroup.id.z : !llvm.i32
+  // CHECK: %7 = call i32 @__ockl_get_local_size(i32 0)
+  %7 = rocdl.workgroup.dim.x : !llvm.i32
+  // CHECK: %8 = call i32 @__ockl_get_local_size(i32 1)
+  %8 = rocdl.workgroup.dim.y : !llvm.i32
+  // CHECK: %9 = call i32 @__ockl_get_local_size(i32 2)
+  %9 = rocdl.workgroup.dim.z : !llvm.i32
+  // CHECK: %10 = call i32 @__ockl_get_global_size(i32 0)
+  %10 = rocdl.grid.dim.x : !llvm.i32
+  // CHECK: %11 = call i32 @__ockl_get_global_size(i32 1)
+  %11 = rocdl.grid.dim.y : !llvm.i32
+  // CHECK: %12 = call i32 @__ockl_get_global_size(i32 2)
+  %12 = rocdl.grid.dim.z : !llvm.i32
+  llvm.return %1 : !llvm.i32
+}
+
+// This function has the "amdgpu_kernel" calling convention after conversion.
+// CHECK:     amdgpu_kernel
+func @kernel_func() attributes {gpu.kernel} {
+  llvm.return
+}
diff --git a/tools/mlir-opt/CMakeLists.txt b/tools/mlir-opt/CMakeLists.txt
index 26f8885a2428..7eedd8bca069 100644
--- a/tools/mlir-opt/CMakeLists.txt
+++ b/tools/mlir-opt/CMakeLists.txt
@@ -24,6 +24,7 @@ set(LIBS
   MLIRFxpMathOps
   MLIRGPU
   MLIRGPUtoNVVMTransforms
+  MLIRGPUtoROCDLTransforms
   MLIRGPUtoSPIRVTransforms
   MLIRLinalg
   MLIRLLVMIR
@@ -34,6 +35,7 @@ set(LIBS
   MLIRPass
   MLIRQuantizerTransforms
   MLIRQuantOps
+  MLIRROCDLIR
   MLIRSPIRV
   MLIRSPIRVConversion
   MLIRStandardOps
diff --git a/tools/mlir-translate/CMakeLists.txt b/tools/mlir-translate/CMakeLists.txt
index 50df9de8cae7..8f03de449579 100644
--- a/tools/mlir-translate/CMakeLists.txt
+++ b/tools/mlir-translate/CMakeLists.txt
@@ -9,6 +9,7 @@ set(LIBS
   MLIRStandardOps
   MLIRTargetLLVMIR
   MLIRTargetNVVMIR
+  MLIRTargetROCDLIR
   MLIRTransforms
   MLIRTranslation
   MLIRSupport