Skip to content

Commit ed6bf74

Browse files
committed
Added support for gfx950 architecture platform
1 parent 6304124 commit ed6bf74

File tree

12 files changed: 24 additions and 13 deletions

12 files changed: 24 additions and 13 deletions

tensorflow/core/grappler/optimizers/auto_mixed_precision.cc

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,8 @@ bool HasFastFP16Support(const DeviceProperties& props) {
116116
#elif TENSORFLOW_USE_ROCM
117117
absl::flat_hash_set<std::string> FP16SupportedDevices = {
118118
{"gfx906"}, {"gfx908"}, {"gfx90a"}, {"gfx910"}, {"gfx940"}, {"gfx941"},
119-
{"gfx942"}, {"gfx1010"}, {"gfx1012"}, {"gfx1030"},
119+
{"gfx942"}, {"gfx950"},
120+
{"gfx1010"}, {"gfx1012"}, {"gfx1030"},
120121
{"gfx1100"}, {"gfx1101"}, {"gfx1102"},
121122
{"gfx1200"}, {"gfx1201"}
122123
};

tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,7 @@ inline GpuStats GetNumGPUs(const Cluster& cluster) {
7373
compute_capability_it->second == "gfx940" ||
7474
compute_capability_it->second == "gfx941" ||
7575
compute_capability_it->second == "gfx942" ||
76+
compute_capability_it->second == "gfx950" ||
7677
compute_capability_it->second == "gfx1101" ||
7778
compute_capability_it->second == "gfx1102" ||
7879
compute_capability_it->second == "gfx1200" ||

tensorflow/core/util/gpu_device_functions.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -743,7 +743,7 @@ __device__ inline double GpuAtomicAdd(double* ptr, double value) {
743743
}
744744
#endif
745745

746-
#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__
746+
#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx950__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__
747747

748748
#define ADDRSP1 __attribute__((address_space(1)))
749749
__device__ float
@@ -963,7 +963,7 @@ __device__ inline int64_t GpuAtomicMin(int64_t* ptr, int64_t value) {
963963
}
964964
#endif
965965

966-
#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__
966+
#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx950__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__
967967
// Low level instructions don't return. For now, assume that return value
968968
// is always unused.
969969
__device__ float GpuAtomicAdd(float* dst, float val) {
@@ -978,7 +978,7 @@ __device__ inline T GpuAtomicAddShared(T* ptr, T value) {
978978
return GpuAtomicAdd(ptr, value);
979979
}
980980

981-
#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__
981+
#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx950__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__
982982
__device__ float GpuAtomicAddShared(float* dst, float val) {
983983
atomicAdd(dst, val);
984984
return val;

tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux2014

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ COPY setup.packages.rocm.cs7.sh setup.packages.rocm.cs7.sh
88
COPY builder.packages.rocm.cs7.txt builder.packages.rocm.cs7.txt
99
RUN /setup.packages.rocm.cs7.sh /builder.packages.rocm.cs7.txt
1010

11-
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
11+
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
1212
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}
1313

1414
# Install ROCM

tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ COPY setup.packages.rocm.el8.sh setup.packages.rocm.el8.sh
1717
COPY builder.packages.rocm.el8.txt builder.packages.rocm.el8.txt
1818
RUN /setup.packages.rocm.el8.sh /builder.packages.rocm.el8.txt
1919

20-
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
20+
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
2121
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}
2222

2323
# Install ROCM

tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub20

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
FROM ubuntu:20.04
33
################################################################################
44

5-
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
5+
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
66
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}
77

88
# Install build dependencies

tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub22

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
FROM ubuntu:22.04
33
################################################################################
44

5-
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
5+
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
66
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}
77

88
# Install build dependencies

tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub24

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
FROM ubuntu:24.04
33
################################################################################
44

5-
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
5+
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
66
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}
77

88
# Install build dependencies

tensorflow/tools/tf_sig_build_dockerfiles/setup.rocm.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -145,7 +145,7 @@ then
145145
echo "build:rocm_base --copt=-fclang-abi-compat=17" >> /etc/bazel.bazelrc
146146
fi
147147

148-
GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS:-"gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100"}
148+
GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS:-"gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1030 gfx1100"}
149149

150150
echo $ROCM_VERSION
151151
echo $ROCM_REPO

third_party/xla/xla/service/gpu/llvm_gpu_backend/amdgpu_backend.cc

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -340,7 +340,8 @@ std::string MapGCNArchNameTokenToFeatureStr(const std::string& token,
340340
return "+sramecc";
341341
} else if (token == "sramecc-") {
342342
if (gfx == "gfx90a" || gfx == "gfx940" || gfx == "gfx941" ||
343-
gfx == "gfx942" || gfx == "gfx1101" || gfx == "gfx1102" ||
343+
gfx == "gfx942" || gfx == "gfx950" ||
344+
gfx == "gfx1101" || gfx == "gfx1102" ||
344345
gfx == "gfx1200" || gfx == "gfx1201")
345346
return "";
346347
return "-sramecc";

0 commit comments

Comments
 (0)