Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ recursive-include csrc *.h
recursive-include csrc *.cuh
recursive-include csrc *.cpp
recursive-include csrc *.hpp
recursive-include csrc *.py

recursive-include flash_attn *.cu
recursive-include flash_attn *.h
Expand Down
24 changes: 16 additions & 8 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,19 @@ def validate_and_update_archs(archs):

# We want this even if SKIP_CUDA_BUILD because when we run python setup.py sdist we want the .hpp
# files included in the source distribution, in case the user compiles from source.
if IS_ROCM:
if not USE_TRITON_ROCM:
subprocess.run(["git", "submodule", "update", "--init", "csrc/composable_kernel"])
if os.path.isdir(".git"):
subprocess.run(["git", "submodule", "update", "--init", "csrc/composable_kernel"], check=True)
subprocess.run(["git", "submodule", "update", "--init", "csrc/cutlass"], check=True)
else:
subprocess.run(["git", "submodule", "update", "--init", "csrc/cutlass"])
if IS_ROCM:
if not USE_TRITON_ROCM:
assert (
os.path.exists("csrc/composable_kernel/example/ck_tile/01_fmha/generate.py")
), "csrc/composable_kernel is missing, please use source distribution or git clone"
else:
assert (
os.path.exists("csrc/cutlass/include/cutlass/cutlass.h")
), "csrc/cutlass is missing, please use source distribution or git clone"

if not SKIP_CUDA_BUILD and not IS_ROCM:
print("\n\ntorch.__version__ = {}\n\n".format(torch.__version__))
Expand Down Expand Up @@ -324,10 +332,10 @@ def validate_and_update_archs(archs):
if not os.path.exists("./build"):
os.makedirs("build")

os.system(f"{sys.executable} {ck_dir}/example/ck_tile/01_fmha/generate.py -d fwd --output_dir build --receipt 2")
os.system(f"{sys.executable} {ck_dir}/example/ck_tile/01_fmha/generate.py -d fwd_appendkv --output_dir build --receipt 2")
os.system(f"{sys.executable} {ck_dir}/example/ck_tile/01_fmha/generate.py -d fwd_splitkv --output_dir build --receipt 2")
os.system(f"{sys.executable} {ck_dir}/example/ck_tile/01_fmha/generate.py -d bwd --output_dir build --receipt 2")
subprocess.run([sys.executable, f"{ck_dir}/example/ck_tile/01_fmha/generate.py", "-d", "fwd", "--output_dir", "build", "--receipt", "2"], check=True)
subprocess.run([sys.executable, f"{ck_dir}/example/ck_tile/01_fmha/generate.py", "-d", "fwd_appendkv", "--output_dir", "build", "--receipt", "2"], check=True)
subprocess.run([sys.executable, f"{ck_dir}/example/ck_tile/01_fmha/generate.py", "-d", "fwd_splitkv", "--output_dir", "build", "--receipt", "2"], check=True)
subprocess.run([sys.executable, f"{ck_dir}/example/ck_tile/01_fmha/generate.py", "-d", "bwd", "--output_dir", "build", "--receipt", "2"], check=True)

# Check, if ATen/CUDAGeneratorImpl.h is found, otherwise use ATen/cuda/CUDAGeneratorImpl.h
# See https://github.com/pytorch/pytorch/pull/70650
Expand Down