Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions csrc/options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ std::unordered_map<DebugDumpOption, std::vector<std::string>> Options<
{"python_definition_segments", DebugDumpOption::PythonDefinitionSegments},
{"python_frontend_debug", DebugDumpOption::PythonFrontendDebug},
{"sass", DebugDumpOption::Sass},
{"sass_to_file", DebugDumpOption::SassToFile},
{"segmented_fusion", DebugDumpOption::FusionSegments},
{"segmenter_logging", DebugDumpOption::FusionSegmenterLog},
{"scheduler_params", DebugDumpOption::SchedulerDebug},
Expand Down
5 changes: 3 additions & 2 deletions csrc/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ enum class DebugDumpOption {
TransformPropagator, //! When running TransformPropagator, print propagation
//! path and replay result
Cubin, //! Dump compiled CUBIN
Sass, // Dump disassembled SASS
Sass, //! Dump disassembled SASS
SassToFile, //!< Dump disassembled SASS to File
Ptx, //! Dump compiled PTX
BankConflictInfo, //! Dump bank confliction info
SyncMap, //! RAW dependency info
Expand All @@ -79,7 +80,7 @@ enum class DebugDumpOption {
ExprSort, //! Print merging decisions on expression sorting
ExprSortVerbose, //! Print verbose debug info on expression sorting
LoopRotation, //! Print loop rotation log
Occupancy, // Dump occupancy
Occupancy, //! Dump occupancy
IndexType, //! Print the index type of the launched kernel
PredicateElimination, //! Print the predicate elimination information
IndexingVerbose, //! Print verbose debug info on indexing
Expand Down
7 changes: 7 additions & 0 deletions csrc/runtime/compiled_kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,13 @@ std::unique_ptr<executor_utils::CudaExecutable> compileSource(
compiled_kernel->cubin_filename =
dumpCompiledCodeToFile(compiled_kernel->cubin, func_name, ".cubin");
}
if (isDebugDumpEnabled(DebugDumpOption::SassToFile)) {
std::string sass_str =
disassembleBinary(compiled_kernel->cubin, "-fun 1 -c");
compiled_kernel->sass = {sass_str.begin(), sass_str.end()};
compiled_kernel->sass_filename =
dumpCompiledCodeToFile(compiled_kernel->sass, func_name, ".sass");
}
}

if (!compile_to_sass || isDebugDumpEnabled(DebugDumpOption::Ptx)) {
Expand Down
2 changes: 2 additions & 0 deletions csrc/runtime/executor_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ struct CudaExecutable : public NonCopyable {
std::string cubin_filename;
std::string kernel_name;
std::string compile_args;
std::vector<char> sass;
std::string sass_filename;
long block_size = -1;
int register_spills = -1;
};
Expand Down
116 changes: 101 additions & 15 deletions tools/codediff/codediff.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,9 @@ class LaunchParams:
@dataclass
class CompiledKernel:
filename: str
code: str | None = None
cuda: str | None = None
ptx: str | None = None
sass: str | None = None
ptxas_info: str | None = None
launch_params_str: str | None = None
launch_params: LaunchParams | None = None
Expand Down Expand Up @@ -588,7 +589,7 @@ def get_kernel(
kern = self.kernel_map[test_name].kernels[kernel_number]
basename = kern.filename
fullname = os.path.join(self.directory, "cuda", basename)
kern.code = ""
kern.cuda = ""
with open(fullname, "r") as f:
for i, line in enumerate(f.readlines()):
if kern.index_type is None:
Expand All @@ -598,18 +599,26 @@ def get_kernel(
if not strip_preamble or i >= self.preamble_size_lines:
# replace kernel934 with kernel1 to facilitate diffing
# also match kernel_43 to handle new-style naming with static fusion count
kern.code += re.sub(r"\bnvfuser_\d+\b", "nvfuser_N", line)
kern.code = kern.code.rstrip()
if strip_preamble and kern.code[-1] == "}":
kern.cuda += re.sub(r"\bnvfuser_\d+\b", "nvfuser_N", line)
kern.cuda = kern.cuda.rstrip()
if strip_preamble and kern.cuda[-1] == "}":
# trailing curly brace is close of namespace. This will clean it up so that we have just the kernel
kern.code = kern.code[:-1].rstrip()
kern.cuda = kern.cuda[:-1].rstrip()
# find ptx file if it exists
ptx_basename = os.path.splitext(basename)[0] + ".ptx"
ptx_fullname = os.path.join(self.directory, "ptx", ptx_basename)
try:
kern.ptx = open(ptx_fullname, "r").read().rstrip()
except FileNotFoundError:
pass

# find sass file if it exists
sass_basename = os.path.splitext(basename)[0] + ".sass"
sass_fullname = os.path.join(self.directory, "sass", sass_basename)
try:
kern.sass = open(sass_fullname, "r").read().rstrip()
except FileNotFoundError:
pass
return kern

def join(self, other: "TestRun"):
Expand Down Expand Up @@ -672,14 +681,23 @@ class KernelDiff:
kernel2: CompiledKernel
diff_lines: InitVar[list[str]] = []
ptx_diff_lines: InitVar[list[str] | None] = []
sass_diff_lines: InitVar[list[str] | None] = []
diff: str = field(init=False)
new_lines: int = 0
removed_lines: int = 0
ptx_diff: str | None = None
sass_diff: str | None = None
new_ptx_lines: int = 0
removed_ptx_lines: int = 0

def __post_init__(self, diff_lines: list[str], ptx_diff_lines: list[str] | None):
new_sass_lines: int = 0
removed_sass_lines: int = 0

def __post_init__(
self,
diff_lines: list[str],
ptx_diff_lines: list[str] | None,
sass_diff_lines: list[str] | None,
):
self.diff = "\n".join(diff_lines)

for line in diff_lines:
Expand All @@ -697,6 +715,15 @@ def __post_init__(self, diff_lines: list[str], ptx_diff_lines: list[str] | None)
elif line[:2] == "- ":
self.removed_ptx_lines += 1

if sass_diff_lines is not None:
self.sass_diff = "\n".join(sass_diff_lines)

for line in sass_diff_lines:
if line[:2] == "+ ":
self.new_sass_lines += 1
elif line[:2] == "- ":
self.removed_sass_lines += 1


@dataclass_json
@dataclass
Expand Down Expand Up @@ -740,6 +767,42 @@ def sanitize_ptx_lines(lines: list[str]) -> list[str]:
return sanitary_lines


def sanitize_sass_lines(lines: list[str]) -> list[str]:
    """Normalize disassembled SASS lines so that run-specific noise does not show up in diffs.

    Two rewrites are applied to every line:

    * Each mangled kernel name of the form ``_ZN<len>_<scope><len><name>...``
      has its first two length-prefixed components replaced with
      ``11kernelscope`` and ``6kernel``. All such names on a line are
      rewritten, not only the last one.
    * Address comments such as ``/*08a0*/`` (four or more lowercase hex
      digits, no spaces) are replaced with the placeholder ``/*addr*/``.

    Args:
        lines: SASS text already split into individual lines.

    Returns:
        A new list, the same length as ``lines``, holding the sanitized lines.
    """
    # Prefix is everything up to and including "_ZN"/"_ZZN"; it must be followed
    # by the decimal length of the scope name and the underscore that starts it.
    mangled_name_re = re.compile(r"^(?P<prefix>.*\b_Z?ZN)(?P<scopenamelen>\d+)_")

    sanitary_lines = []
    for line in lines:
        # Replace mangled kernel names like
        # _ZN76_GLOBAL__N__00000000_37___tmp_kernel_pointwise_f0_c1_r0_g0_cu_8995cef2_3255329nvfuser_pointwise_f0_c1_r0_g0ENS_6TensorIfLi2ELi2EEES1_S1_
        # or
        # _ZN76_GLOBAL__N__00000000_37___tmp_kernel_4_cu_8995cef2_3255329nvfuser_4ENS_6TensorIfLi2ELi2EEES1_S1_
        # with
        # _ZN11kernelscope6kernelENS_6TensorIfLi2ELi2EEES1_S1_
        #
        # A single line may mention the name more than once, e.g.
        # <name>,(.L_x_28 - <name>)
        # The greedy prefix always selects the LAST remaining occurrence, so we
        # repeat until nothing matches. A rewritten name starts with
        # "_ZN11kernelscope", which has no underscore right after the digits and
        # therefore never matches again; each pass removes one occurrence, so
        # the loop terminates.
        while True:
            m = mangled_name_re.match(line)
            if m is None:
                break
            prefix = m.group("prefix")
            scope_len_digits = m.group("scopenamelen")
            # Skip the length digits and the scope name they describe.
            remainder = line[len(prefix) + len(scope_len_digits) + int(scope_len_digits) :]
            # The second length-prefixed component (the kernel name), if present,
            # is replaced with "kernel".
            mrem = re.match(r"^(?P<varnamelen>\d+)", remainder)
            if mrem is not None:
                var_len_digits = mrem.group("varnamelen")
                remainder = (
                    "6kernel" + remainder[len(var_len_digits) + int(var_len_digits) :]
                )
            line = prefix + "11kernelscope" + remainder

        # Replace comments that tell us the address, such as /*08a0*/. Addresses
        # past 0xffff need more than four hex digits, so accept four or more.
        line = re.sub(r"/\*[0-9a-f]{4,}\*/", "/*addr*/", line)
        sanitary_lines.append(line)
    return sanitary_lines


@dataclass_json
@dataclass
class TestDifferences:
Expand All @@ -751,7 +814,7 @@ class TestDifferences:
removed_tests: list[CompiledTest] = field(default_factory=list)
total_num_diffs: int = 0
show_diffs: InitVar[bool] = False
inclusion_criterion: InitVar[str] = "mismatched_cuda_or_ptx"
inclusion_criterion: InitVar[str] = "mismatched_sass"
preamble_diff: str = field(init=False)
env_diff: str = field(init=False)

Expand Down Expand Up @@ -823,8 +886,8 @@ def __post_init__(self, show_diffs: bool, kernel_inclusion_criterion: str):
for kernel_num in range(minimum_kernel_count):
kern1 = self.run1.get_kernel(testname, kernel_num, strip_preamble=True)
kern2 = self.run2.get_kernel(testname, kernel_num, strip_preamble=True)
assert kern1.code is not None
assert kern2.code is not None
assert kern1.cuda is not None
assert kern2.cuda is not None

ptx_diff_lines = None
if kern1.ptx is not None and kern2.ptx is not None:
Expand All @@ -838,10 +901,22 @@ def __post_init__(self, show_diffs: bool, kernel_inclusion_criterion: str):
)
)

sass_diff_lines = None
if kern1.sass is not None and kern2.sass is not None:
sass_diff_lines = list(
difflib.unified_diff(
sanitize_sass_lines(kern1.sass.splitlines()),
sanitize_sass_lines(kern2.sass.splitlines()),
fromfile=self.run1.name,
tofile=self.run2.name,
n=5,
)
)

diff_lines = list(
difflib.unified_diff(
kern1.code.splitlines(),
kern2.code.splitlines(),
kern1.cuda.splitlines(),
kern2.cuda.splitlines(),
fromfile=self.run1.name,
tofile=self.run2.name,
n=5,
Expand All @@ -860,6 +935,16 @@ def __post_init__(self, show_diffs: bool, kernel_inclusion_criterion: str):
and ptx_diff_lines is not None
and len(ptx_diff_lines) > 0
)
or (
kernel_inclusion_criterion
in [
"mismatched_cuda_or_ptx",
"mismatched_ptx",
"mismatched_sass",
]
and sass_diff_lines is not None
and len(sass_diff_lines) > 0
)
):
kd = KernelDiff(
testname,
Expand All @@ -868,6 +953,7 @@ def __post_init__(self, show_diffs: bool, kernel_inclusion_criterion: str):
kern2,
diff_lines,
ptx_diff_lines=ptx_diff_lines,
sass_diff_lines=sass_diff_lines,
)
if show_diffs:
print(testname, kernel_num, kd.diff)
Expand Down Expand Up @@ -994,8 +1080,8 @@ def join_jsons(args: dict):
diff_parser.add_argument(
"--kernel-inclusion-criterion",
"-i",
choices=("mismatched_cuda_or_ptx", "mismatched_ptx", "all"),
default="mismatched_cuda_or_ptx",
choices=("mismatched_cuda_or_ptx", "mismatched_ptx", "mismatched_sass", "all"),
default="mismatched_sass",
help="Which kernels should we include?",
)
diff_parser.add_argument(
Expand Down
6 changes: 3 additions & 3 deletions tools/codediff/compare_codegen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -122,16 +122,16 @@ scriptdir=$(mktemp -d -t codediffXXXXXX)
cp -r "$nvfuserdir/tools/codediff/"* "$scriptdir/"

movecudafiles() {
find . -maxdepth 1 \( -name '__tmp_*.cu' -o -name '__tmp_*.ptx' \) -exec mv '{}' "$1" \;
find . -maxdepth 1 \( -name '__tmp_*.cu' -o -name '__tmp_*.ptx' -o -name '__tmp_*.sass' \) -exec mv '{}' "$1" \;
}

cleanup() {
numkernels=$(find . -maxdepth 1 -name '__tmp_*.cu' -o -name '__tmp_*.ptx' | wc -l)
numkernels=$(find . -maxdepth 1 -name '__tmp_*.cu' -o -name '__tmp_*.ptx' -o -name '__tmp_*.sass' | wc -l)

if (( numkernels > 0 ))
then
backupdir=$outdir/${currentcommit}-interrupted
echo "Interrupted. Backing up $numkernels .cu and .ptx files to $backupdir"
echo "Interrupted. Backing up $numkernels .cu, .ptx, and .sass files to $backupdir"
mkdir -p "$backupdir"
movecudafiles "$backupdir"
fi
Expand Down
14 changes: 8 additions & 6 deletions tools/codediff/run_command.sh
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,10 @@ then
fi
mkdir -p "$testdir"
movecudafiles() {
mkdir -p "$1/cuda" "$1/ptx"
mkdir -p "$1/cuda" "$1/ptx" "$1/sass"
find . -maxdepth 1 -name '__tmp_*.cu' -print0 | xargs -0 --no-run-if-empty mv -t "$1/cuda"
find . -maxdepth 1 -name '__tmp_*.ptx' -print0 | xargs -0 --no-run-if-empty mv -t "$1/ptx"
find . -maxdepth 1 -name '__tmp_*.sass' -print0 | xargs -0 --no-run-if-empty mv -t "$1/sass"
}
removecudafiles() {
tmpdir="./.nvfuser_run_command_tmp"
Expand Down Expand Up @@ -150,9 +151,10 @@ fi
cleanup() {
numcu=$(find . -maxdepth 1 -name '__tmp_*.cu' | wc -l)
numptx=$(find . -maxdepth 1 -name '__tmp_*.ptx' | wc -l)
if (( numcu + numptx > 0 ))
numsass=$(find . -maxdepth 1 -name '__tmp_*.sass' | wc -l)
if (( numcu + numptx + numsass > 0 ))
then
echo "Interrupted. Removing $numcu temporary .cu files and $numptx temporary .ptx files"
echo "Interrupted. Removing $numcu .cu, $numptx .ptx, and $numsass .sass temporary files"
removecudafiles
fi
# strip incomplete- from base name
Expand Down Expand Up @@ -192,7 +194,7 @@ ensure_in_list() {
echo "${l[*]}"
}
# ensure some NVFUSER_DUMP options are enabled
appended_dump=$(ensure_in_list "$NVFUSER_DUMP" cuda_to_file ptxas_verbose ptx)
appended_dump=$(ensure_in_list "$NVFUSER_DUMP" cuda_to_file ptxas_verbose ptx sass_to_file)
export NVFUSER_DUMP=$appended_dump
appended_enable=$(ensure_in_list "$NVFUSER_ENABLE" static_fusion_count)
export NVFUSER_ENABLE=$appended_enable
Expand All @@ -217,7 +219,7 @@ echo "$testcmd" > "$testdir/command"
if [[ -z $commandtype ]]
then
case "$testcmd" in
*test_nvfuser*)
*test_*)
;&
*tutorial_*)
;&
Expand Down Expand Up @@ -269,5 +271,5 @@ hostname > "$testdir/hostname"
printenv | sort > "$testdir/env"
nvcc --version > "$testdir/nvcc_version"
nvidia-smi --query-gpu=gpu_name --format=csv,noheader > "$testdir/gpu_names"
# save generated cuda and ptx files
# save generated cuda, ptx, sass files
movecudafiles "$testdir"
Loading