|
| 1 | +""" |
| 2 | +Address space tests for the AMDGPU plugin. |
| 3 | +""" |
| 4 | + |
| 5 | +import lldb |
| 6 | +import lldbsuite.test.lldbutil as lldbutil |
| 7 | +from lldbsuite.test.lldbtest import * |
| 8 | +from amdgpu_testcase import * |
| 9 | + |
| 10 | +SOURCE = "aspace.hip" |
| 11 | + |
| 12 | +# The following values are offsets from the start of the lane private stack |
| 13 | +# to the locations where these kernel local variables are stored. Note that |
| 14 | +# these offsets depend on the stack layout chosen by the compiler. They seem |
| 15 | +# to be pretty consistent across ROCm versions, but be aware that these |
| 16 | +# private stack offsets could change. |
| 17 | +SIZE_STACK_OFFSET = 0x20 |
| 18 | +IDX_STACK_OFFSET = 0x24 |
| 19 | + |
| 20 | +# The wave size is the number of lanes in a wave. |
| 21 | +WAVE_SIZE = 64 |
| 22 | + |
| 23 | + |
class Location:
    """Helper class to describe a location in memory with an expected value.

    Attributes:
        name: Label used in assertion messages (and, by some helpers, as the
            name of a variable to look up).
        address: Address, or offset, of the location.
        expected_bytes: The expected contents encoded as little-endian bytes.
        size_in_bytes: Total number of bytes covered by `expected_bytes`.
    """

    def __init__(self, name, expected_value, size_in_bytes, address):
        """Build a location from one expected integer or a sequence of them.

        Args:
            name: Label for the location.
            expected_value: A single integer, or a list/tuple of integers that
                are expected to be stored back to back.
            size_in_bytes: Size of ONE element in bytes. For a sequence the
                stored `size_in_bytes` attribute is this value multiplied by
                the number of elements.
            address: Address, or offset, of the (first) element.

        Raises:
            OverflowError: If a value is negative or does not fit in
                `size_in_bytes` bytes (raised by `int.to_bytes`).
        """
        self.name = name
        self.address = address
        # Normalise to a sequence so scalars and sequences share one encoding
        # path. Tuples are accepted as well as lists.
        if isinstance(expected_value, (list, tuple)):
            values = expected_value
        else:
            values = [expected_value]
        self.expected_bytes = b"".join(
            value.to_bytes(size_in_bytes, "little") for value in values
        )
        self.size_in_bytes = size_in_bytes * len(values)

    def __repr__(self):
        return (
            f"{type(self).__name__}(name={self.name!r}, address={self.address!r}, "
            f"expected_bytes={self.expected_bytes!r})"
        )
| 38 | + |
| 39 | + |
class AddressSpaceAmdGpuTestCase(AmdGpuTestCaseBase):
    """Tests that read GPU memory through named address spaces."""

    def validate_memory_read(
        self, address_space: str, loc: Location, thread: lldb.SBThread
    ):
        """Helper function to validate a memory read from an address space.

        Reads `loc.size_in_bytes` bytes at `loc.address` in `address_space`
        from the GPU process, using `thread` to build the address spec, and
        asserts that the read succeeded and returned `loc.expected_bytes`.
        """
        addr_spec = lldb.SBAddressSpec(loc.address, address_space, thread)
        error = lldb.SBError()
        data = self.gpu_process.ReadMemoryFromSpec(addr_spec, loc.size_in_bytes, error)
        self.assertTrue(
            error.Success(),
            f"{loc.name} reading from address space '{address_space}' failed: {error}",
        )
        self.assertEqual(
            data,
            loc.expected_bytes,
            f"Data for Location {loc.name} does not match expected value",
        )

    def validate_read_address_from_global_variable(
        self, address_space: str, location: Location
    ):
        """Helper function to check we can read from an address space using an
        address taken from a global variable.

        The global variable named `location.name` is looked up on the CPU side
        and its value is added to `location.address`, which allows easy
        offsetting from a global variable that represents an array. The
        caller's `location` object is left unmodified; the resolved address is
        stored on a copy.
        """
        import copy

        self.build()

        lldbutil.run_to_source_breakpoint(
            self, "// CPU BREAKPOINT - BEFORE LAUNCH", lldb.SBFileSpec(SOURCE)
        )

        # Find the variable in the CPU target.
        var = self.cpu_target.FindFirstGlobalVariable(location.name)
        self.assertTrue(
            var.IsValid(),
            f"{location.name} variable should be valid in the CPU target",
        )

        # Get the address stored in the variable (the variable is a pointer).
        addr = var.GetValueAsUnsigned()
        self.assertNotEqual(addr, 0, f"{location.name} address should not be null")

        # Resolve the absolute address on a shallow copy so that the Location
        # passed in by the caller still holds its original offset afterwards.
        resolved = copy.copy(location)
        resolved.address = location.address + addr

        # Continue executing to the gpu breakpoint.
        gpu_threads = self.continue_to_gpu_source_breakpoint(
            SOURCE,
            "// GPU BREAKPOINT",
        )
        self.assertIsNotNone(gpu_threads, "GPU should be stopped at breakpoint")

        # Switch back to the GPU before reading from the requested address space.
        self.select_gpu()

        self.validate_memory_read(address_space, resolved, gpu_threads[0])

    def run_to_first_gpu_breakpoint(self):
        """Helper to build the test and run to the common gpu breakpoint.

        Returns the value produced by `run_to_gpu_breakpoint` after asserting
        that it is not None.
        """
        self.build()

        gpu_threads = self.run_to_gpu_breakpoint(
            SOURCE, "// GPU BREAKPOINT", "// CPU BREAKPOINT - BEFORE LAUNCH"
        )
        self.assertIsNotNone(gpu_threads, "GPU should be stopped at breakpoint")
        return gpu_threads

    def test_generic(self):
        """Test reading from the generic address space."""
        # Read from generic address space using the address stored in the
        # device_output pointer. The device_output variable is a pointer to
        # device visible memory.
        # We expect the element at index 1 to be 2
        # (output[1] = shared_mem[1] = 1 * 2 = 2).
        location = Location(
            "device_output",
            expected_value=2,
            size_in_bytes=4,
            address=4,  # Offset address by 4 bytes to get index 1.
        )
        self.validate_read_address_from_global_variable("generic", location)

    def test_region(self):
        """Test that we fail to read from the region address space. It is not supported on this architecture (MI300/MI350)."""
        self.run_to_first_gpu_breakpoint()

        addr_spec = lldb.SBAddressSpec(0, "region")
        error = lldb.SBError()
        # Only the error matters here; the returned data is intentionally
        # ignored.
        self.gpu_process.ReadMemoryFromSpec(addr_spec, 1, error)

        self.assertFalse(error.Success(), "Read from region address space should fail")
        self.assertEqual(
            "AMD_DBGAPI_STATUS_ERROR: AMD_DBGAPI_STATUS_ERROR_INVALID_ARGUMENT_COMPATIBILITY",
            error.GetCString(),
        )

    def test_local(self):
        """Test that we can read from local memory."""
        gpu_threads = self.run_to_first_gpu_breakpoint()

        # Check that we can read local memory locations.
        # We expect the element at index 3 to be 6 (shared_mem[3] = 3 * 2 = 6).
        location = Location("shared_mem", expected_value=6, size_in_bytes=4, address=12)
        self.validate_memory_read("local", location, gpu_threads[0])

    def test_private_lane(self):
        """Test that we can read from the private_lane address space."""
        gpu_threads = self.run_to_first_gpu_breakpoint()

        # The checks below index gpu_threads[0] and gpu_threads[1]; fail with a
        # clear message rather than an IndexError if fewer were reported.
        self.assertGreaterEqual(
            len(gpu_threads),
            2,
            "Expected at least two GPU threads at the breakpoint",
        )

        # Check that we can read private_lane memory Locations.
        # These locations map to local variables on the private stack memory for
        # each lane. The `size` value is the input parameter to the kernel and
        # the `idx` value is the local variable in the kernel that stores the
        # threadIdx.x value. We check the `idx` variable twice since it should
        # have different values for lane 0 and lane 1.
        checks = [
            (
                Location(
                    "size",
                    expected_value=WAVE_SIZE,
                    size_in_bytes=4,
                    address=SIZE_STACK_OFFSET,
                ),
                gpu_threads[0],
            ),
            (
                Location(
                    "idx", expected_value=0, size_in_bytes=4, address=IDX_STACK_OFFSET
                ),
                gpu_threads[0],
            ),
            (
                Location(
                    "idx", expected_value=1, size_in_bytes=4, address=IDX_STACK_OFFSET
                ),
                gpu_threads[1],
            ),
        ]

        for location, thread in checks:
            self.validate_memory_read("private_lane", location, thread)

    def test_private_wave(self):
        """Test that we can read from the private_wave address space."""

        # The private_wave address space has the unswizzled values for each lane.
        # This makes it easy for the debugger to read the value of one variable
        # for each lane as consecutive memory locations.
        #
        # The location below describes the first 3 lane values for the idx
        # variable. The memory offset is calculated by finding the offset of the
        # idx variable for a lane and then multiplying by the wave size, since
        # the unswizzled memory is laid out with the values for each lane in
        # consecutive memory.
        addr = IDX_STACK_OFFSET * WAVE_SIZE

        gpu_threads = self.run_to_first_gpu_breakpoint()
        location = Location(
            "idx[0:3]", expected_value=[0, 1, 2], size_in_bytes=4, address=addr
        )
        self.validate_memory_read("private_wave", location, gpu_threads[0])
0 commit comments