Skip to content

Commit 28b1c83

Browse files
committed
Merge ROCm 1.5.1 changes into roc-1.5.x
2 parents 757f29e + 8ca2ad0 commit 28b1c83

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+15012
-556
lines changed

arch/arm64/configs/rock-dbg_defconfig

+4,479
Large diffs are not rendered by default.

arch/powerpc/configs/rock-dbg_defconfig

+7,821
Large diffs are not rendered by default.

arch/powerpc/platforms/powernv/pci-ioda.c

+79-2
Original file line numberDiff line numberDiff line change
@@ -1717,6 +1717,62 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
17171717
*/
17181718
}
17191719

1720+
static void pnv_pci_ioda_dma_sketchy_bypass(struct pnv_ioda_pe *pe)
1721+
{
1722+
/* Enable a transparent bypass into TVE #1 through DMA window 0 */
1723+
s64 rc;
1724+
u64 addr;
1725+
u64 tce_count;
1726+
u64 table_size;
1727+
u64 tce_order = 28; /* 256MB TCEs */
1728+
u64 window_size = memory_hotplug_max() + (1ULL << 32);
1729+
struct page *table_pages;
1730+
__be64 *tces;
1731+
1732+
window_size = roundup_pow_of_two(memory_hotplug_max() + (1ULL << 32));
1733+
tce_count = window_size >> tce_order;
1734+
table_size = tce_count << 3;
1735+
1736+
pr_debug("ruscur: table_size %016llx PAGE_SIZE %016lx\n",
1737+
table_size, PAGE_SIZE);
1738+
if (table_size < PAGE_SIZE) {
1739+
pr_debug("ruscur: set table_size to PAGE_SIZE\n");
1740+
table_size = PAGE_SIZE;
1741+
}
1742+
1743+
pr_debug("ruscur: tce_count %016llx table_size %016llx\n",
1744+
tce_count, table_size);
1745+
1746+
table_pages = alloc_pages_node(pe->phb->hose->node, GFP_KERNEL,
1747+
get_order(table_size));
1748+
1749+
pr_debug("ruscur: got table_pages %p\n", table_pages);
1750+
/* TODO null checking */
1751+
tces = page_address(table_pages);
1752+
pr_debug("ruscur: got tces %p\n", tces);
1753+
memset(tces, 0, table_size);
1754+
1755+
for (addr = 0; addr < memory_hotplug_max(); addr += (1 << tce_order)) {
1756+
pr_debug("ruscur: addr %016llx index %016llx\n", addr,
1757+
(addr + (1ULL << 32)) >> tce_order);
1758+
tces[(addr + (1ULL << 32)) >> tce_order] =
1759+
cpu_to_be64(addr | TCE_PCI_READ | TCE_PCI_WRITE);
1760+
}
1761+
1762+
rc = opal_pci_map_pe_dma_window(pe->phb->opal_id,
1763+
pe->pe_number,
1764+
/* reconfigure window 0 */
1765+
(pe->pe_number << 1) + 0,
1766+
1, /* level (unsure what this means) */
1767+
__pa(tces),
1768+
table_size,
1769+
1 << tce_order);
1770+
if (rc)
1771+
pe_err(pe, "OPAL error %llx in sketchy bypass\n", rc);
1772+
else
1773+
pe_info(pe, "ruscur's sketchy bypass worked, apparently\n");
1774+
}
1775+
17201776
static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
17211777
{
17221778
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
@@ -1739,8 +1795,29 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
17391795
dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n");
17401796
set_dma_ops(&pdev->dev, &dma_direct_ops);
17411797
} else {
1742-
dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
1743-
set_dma_ops(&pdev->dev, &dma_iommu_ops);
1798+
/* Find out if we want to address more than 2G */
1799+
dev_info(&pdev->dev, "My dma_mask is %016llx\n", dma_mask);
1800+
if (dma_mask >> 32 /*&& pe->device_count == 1*/) {
1801+
/*
1802+
* TODO
1803+
* This mode shouldn't be used if the PE has any other
1804+
* device on it. Things will go wrong.
1805+
* We can't just check for device_count of 1 though,
1806+
* because of things like GPUs with audio devices and
1807+
* stuff like that. So we should walk the PE and check
1808+
* if everything else on it has the same vendor ID...?
1809+
*/
1810+
dev_info(&pdev->dev, "%d devices on my PE\n",
1811+
pe->device_count);
1812+
/* Set up the bypass mode */
1813+
pnv_pci_ioda_dma_sketchy_bypass(pe);
1814+
/* 4GB offset places us into TVE#1 */
1815+
set_dma_offset(&pdev->dev, (1ULL << 32));
1816+
set_dma_ops(&pdev->dev, &dma_direct_ops);
1817+
} else {
1818+
dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
1819+
set_dma_ops(&pdev->dev, &dma_iommu_ops);
1820+
}
17441821
}
17451822
*pdev->dev.dma_mask = dma_mask;
17461823

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

+1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include <linux/list.h>
3030
#include <drm/drmP.h>
3131
#include <linux/dma-buf.h>
32+
#include <linux/pagemap.h>
3233
#include "amdgpu_amdkfd.h"
3334
#include "amdgpu_ucode.h"
3435
#include "gca/gfx_8_0_sh_mask.h"

drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

+2
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,9 @@ static const struct pci_device_id pciidlist[] = {
461461
{0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
462462
{0x1002, 0x6862, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
463463
{0x1002, 0x6863, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
464+
{0x1002, 0x6864, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
464465
{0x1002, 0x6867, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
466+
{0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
465467
{0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
466468
{0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
467469
{0, 0, 0}

drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c

+2-10
Original file line numberDiff line numberDiff line change
@@ -867,8 +867,7 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
867867

868868
pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
869869

870-
/* never 0 (full-speed), fuse or smc-controlled always */
871-
return sprintf(buf, "%i\n", pwm_mode == FDO_PWM_MODE_STATIC ? 1 : 2);
870+
return sprintf(buf, "%i\n", pwm_mode);
872871
}
873872

874873
static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
@@ -887,14 +886,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
887886
if (err)
888887
return err;
889888

890-
switch (value) {
891-
case 1: /* manual, percent-based */
892-
amdgpu_dpm_set_fan_control_mode(adev, FDO_PWM_MODE_STATIC);
893-
break;
894-
default: /* disable */
895-
amdgpu_dpm_set_fan_control_mode(adev, 0);
896-
break;
897-
}
889+
amdgpu_dpm_set_fan_control_mode(adev, value);
898890

899891
return count;
900892
}

drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c

+5-2
Original file line numberDiff line numberDiff line change
@@ -4846,8 +4846,11 @@ static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring)
48464846
/* enable the doorbell if requested */
48474847
if (ring->use_doorbell) {
48484848
if ((adev->asic_type == CHIP_CARRIZO) ||
4849-
(adev->asic_type == CHIP_FIJI) ||
4850-
(adev->asic_type == CHIP_STONEY)) {
4849+
(adev->asic_type == CHIP_FIJI) ||
4850+
(adev->asic_type == CHIP_STONEY) ||
4851+
(adev->asic_type == CHIP_POLARIS10) ||
4852+
(adev->asic_type == CHIP_POLARIS11) ||
4853+
(adev->asic_type == CHIP_POLARIS12)) {
48514854
WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
48524855
AMDGPU_DOORBELL_KIQ << 2);
48534856
WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,

drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
395395
* in visible VRAM and the address space. Use at most
396396
* half of each. */
397397
uint64_t max_gtt_size = min(
398-
adev->mc.visible_vram_size / 8 * PAGE_SIZE / 2,
398+
adev->mc.visible_vram_size / 8 *
399+
AMDGPU_GPU_PAGE_SIZE / 2,
399400
1ULL << 39);
400401

401402
si_meminfo(&si);

drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -562,7 +562,8 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
562562
* in visible VRAM and the address space. Use at most
563563
* half of each. */
564564
uint64_t max_gtt_size = min(
565-
adev->mc.visible_vram_size / 8 * PAGE_SIZE / 2,
565+
adev->mc.visible_vram_size / 8 *
566+
AMDGPU_GPU_PAGE_SIZE / 2,
566567
1ULL << 39);
567568

568569
si_meminfo(&si);

drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,8 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
468468
* half of each.
469469
*/
470470
uint64_t max_gtt_size = min(
471-
adev->mc.visible_vram_size / 8 * PAGE_SIZE / 2,
471+
adev->mc.visible_vram_size / 8 *
472+
AMDGPU_GPU_PAGE_SIZE / 2,
472473
1ULL << 39);
473474

474475
si_meminfo(&si);

drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c

+7
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,13 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
628628

629629
if (adev->mman.buffer_funcs_ring == ring)
630630
amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size);
631+
632+
/* FIXME: temporarily disable SDMA-ULV interrupts for Vega10.
633+
* Remove this once the fix is in firmware.
634+
*/
635+
if (ring->adev->asic_type == CHIP_VEGA10)
636+
WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_ULV_CNTL),
637+
0);
631638
}
632639

633640
return 0;

drivers/gpu/drm/amd/amdkfd/Kconfig

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
config HSA_AMD
66
tristate "HSA kernel driver for AMD GPU devices"
7-
depends on (DRM_RADEON || DRM_AMDGPU) && AMD_IOMMU_V2 && X86_64
7+
depends on (DRM_RADEON || DRM_AMDGPU) && (X86_64 || PPC64 || ARM64)
88
select DRM_AMDGPU_USERPTR
99
help
1010
Enable this if you want to use HSA features on AMD GPU devices.

drivers/gpu/drm/amd/amdkfd/kfd_chardev.c

+1
Original file line numberDiff line numberDiff line change
@@ -2072,6 +2072,7 @@ static int kfd_ioctl_cross_memory_copy(struct file *filep,
20722072
space_left -= copied;
20732073
dst_va_addr += copied;
20742074
dst_offset += copied;
2075+
src_offset += copied;
20752076
if (dst_va_addr > dst_bo->it.last + 1) {
20762077
pr_err("Cross mem copy failed. Memory overflow\n");
20772078
err = -EFAULT;

drivers/gpu/drm/amd/amdkfd/kfd_crat.c

+19-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
#include <linux/kernel.h>
22
#include <linux/acpi.h>
33
#include <linux/mm.h>
4+
#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
45
#include <linux/amd-iommu.h>
6+
#endif
57
#include <linux/pci.h>
68
#include "kfd_crat.h"
79
#include "kfd_priv.h"
@@ -664,6 +666,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
664666
*
665667
* Return 0 if successful else return -ve value
666668
*/
669+
#ifdef CONFIG_ACPI
667670
int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
668671
{
669672
struct acpi_table_header *crat_table;
@@ -706,6 +709,7 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
706709

707710
return 0;
708711
}
712+
#endif
709713

710714
/* Memory required to create Virtual CRAT.
711715
* Since there is no easy way to predict the amount of memory required, the
@@ -808,12 +812,14 @@ static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
808812
static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
809813
{
810814
struct crat_header *crat_table = (struct crat_header *)pcrat_image;
811-
struct acpi_table_header *acpi_table;
812-
acpi_status status;
813815
struct crat_subtype_generic *sub_type_hdr;
814816
int avail_size = *size;
815817
int numa_node_id;
816818
int ret = 0;
819+
#ifdef CONFIG_ACPI
820+
struct acpi_table_header *acpi_table;
821+
acpi_status status;
822+
#endif
817823

818824
if (pcrat_image == NULL || avail_size < VCRAT_SIZE_FOR_CPU)
819825
return -EINVAL;
@@ -829,6 +835,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
829835
memcpy(&crat_table->signature, CRAT_SIGNATURE, sizeof(crat_table->signature));
830836
crat_table->length = sizeof(struct crat_header);
831837

838+
#ifdef CONFIG_ACPI
832839
status = acpi_get_table("DSDT", 0, &acpi_table);
833840
if (status == AE_NOT_FOUND)
834841
pr_warn("DSDT table not found for OEM information\n");
@@ -837,6 +844,11 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
837844
memcpy(crat_table->oem_id, acpi_table->oem_id, CRAT_OEMID_LENGTH);
838845
memcpy(crat_table->oem_table_id, acpi_table->oem_table_id, CRAT_OEMTABLEID_LENGTH);
839846
}
847+
#else
848+
crat_table->oem_revision = 0;
849+
memcpy(crat_table->oem_id, "INV", CRAT_OEMID_LENGTH);
850+
memcpy(crat_table->oem_table_id, "UNAVAIL", CRAT_OEMTABLEID_LENGTH);
851+
#endif
840852
crat_table->total_entries = 0;
841853
crat_table->num_domains = 0;
842854

@@ -969,15 +981,17 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
969981
struct crat_subtype_generic *sub_type_hdr;
970982
struct crat_subtype_computeunit *cu;
971983
struct kfd_cu_info cu_info;
972-
struct amd_iommu_device_info iommu_info;
973984
int avail_size = *size;
974985
uint32_t total_num_of_cu;
975986
int num_of_cache_entries = 0;
976987
int cache_mem_filled = 0;
977988
int ret = 0;
989+
#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
990+
struct amd_iommu_device_info iommu_info;
978991
const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
979992
AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
980993
AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
994+
#endif
981995
struct kfd_local_mem_info local_mem_info;
982996

983997
if (pcrat_image == NULL || avail_size < VCRAT_SIZE_FOR_GPU)
@@ -1035,11 +1049,13 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
10351049

10361050
/* Check if this node supports IOMMU. During parsing this flag will
10371051
* translate to HSA_CAP_ATS_PRESENT */
1052+
#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
10381053
iommu_info.flags = 0;
10391054
if (0 == amd_iommu_device_info(kdev->pdev, &iommu_info)) {
10401055
if ((iommu_info.flags & required_iommu_flags) == required_iommu_flags)
10411056
cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
10421057
}
1058+
#endif
10431059

10441060
crat_table->length += sub_type_hdr->length;
10451061
crat_table->total_entries++;

drivers/gpu/drm/amd/amdkfd/kfd_crat.h

+2
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,9 @@ struct cdit_header {
308308

309309
#pragma pack()
310310

311+
#ifdef CONFIG_ACPI
311312
int kfd_create_crat_image_acpi(void **crat_image, size_t *size);
313+
#endif
312314
void kfd_destroy_crat_image(void *crat_image);
313315
int kfd_parse_crat_table(void *crat_image,
314316
struct list_head *device_list,

0 commit comments

Comments
 (0)