From c59daafd9628231a4224d8fa3a92dd59184be085 Mon Sep 17 00:00:00 2001 From: Han Gao Date: Tue, 9 Sep 2025 17:05:53 +0800 Subject: [PATCH 1/4] Revert "FROMLIST: drm/ttm: downgrade cached to write_combined when snooping not available" This reverts commit e9572e90f7e6da71991183b26c0df29e8584515e. --- drivers/gpu/drm/ttm/ttm_bo_util.c | 4 ---- drivers/gpu/drm/ttm/ttm_tt.c | 4 ---- include/drm/ttm/ttm_caching.h | 3 +-- 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index ee700175298ca4..bd90404ea609ca 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -303,10 +303,6 @@ pgprot_t ttm_io_prot(struct ttm_buffer_object *bo, struct ttm_resource *res, caching = res->bus.caching; } - /* Downgrade cached mapping for non-snooping devices */ - if (!bo->bdev->dma_coherent && caching == ttm_cached) - caching = ttm_write_combined; - return ttm_prot_from_caching(caching, tmp); } EXPORT_SYMBOL(ttm_io_prot); diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 609bdfbd7491d8..698cd4bf5e4648 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -153,10 +153,6 @@ static void ttm_tt_init_fields(struct ttm_tt *ttm, enum ttm_caching caching, unsigned long extra_pages) { - /* Downgrade cached mapping for non-snooping devices */ - if (!bo->bdev->dma_coherent && caching == ttm_cached) - caching = ttm_write_combined; - ttm->num_pages = (PAGE_ALIGN(bo->base.size) >> PAGE_SHIFT) + extra_pages; ttm->page_flags = page_flags; ttm->dma_address = NULL; diff --git a/include/drm/ttm/ttm_caching.h b/include/drm/ttm/ttm_caching.h index f92d7911f50e4d..a18f43e93abab4 100644 --- a/include/drm/ttm/ttm_caching.h +++ b/include/drm/ttm/ttm_caching.h @@ -47,8 +47,7 @@ enum ttm_caching { /** * @ttm_cached: Fully cached like normal system memory, requires that - * devices snoop the CPU cache on accesses. 
Downgraded to - * ttm_write_combined when the snooping capaiblity is missing. + * devices snoop the CPU cache on accesses. */ ttm_cached }; From 0b32a84261cebfaf13162ed8398e80588fce29c4 Mon Sep 17 00:00:00 2001 From: Han Gao Date: Tue, 9 Sep 2025 17:06:05 +0800 Subject: [PATCH 2/4] Revert "FROMLIST: drm/ttm: save the device's DMA coherency status in ttm_device" This reverts commit f251c16216ef97fa7158ffea052c878aebbcad4e. --- drivers/gpu/drm/ttm/ttm_device.c | 2 -- include/drm/ttm/ttm_device.h | 5 ----- 2 files changed, 7 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index b777a63c896a12..02e797fd1891ac 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -222,8 +222,6 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func list_add_tail(&bdev->device_list, &glob->device_list); mutex_unlock(&ttm_global_mutex); - bdev->dma_coherent = dev->dma_coherent; - return 0; } EXPORT_SYMBOL(ttm_device_init); diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index 0f4276f07e6e5f..07594b72de6366 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -225,11 +225,6 @@ struct ttm_device { */ const struct ttm_device_funcs *funcs; - /** - * @dma_coherent: if the device backed is dma-coherent. - */ - bool dma_coherent; - /** * @sysman: Resource manager for the system domain. * Access via ttm_manager_type. From 3f98079032da49baf29b019efd5562676f2f3c32 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sat, 29 Jun 2024 13:22:46 +0800 Subject: [PATCH 3/4] BACKPORT: FROMLIST: drm/ttm: save the device's DMA coherency status in ttm_device Currently TTM utilizes cached memory regardless of whether the device has full DMA coherency (can snoop CPU cache). 
Save the device's DMA coherency status in struct ttm_device, to allow further support of devices w/o snooping capability (the capability missing on at least one part of the transmission between the CPU and the device). Signed-off-by: Icenowy Zheng Link: https://lore.kernel.org/r/20240629052247.2653363-2-uwu@icenowy.me [ Han Gao: add conditional compilation for dma_coherent ] Signed-off-by: Han Gao --- drivers/gpu/drm/ttm/ttm_device.c | 6 ++++++ include/drm/ttm/ttm_device.h | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 02e797fd1891ac..fffcb34b57b85f 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -222,6 +222,12 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func list_add_tail(&bdev->device_list, &glob->device_list); mutex_unlock(&ttm_global_mutex); +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) + bdev->dma_coherent = dev->dma_coherent; +#endif + return 0; } EXPORT_SYMBOL(ttm_device_init); diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index 07594b72de6366..785d0d637df25d 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -225,6 +225,15 @@ struct ttm_device { */ const struct ttm_device_funcs *funcs; +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) + /** + * @dma_coherent: if the device backed is dma-coherent. + */ + bool dma_coherent; +#endif + /** * @sysman: Resource manager for the system domain. * Access via ttm_manager_type. 
From 78c682f156e71c1cef3210ca69042f7d9b258aae Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sat, 29 Jun 2024 13:22:47 +0800 Subject: [PATCH 4/4] BACKPORT: FROMLIST: drm/ttm: downgrade cached to write_combined when snooping not available As we can now acquire the presence of the full DMA coherency (snooping capability) from ttm_device, we can now map the CPU side memory as write-combined when cached is requested and snooping is not available. Signed-off-by: Icenowy Zheng Link: https://lore.kernel.org/r/20240629052247.2653363-3-uwu@icenowy.me [ Han Gao: add conditional compilation for dma coherent operations ] Signed-off-by: Han Gao --- drivers/gpu/drm/ttm/ttm_bo_util.c | 8 ++++++++ drivers/gpu/drm/ttm/ttm_tt.c | 8 ++++++++ include/drm/ttm/ttm_caching.h | 3 ++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index bd90404ea609ca..3135508142b503 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -303,6 +303,14 @@ pgprot_t ttm_io_prot(struct ttm_buffer_object *bo, struct ttm_resource *res, caching = res->bus.caching; } +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) + /* Downgrade cached mapping for non-snooping devices */ + if (!bo->bdev->dma_coherent && caching == ttm_cached) + caching = ttm_write_combined; +#endif + return ttm_prot_from_caching(caching, tmp); } EXPORT_SYMBOL(ttm_io_prot); diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 698cd4bf5e4648..dacb5b8b0df96f 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -153,6 +153,14 @@ static void ttm_tt_init_fields(struct ttm_tt *ttm, enum ttm_caching caching, unsigned long extra_pages) { +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + 
defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) + /* Downgrade cached mapping for non-snooping devices */ + if (!bo->bdev->dma_coherent && caching == ttm_cached) + caching = ttm_write_combined; +#endif + ttm->num_pages = (PAGE_ALIGN(bo->base.size) >> PAGE_SHIFT) + extra_pages; ttm->page_flags = page_flags; ttm->dma_address = NULL; diff --git a/include/drm/ttm/ttm_caching.h b/include/drm/ttm/ttm_caching.h index a18f43e93abab4..f92d7911f50e4d 100644 --- a/include/drm/ttm/ttm_caching.h +++ b/include/drm/ttm/ttm_caching.h @@ -47,7 +47,8 @@ enum ttm_caching { /** * @ttm_cached: Fully cached like normal system memory, requires that - * devices snoop the CPU cache on accesses. + * devices snoop the CPU cache on accesses. Downgraded to + * ttm_write_combined when the snooping capability is missing. */ ttm_cached };