Merge tag 'amd-drm-next-6.12-2024-09-13' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.12-2024-09-13: amdgpu: - GPUVM sync fixes - kdoc fixes - Misc spelling mistakes - Add some raven GFXOFF quirks - Use clamp helper - DC fixes - JPEG fixes - Process isolation fix - Queue reset fix - W=1 cleanup - SMU14 fixes - JPEG fixes amdkfd: - Fetch cacheline info from IP discovery - Queue reset fix - RAS fix - Document SVM events - CRIU fixes - Race fix in dma-buf handling drm: - dma-buf fd race fixes Signed-off-by: Dave Airlie <airlied@redhat.com> From: Alex Deucher <alexander.deucher@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240913134139.2861073-1-alexander.deucher@amd.com
This commit is contained in:
@@ -540,26 +540,29 @@ enum kfd_smi_event {
|
||||
KFD_SMI_EVENT_ALL_PROCESS = 64
|
||||
};
|
||||
|
||||
/* The reason of the page migration event */
|
||||
enum KFD_MIGRATE_TRIGGERS {
|
||||
KFD_MIGRATE_TRIGGER_PREFETCH,
|
||||
KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
|
||||
KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU,
|
||||
KFD_MIGRATE_TRIGGER_TTM_EVICTION
|
||||
KFD_MIGRATE_TRIGGER_PREFETCH, /* Prefetch to GPU VRAM or system memory */
|
||||
KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, /* GPU page fault recover */
|
||||
KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, /* CPU page fault recover */
|
||||
KFD_MIGRATE_TRIGGER_TTM_EVICTION /* TTM eviction */
|
||||
};
|
||||
|
||||
/* The reason of user queue eviction event */
|
||||
enum KFD_QUEUE_EVICTION_TRIGGERS {
|
||||
KFD_QUEUE_EVICTION_TRIGGER_SVM,
|
||||
KFD_QUEUE_EVICTION_TRIGGER_USERPTR,
|
||||
KFD_QUEUE_EVICTION_TRIGGER_TTM,
|
||||
KFD_QUEUE_EVICTION_TRIGGER_SUSPEND,
|
||||
KFD_QUEUE_EVICTION_CRIU_CHECKPOINT,
|
||||
KFD_QUEUE_EVICTION_CRIU_RESTORE
|
||||
KFD_QUEUE_EVICTION_TRIGGER_SVM, /* SVM buffer migration */
|
||||
KFD_QUEUE_EVICTION_TRIGGER_USERPTR, /* userptr movement */
|
||||
KFD_QUEUE_EVICTION_TRIGGER_TTM, /* TTM move buffer */
|
||||
KFD_QUEUE_EVICTION_TRIGGER_SUSPEND, /* GPU suspend */
|
||||
KFD_QUEUE_EVICTION_CRIU_CHECKPOINT, /* CRIU checkpoint */
|
||||
KFD_QUEUE_EVICTION_CRIU_RESTORE /* CRIU restore */
|
||||
};
|
||||
|
||||
/* The reason of unmap buffer from GPU event */
|
||||
enum KFD_SVM_UNMAP_TRIGGERS {
|
||||
KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY,
|
||||
KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE,
|
||||
KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU
|
||||
KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY, /* MMU notifier CPU buffer movement */
|
||||
KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE,/* MMU notifier page migration */
|
||||
KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU /* Unmap to free the buffer */
|
||||
};
|
||||
|
||||
#define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))
|
||||
@@ -570,6 +573,77 @@ struct kfd_ioctl_smi_events_args {
|
||||
__u32 anon_fd; /* from KFD */
|
||||
};
|
||||
|
||||
/*
|
||||
* SVM event tracing via SMI system management interface
|
||||
*
|
||||
* Open event file descriptor
|
||||
* use ioctl AMDKFD_IOC_SMI_EVENTS, pass in gpuid and return an anonymous file
|
||||
* descriptor to receive SMI events.
|
||||
* If calling with sudo permission, then file descriptor can be used to receive
|
||||
* SVM events from all processes, otherwise, to only receive SVM events of same
|
||||
* process.
|
||||
*
|
||||
* To enable the SVM event
|
||||
* Write event file descriptor with KFD_SMI_EVENT_MASK_FROM_INDEX(event) bitmap
|
||||
* mask to start recording the event to the kfifo, use bitmap mask combination
|
||||
* for multiple events. New event mask will overwrite the previous event mask.
|
||||
* KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESS) bit requires sudo
|
||||
* permission to receive SVM events from all processes.
|
||||
*
|
||||
* To receive the event
|
||||
* Application can poll file descriptor to wait for the events, then read event
|
||||
* from the file into a buffer. Each event is one line string message, starting
|
||||
* with the event id, then the event specific information.
|
||||
*
|
||||
* To decode event information
|
||||
* The following event format string macro can be used with sscanf to decode
|
||||
* the specific event information.
|
||||
* event triggers: the reason to generate the event, defined as enum for unmap,
|
||||
* eviction and migrate events.
|
||||
* node, from, to, prefetch_loc, preferred_loc: GPU ID, or 0 for system memory.
|
||||
* addr: user mode address, in pages
|
||||
* size: in pages
|
||||
* pid: the process ID to generate the event
|
||||
* ns: timestamp in nanosecond-resolution, starts at system boot time but
|
||||
* stops during suspend
|
||||
* migrate_update: GPU page fault is recovered by 'M' for migrate, 'U' for update
|
||||
* rw: 'W' for write page fault, 'R' for read page fault
|
||||
* rescheduled: 'R' if the queue restore failed and rescheduled to try again
|
||||
*/
|
||||
#define KFD_EVENT_FMT_UPDATE_GPU_RESET(reset_seq_num, reset_cause)\
|
||||
"%x %s\n", (reset_seq_num), (reset_cause)
|
||||
|
||||
#define KFD_EVENT_FMT_THERMAL_THROTTLING(bitmask, counter)\
|
||||
"%llx:%llx\n", (bitmask), (counter)
|
||||
|
||||
#define KFD_EVENT_FMT_VMFAULT(pid, task_name)\
|
||||
"%x:%s\n", (pid), (task_name)
|
||||
|
||||
#define KFD_EVENT_FMT_PAGEFAULT_START(ns, pid, addr, node, rw)\
|
||||
"%lld -%d @%lx(%x) %c\n", (ns), (pid), (addr), (node), (rw)
|
||||
|
||||
#define KFD_EVENT_FMT_PAGEFAULT_END(ns, pid, addr, node, migrate_update)\
|
||||
"%lld -%d @%lx(%x) %c\n", (ns), (pid), (addr), (node), (migrate_update)
|
||||
|
||||
#define KFD_EVENT_FMT_MIGRATE_START(ns, pid, start, size, from, to, prefetch_loc,\
|
||||
preferred_loc, migrate_trigger)\
|
||||
"%lld -%d @%lx(%lx) %x->%x %x:%x %d\n", (ns), (pid), (start), (size),\
|
||||
(from), (to), (prefetch_loc), (preferred_loc), (migrate_trigger)
|
||||
|
||||
#define KFD_EVENT_FMT_MIGRATE_END(ns, pid, start, size, from, to, migrate_trigger)\
|
||||
"%lld -%d @%lx(%lx) %x->%x %d\n", (ns), (pid), (start), (size),\
|
||||
(from), (to), (migrate_trigger)
|
||||
|
||||
#define KFD_EVENT_FMT_QUEUE_EVICTION(ns, pid, node, evict_trigger)\
|
||||
"%lld -%d %x %d\n", (ns), (pid), (node), (evict_trigger)
|
||||
|
||||
#define KFD_EVENT_FMT_QUEUE_RESTORE(ns, pid, node, rescheduled)\
|
||||
"%lld -%d %x %c\n", (ns), (pid), (node), (rescheduled)
|
||||
|
||||
#define KFD_EVENT_FMT_UNMAP_FROM_GPU(ns, pid, addr, size, node, unmap_trigger)\
|
||||
"%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size),\
|
||||
(node), (unmap_trigger)
|
||||
|
||||
/**************************************************************************************************
|
||||
* CRIU IOCTLs (Checkpoint Restore In Userspace)
|
||||
*
|
||||
|
||||
Reference in New Issue
Block a user