From 9baf0920b5d8e4e4ea302cb954168e06cf838d63 Mon Sep 17 00:00:00 2001 From: Bing Niu Date: Mon, 7 Nov 2016 10:44:36 +0800 Subject: [PATCH 1/8] drm/i915/gvt: don't rely on guest PPGTT entry to free old shadow data On guest writing a PPGTT entry, if it contains value and the old entry is valid, gvt will read it and find & free the corresponding old data for it. However, with the KVM write protection provided by page_track, the guest entry will be written with new value before gvt handling. To avoid that, we should use the shadow entry instead. Signed-off-by: Bing Niu Signed-off-by: Jike Song Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 51 ++++++++++++---------------------- 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 15f7d4e8e75d..7eaaf1c9ed2b 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -974,7 +974,7 @@ fail: } static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt, - struct intel_gvt_gtt_entry *we, unsigned long index) + unsigned long index) { struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt); struct intel_vgpu_shadow_page *sp = &spt->shadow_page; @@ -983,25 +983,26 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt, struct intel_gvt_gtt_entry e; int ret; - trace_gpt_change(spt->vgpu->id, "remove", spt, sp->type, - we->val64, index); - ppgtt_get_shadow_entry(spt, &e, index); + + trace_gpt_change(spt->vgpu->id, "remove", spt, sp->type, e.val64, + index); + if (!ops->test_present(&e)) return 0; if (ops->get_pfn(&e) == vgpu->gtt.scratch_pt[sp->type].page_mfn) return 0; - if (gtt_type_is_pt(get_next_pt_type(we->type))) { - struct intel_vgpu_guest_page *g = - intel_vgpu_find_guest_page(vgpu, ops->get_pfn(we)); - if (!g) { + if (gtt_type_is_pt(get_next_pt_type(e.type))) { + struct intel_vgpu_ppgtt_spt *s = + ppgtt_find_shadow_page(vgpu, ops->get_pfn(&e)); + if (!s) { gvt_err("fail to find guest page\n"); ret = -ENXIO; goto fail; } - ret = ppgtt_invalidate_shadow_page(guest_page_to_ppgtt_spt(g)); + ret = ppgtt_invalidate_shadow_page(s); if (ret) goto fail; } @@ -1010,7 +1011,7 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt, return 0; fail: gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n", - vgpu->id, spt, we->val64, we->type); + vgpu->id, spt, e.val64, e.type); return ret; } @@ -1231,23 +1232,16 @@ static int ppgtt_handle_guest_write_page_table( struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt); struct intel_vgpu *vgpu = spt->vgpu; struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; - struct intel_gvt_gtt_entry ge; - int old_present, new_present; int ret; + int new_present; - ppgtt_get_guest_entry(spt, &ge, index); - - old_present = ops->test_present(&ge); new_present = ops->test_present(we); - ppgtt_set_guest_entry(spt, we, index); + ret = ppgtt_handle_guest_entry_removal(gpt, index); + if (ret) + goto fail; - if (old_present) { - ret = ppgtt_handle_guest_entry_removal(gpt, &ge, index); - if (ret) - goto fail; - } if (new_present) { ret = ppgtt_handle_guest_entry_add(gpt, we, index); if (ret) @@ -1293,7 +1287,7 @@ int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu) { struct list_head *pos, *n; struct intel_vgpu_ppgtt_spt *spt; - struct intel_gvt_gtt_entry ge, e; + struct intel_gvt_gtt_entry ge; unsigned long index; int ret; @@ -1304,9 +1298,6 @@ int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu) for_each_set_bit(index, spt->post_shadow_bitmap, GTT_ENTRY_NUM_IN_ONE_PAGE) { ppgtt_get_guest_entry(spt, &ge, index); - e = ge; - e.val64 = 0; - ppgtt_set_guest_entry(spt, &e, index); ret = ppgtt_handle_guest_write_page_table( &spt->guest_page, &ge, index); @@ -1334,8 +1325,6 @@ static int ppgtt_handle_guest_write_page_table_bytes(void *gp, index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift; ppgtt_get_guest_entry(spt, &we, index); - memcpy((void *)&we.val64 + (pa & (info->gtt_entry_size - 1)), - p_data, bytes); ops->test_pse(&we); @@ -1344,19 +1333,13 @@ static int ppgtt_handle_guest_write_page_table_bytes(void *gp, if (ret) return ret; } else { - struct intel_gvt_gtt_entry ge; - - ppgtt_get_guest_entry(spt, &ge, index); - if (!test_bit(index, spt->post_shadow_bitmap)) { - ret = ppgtt_handle_guest_entry_removal(gpt, - &ge, index); + ret = ppgtt_handle_guest_entry_removal(gpt, index); if (ret) return ret; } ppgtt_set_post_shadow(spt, index); - ppgtt_set_guest_entry(spt, &we, index); } if (!enable_out_of_sync) From c754936fe66c45d2075970dc1e6ebdfeec4df6f3 Mon Sep 17 00:00:00 2001 From: Xiaoguang Chen Date: Thu, 3 Nov 2016 18:38:30 +0800 Subject: [PATCH 2/8] drm/i915/gvt: use kmap instead of kmap_atomic around guest memory access kmap_atomic doesn't allow sleep until unmapped. However, it's necessary to allow sleep during reading/writing guest memory, so use kmap instead. Signed-off-by: Bing Niu Signed-off-by: Xiaoguang Chen Signed-off-by: Jike Song Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 843a5de4300d..7d87c43661c5 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -89,15 +89,15 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) } page = i915_gem_object_get_page(ctx_obj, LRC_PPHWSP_PN + i); - dst = kmap_atomic(page); + dst = kmap(page); intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst, GTT_PAGE_SIZE); - kunmap_atomic(dst); + kunmap(page); i++; } page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); - shadow_ring_context = kmap_atomic(page); + shadow_ring_context = kmap(page); #define COPY_REG(name) \ intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \ @@ -123,7 +123,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) sizeof(*shadow_ring_context), GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); - kunmap_atomic(shadow_ring_context); + kunmap(page); return 0; } @@ -318,10 +318,10 @@ static void update_guest_context(struct intel_vgpu_workload *workload) } page = i915_gem_object_get_page(ctx_obj, LRC_PPHWSP_PN + i); - src = kmap_atomic(page); + src = kmap(page); intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src, GTT_PAGE_SIZE); - kunmap_atomic(src); + kunmap(page); i++; } @@ -329,7 +329,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) RING_CTX_OFF(ring_header.val), &workload->rb_tail, 4); page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); - shadow_ring_context = kmap_atomic(page); + shadow_ring_context = kmap(page); #define COPY_REG(name) \ intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa + \ @@ -347,7 +347,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) sizeof(*shadow_ring_context), GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); - kunmap_atomic(shadow_ring_context); + kunmap(page); } static void complete_current_workload(struct intel_gvt *gvt, int ring_id) From 1f31c8294880d1ac99b1b477efd9de23b36cd5ec Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 3 Nov 2016 18:38:31 +0800 Subject: [PATCH 3/8] drm/i915/gvt: add intel vgpu types support By providing predefined vGPU types, users can choose which type a vgpu to create and use, without specifying detailed parameters. Signed-off-by: Zhenyu Wang Signed-off-by: Jike Song --- drivers/gpu/drm/i915/gvt/gvt.c | 11 ++- drivers/gpu/drm/i915/gvt/gvt.h | 28 +++++- drivers/gpu/drm/i915/gvt/vgpu.c | 148 +++++++++++++++++++++++++++++--- 3 files changed, 172 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 385969a89216..48a67d1e1893 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -193,6 +193,8 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv) intel_gvt_clean_mmio_info(gvt); intel_gvt_free_firmware(gvt); + intel_gvt_clean_vgpu_types(gvt); + kfree(dev_priv->gvt); dev_priv->gvt = NULL; } @@ -270,10 +272,17 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) if (ret) goto out_clean_cmd_parser; - gvt_dbg_core("gvt device creation is done\n"); + ret = intel_gvt_init_vgpu_types(gvt); + if (ret) + goto out_clean_thread; + + + gvt_dbg_core("gvt device initialization is done\n"); dev_priv->gvt = gvt; return 0; +out_clean_thread: + clean_service_thread(gvt); out_clean_cmd_parser: intel_gvt_clean_cmd_parser(gvt); out_clean_sched_policy: diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 62fc9e3ac5c6..33e6a6447025 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -190,6 +190,16 @@ struct intel_gvt_opregion { u32 opregion_pa; }; +#define NR_MAX_INTEL_VGPU_TYPES 20 +struct intel_vgpu_type { + char name[16]; + unsigned int max_instance; + unsigned int avail_instance; + unsigned int low_gm_size; + unsigned int high_gm_size; + unsigned int fence; +}; + struct intel_gvt { struct mutex lock; struct drm_i915_private *dev_priv; @@ -205,6 +215,8 @@ struct intel_gvt { struct intel_gvt_opregion opregion; struct intel_gvt_workload_scheduler scheduler; DECLARE_HASHTABLE(cmd_table, GVT_CMD_HASH_BITS); + struct intel_vgpu_type *types; + unsigned int num_types; struct task_struct *service_thread; wait_queue_head_t service_thread_wq; @@ -230,6 +242,14 @@ static inline void intel_gvt_request_service(struct intel_gvt *gvt, void intel_gvt_free_firmware(struct intel_gvt *gvt); int intel_gvt_load_firmware(struct intel_gvt *gvt); +/* Aperture/GM space definitions for GVT device */ +#define MB_TO_BYTES(mb) ((mb) << 20ULL) +#define BYTES_TO_MB(b) ((b) >> 20ULL) + +#define HOST_LOW_GM_SIZE MB_TO_BYTES(128) +#define HOST_HIGH_GM_SIZE MB_TO_BYTES(384) +#define HOST_FENCE 4 + /* Aperture/GM space definitions for GVT device */ #define gvt_aperture_sz(gvt) (gvt->dev_priv->ggtt.mappable_end) #define gvt_aperture_pa_base(gvt) (gvt->dev_priv->ggtt.mappable_base) @@ -330,12 +350,14 @@ static inline void intel_vgpu_write_pci_bar(struct intel_vgpu *vgpu, } } -struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, - struct intel_vgpu_creation_params * - param); +int intel_gvt_init_vgpu_types(struct intel_gvt *gvt); +void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt); +struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, + struct intel_vgpu_type *type); void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu); + /* validating GM functions */ #define vgpu_gmadr_is_aperture(vgpu, gmadr) \ ((gmadr >= vgpu_aperture_gmadr_base(vgpu)) && \ diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 4f54005b976d..de3c1876aae3 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -132,6 +132,106 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu) WARN_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE); } +/** + * intel_gvt_init_vgpu_types - initialize vGPU type list + * @gvt : GVT device + * + * Initialize vGPU type list based on available resource. + * + */ +int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) +{ + unsigned int num_types; + unsigned int i, low_avail; + unsigned int min_low; + + /* vGPU type name is defined as GVTg_Vx_y which contains + * physical GPU generation type and 'y' means maximum vGPU + * instances user can create on one physical GPU for this + * type. + * + * Depend on physical SKU resource, might see vGPU types like + * GVTg_V4_8, GVTg_V4_4, GVTg_V4_2, etc. We can create + * different types of vGPU on same physical GPU depending on + * available resource. Each vGPU type will have "avail_instance" + * to indicate how many vGPU instance can be created for this + * type. + * + * Currently use static size here as we init type earlier.. + */ + low_avail = MB_TO_BYTES(256) - HOST_LOW_GM_SIZE; + num_types = 4; + + gvt->types = kzalloc(num_types * sizeof(struct intel_vgpu_type), + GFP_KERNEL); + if (!gvt->types) + return -ENOMEM; + + min_low = MB_TO_BYTES(32); + for (i = 0; i < num_types; ++i) { + if (low_avail / min_low == 0) + break; + gvt->types[i].low_gm_size = min_low; + gvt->types[i].high_gm_size = 3 * gvt->types[i].low_gm_size; + gvt->types[i].fence = 4; + gvt->types[i].max_instance = low_avail / min_low; + gvt->types[i].avail_instance = gvt->types[i].max_instance; + + if (IS_GEN8(gvt->dev_priv)) + sprintf(gvt->types[i].name, "GVTg_V4_%u", + gvt->types[i].max_instance); + else if (IS_GEN9(gvt->dev_priv)) + sprintf(gvt->types[i].name, "GVTg_V5_%u", + gvt->types[i].max_instance); + + min_low <<= 1; + gvt_dbg_core("type[%d]: %s max %u avail %u low %u high %u fence %u\n", + i, gvt->types[i].name, gvt->types[i].max_instance, + gvt->types[i].avail_instance, + gvt->types[i].low_gm_size, + gvt->types[i].high_gm_size, gvt->types[i].fence); + } + + gvt->num_types = i; + return 0; +} + +void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt) +{ + kfree(gvt->types); +} + +static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt) +{ + int i; + unsigned int low_gm_avail, high_gm_avail, fence_avail; + unsigned int low_gm_min, high_gm_min, fence_min, total_min; + + /* Need to depend on maxium hw resource size but keep on + * static config for now. + */ + low_gm_avail = MB_TO_BYTES(256) - HOST_LOW_GM_SIZE - + gvt->gm.vgpu_allocated_low_gm_size; + high_gm_avail = MB_TO_BYTES(256) * 3 - HOST_HIGH_GM_SIZE - + gvt->gm.vgpu_allocated_high_gm_size; + fence_avail = gvt_fence_sz(gvt) - HOST_FENCE - + gvt->fence.vgpu_allocated_fence_num; + + for (i = 0; i < gvt->num_types; i++) { + low_gm_min = low_gm_avail / gvt->types[i].low_gm_size; + high_gm_min = high_gm_avail / gvt->types[i].high_gm_size; + fence_min = fence_avail / gvt->types[i].fence; + total_min = min(min(low_gm_min, high_gm_min), fence_min); + gvt->types[i].avail_instance = min(gvt->types[i].max_instance, + total_min); + + gvt_dbg_core("update type[%d]: %s max %u avail %u low %u high %u fence %u\n", + i, gvt->types[i].name, gvt->types[i].max_instance, + gvt->types[i].avail_instance, gvt->types[i].low_gm_size, + gvt->types[i].high_gm_size, gvt->types[i].fence); + } +} + /** * intel_gvt_destroy_vgpu - destroy a virtual GPU * @vgpu: virtual GPU @@ -166,20 +266,11 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) clean_vgpu_mmio(vgpu); vfree(vgpu); + intel_gvt_update_vgpu_types(gvt); mutex_unlock(&gvt->lock); } -/** - * intel_gvt_create_vgpu - create a virtual GPU - * @gvt: GVT device - * @param: vGPU creation parameters - * - * This function is called when user wants to create a virtual GPU. - * - * Returns: - * pointer to intel_vgpu, error pointer if failed. - */ -struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, +static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, struct intel_vgpu_creation_params *param) { struct intel_vgpu *vgpu; @@ -272,3 +363,38 @@ out_free_vgpu: mutex_unlock(&gvt->lock); return ERR_PTR(ret); } + +/** + * intel_gvt_create_vgpu - create a virtual GPU + * @gvt: GVT device + * @type: type of the vGPU to create + * + * This function is called when user wants to create a virtual GPU. + * + * Returns: + * pointer to intel_vgpu, error pointer if failed. + */ +struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, + struct intel_vgpu_type *type) +{ + struct intel_vgpu_creation_params param; + struct intel_vgpu *vgpu; + + param.handle = 0; + param.low_gm_sz = type->low_gm_size; + param.high_gm_sz = type->high_gm_size; + param.fence_sz = type->fence; + + /* XXX current param based on MB */ + param.low_gm_sz = BYTES_TO_MB(param.low_gm_sz); + param.high_gm_sz = BYTES_TO_MB(param.high_gm_sz); + + vgpu = __intel_gvt_create_vgpu(gvt, ¶m); + if (IS_ERR(vgpu)) + return vgpu; + + /* calculate left instance change for types */ + intel_gvt_update_vgpu_types(gvt); + + return vgpu; +} From 8f89743bddec87b7e0eefe9895274653ce341059 Mon Sep 17 00:00:00 2001 From: Jike Song Date: Thu, 3 Nov 2016 18:38:32 +0800 Subject: [PATCH 4/8] drm/i915/gvt: remove obsolete code for old kvmgt opregion Current GVT contains some obsolete logic originally cooked to support the old, non-vfio kvmgt, which is actually workarounds. We don't support that anymore, so it's safe to remove it and make a better framework. Signed-off-by: Jike Song Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cfg_space.c | 5 ++-- drivers/gpu/drm/i915/gvt/hypercall.h | 3 +-- drivers/gpu/drm/i915/gvt/mpt.h | 10 ++------ drivers/gpu/drm/i915/gvt/opregion.c | 34 ++++------------------------ drivers/gpu/drm/i915/gvt/vgpu.c | 10 +------- 5 files changed, 11 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index 4c687740f5f1..865629a5c553 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -82,9 +82,8 @@ static int map_aperture(struct intel_vgpu *vgpu, bool map) ret = intel_gvt_hypervisor_map_gfn_to_mfn(vgpu, first_gfn, first_mfn, - vgpu_aperture_sz(vgpu) - >> PAGE_SHIFT, map, - GVT_MAP_APERTURE); + vgpu_aperture_sz(vgpu) >> + PAGE_SHIFT, map); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index 027ef558d91c..193fd38a96af 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -60,8 +60,7 @@ struct intel_gvt_mpt { unsigned long len); unsigned long (*gfn_to_mfn)(unsigned long handle, unsigned long gfn); int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn, - unsigned long mfn, unsigned int nr, bool map, - int type); + unsigned long mfn, unsigned int nr, bool map); int (*set_trap_area)(unsigned long handle, u64 start, u64 end, bool map); }; diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 67858782d327..15e08dcb3199 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -224,11 +224,6 @@ static inline unsigned long intel_gvt_hypervisor_gfn_to_mfn( return intel_gvt_host.mpt->gfn_to_mfn(vgpu->handle, gfn); } -enum { - GVT_MAP_APERTURE = 0, - GVT_MAP_OPREGION, -}; - /** * intel_gvt_hypervisor_map_gfn_to_mfn - map a GFN region to MFN * @vgpu: a vGPU @@ -236,7 +231,6 @@ enum { * @mfn: host PFN * @nr: amount of PFNs * @map: map or unmap - * @type: map type * * Returns: * Zero on success, negative error code if failed. @@ -244,10 +238,10 @@ enum { static inline int intel_gvt_hypervisor_map_gfn_to_mfn( struct intel_vgpu *vgpu, unsigned long gfn, unsigned long mfn, unsigned int nr, - bool map, int type) + bool map) { return intel_gvt_host.mpt->map_gfn_to_mfn(vgpu->handle, gfn, mfn, nr, - map, type); + map); } /** diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index 95218913b0bc..d2a0fbc896c3 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -73,7 +73,7 @@ static int map_vgpu_opregion(struct intel_vgpu *vgpu, bool map) } ret = intel_gvt_hypervisor_map_gfn_to_mfn(vgpu, vgpu_opregion(vgpu)->gfn[i], - mfn, 1, map, GVT_MAP_OPREGION); + mfn, 1, map); if (ret) { gvt_err("fail to map GFN to MFN, errno: %d\n", ret); return ret; @@ -89,28 +89,18 @@ static int map_vgpu_opregion(struct intel_vgpu *vgpu, bool map) */ void intel_vgpu_clean_opregion(struct intel_vgpu *vgpu) { - int i; - gvt_dbg_core("vgpu%d: clean vgpu opregion\n", vgpu->id); if (!vgpu_opregion(vgpu)->va) return; - if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_KVM) { - vunmap(vgpu_opregion(vgpu)->va); - for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++) { - if (vgpu_opregion(vgpu)->pages[i]) { - put_page(vgpu_opregion(vgpu)->pages[i]); - vgpu_opregion(vgpu)->pages[i] = NULL; - } - } - } else { + if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_XEN) { map_vgpu_opregion(vgpu, false); free_pages((unsigned long)vgpu_opregion(vgpu)->va, INTEL_GVT_OPREGION_PORDER); - } - vgpu_opregion(vgpu)->va = NULL; + vgpu_opregion(vgpu)->va = NULL; + } } /** @@ -137,22 +127,8 @@ int intel_vgpu_init_opregion(struct intel_vgpu *vgpu, u32 gpa) ret = map_vgpu_opregion(vgpu, true); if (ret) return ret; - } else { - gvt_dbg_core("emulate opregion from userspace\n"); - - /* - * If opregion pages are not allocated from host kenrel, - * most of the params are meaningless - */ - ret = intel_gvt_hypervisor_map_gfn_to_mfn(vgpu, - 0, /* not used */ - 0, /* not used */ - 2, /* not used */ - 1, - GVT_MAP_OPREGION); - if (ret) - return ret; } + return 0; } diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index de3c1876aae3..9b09f697862c 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -315,15 +315,9 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_detach_hypervisor_vgpu; - if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_KVM) { - ret = intel_vgpu_init_opregion(vgpu, 0); - if (ret) - goto out_clean_gtt; - } - ret = intel_vgpu_init_display(vgpu); if (ret) - goto out_clean_opregion; + goto out_clean_gtt; ret = intel_vgpu_init_execlist(vgpu); if (ret) @@ -348,8 +342,6 @@ out_clean_execlist: intel_vgpu_clean_execlist(vgpu); out_clean_display: intel_vgpu_clean_display(vgpu); -out_clean_opregion: - intel_vgpu_clean_opregion(vgpu); out_clean_gtt: intel_vgpu_clean_gtt(vgpu); out_detach_hypervisor_vgpu: From 40df6ea07a15032e33b6f3d415bed0774547ec7e Mon Sep 17 00:00:00 2001 From: Jike Song Date: Thu, 3 Nov 2016 18:38:33 +0800 Subject: [PATCH 5/8] drm/i915/gvt: introduce host_init/host_exit to MPT GVT host needs init/exit hooks to do some initialization/cleanup work, e.g.: vfio mdev host device register/unregister. Signed-off-by: Jike Song Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.c | 9 +++++++++ drivers/gpu/drm/i915/gvt/hypercall.h | 2 ++ drivers/gpu/drm/i915/gvt/mpt.h | 21 +++++++++++++++++++++ 3 files changed, 32 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 48a67d1e1893..13db29d045a4 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -193,6 +193,7 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv) intel_gvt_clean_mmio_info(gvt); intel_gvt_free_firmware(gvt); + intel_gvt_hypervisor_host_exit(&dev_priv->drm.pdev->dev, gvt); intel_gvt_clean_vgpu_types(gvt); kfree(dev_priv->gvt); @@ -276,11 +277,19 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) if (ret) goto out_clean_thread; + ret = intel_gvt_hypervisor_host_init(&dev_priv->drm.pdev->dev, gvt, + &intel_gvt_io_emulation_ops); + if (ret) { + gvt_err("failed to register gvt-g host device: %d\n", ret); + goto out_clean_types; + } gvt_dbg_core("gvt device initialization is done\n"); dev_priv->gvt = gvt; return 0; +out_clean_types: + intel_gvt_clean_vgpu_types(gvt); out_clean_thread: clean_service_thread(gvt); out_clean_cmd_parser: diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index 193fd38a96af..f3e926340983 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -48,6 +48,8 @@ extern struct intel_gvt_io_emulation_ops intel_gvt_io_emulation_ops; */ struct intel_gvt_mpt { int (*detect_host)(void); + int (*host_init)(struct device *dev, void *gvt, const void *ops); + void (*host_exit)(struct device *dev, void *gvt); int (*attach_vgpu)(void *vgpu, unsigned long *handle); void (*detach_vgpu)(unsigned long handle); int (*inject_msi)(unsigned long handle, u32 addr, u16 data); diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 15e08dcb3199..93649b34798b 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -55,6 +55,27 @@ static inline int intel_gvt_hypervisor_detect_host(void) return intel_gvt_host.mpt->detect_host(); } +/** + * intel_gvt_hypervisor_host_init - init GVT-g host side + * + * Returns: + * Zero on success, negative error code if failed + */ +static inline int intel_gvt_hypervisor_host_init(struct device *dev, + void *gvt, const void *ops) +{ + return intel_gvt_host.mpt->host_init(dev, gvt, ops); +} + +/** + * intel_gvt_hypervisor_host_exit - exit GVT-g host side + */ +static inline void intel_gvt_hypervisor_host_exit(struct device *dev, + void *gvt) +{ + intel_gvt_host.mpt->host_exit(dev, gvt); +} + /** * intel_gvt_hypervisor_attach_vgpu - call hypervisor to initialize vGPU * related stuffs inside hypervisor. From 7b3343b7e804bb89ad24a0a8c4e8e1010a418f14 Mon Sep 17 00:00:00 2001 From: Jike Song Date: Thu, 3 Nov 2016 18:38:34 +0800 Subject: [PATCH 6/8] drm/i915/gvt: allow several MPT methods to be NULL Hypervisors are different, the MPT ops is a only superset of all possibly supported hypervisors. There might be other way out of the MPT to achieve same target. e.g. vfio-based kvmgt won't provide map_gfn_to_mfn method to establish guest EPT mapping for aperture, since it will be done in QEMU/KVM, MMIO is also trapped elsewhere, etc. Signed-off-by: Jike Song Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/mpt.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 93649b34798b..1af5830c0a56 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -64,6 +64,10 @@ static inline int intel_gvt_hypervisor_detect_host(void) static inline int intel_gvt_hypervisor_host_init(struct device *dev, void *gvt, const void *ops) { + /* optional to provide */ + if (!intel_gvt_host.mpt->host_init) + return 0; + return intel_gvt_host.mpt->host_init(dev, gvt, ops); } @@ -73,6 +77,10 @@ static inline int intel_gvt_hypervisor_host_init(struct device *dev, static inline void intel_gvt_hypervisor_host_exit(struct device *dev, void *gvt) { + /* optional to provide */ + if (!intel_gvt_host.mpt->host_exit) + return; + intel_gvt_host.mpt->host_exit(dev, gvt); } @@ -85,6 +93,10 @@ static inline void intel_gvt_hypervisor_host_exit(struct device *dev, */ static inline int intel_gvt_hypervisor_attach_vgpu(struct intel_vgpu *vgpu) { + /* optional to provide */ + if (!intel_gvt_host.mpt->attach_vgpu) + return 0; + return intel_gvt_host.mpt->attach_vgpu(vgpu, &vgpu->handle); } @@ -97,6 +109,10 @@ static inline int intel_gvt_hypervisor_attach_vgpu(struct intel_vgpu *vgpu) */ static inline void intel_gvt_hypervisor_detach_vgpu(struct intel_vgpu *vgpu) { + /* optional to provide */ + if (!intel_gvt_host.mpt->detach_vgpu) + return; + intel_gvt_host.mpt->detach_vgpu(vgpu->handle); } @@ -261,6 +277,10 @@ static inline int intel_gvt_hypervisor_map_gfn_to_mfn( unsigned long mfn, unsigned int nr, bool map) { + /* a MPT implementation could have MMIO mapped elsewhere */ + if (!intel_gvt_host.mpt->map_gfn_to_mfn) + return 0; + return intel_gvt_host.mpt->map_gfn_to_mfn(vgpu->handle, gfn, mfn, nr, map); } @@ -278,6 +298,10 @@ static inline int intel_gvt_hypervisor_map_gfn_to_mfn( static inline int intel_gvt_hypervisor_set_trap_area( struct intel_vgpu *vgpu, u64 start, u64 end, bool map) { + /* a MPT implementation could have MMIO trapped elsewhere */ + if (!intel_gvt_host.mpt->set_trap_area) + return 0; + return intel_gvt_host.mpt->set_trap_area(vgpu->handle, start, end, map); } From 9ec1e66b8084f24d41046bd9711fbd7ec6e3850f Mon Sep 17 00:00:00 2001 From: Jike Song Date: Thu, 3 Nov 2016 18:38:35 +0800 Subject: [PATCH 7/8] drm/i915/gvt: refactor intel_gvt_io_emulation_ops to be intel_gvt_ops There are currently 4 methods in intel_gvt_io_emulation_ops to emulate CFG/MMIO reading/writing for intel vGPU. A possibly better scope is: add 3 more methods for vgpu create/destroy/reset respectively, and rename the ops to 'intel_gvt_ops', then pass it to the MPT module (say the future kvmgt) to use: they are all methods for external usage. Signed-off-by: Jike Song Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cfg_space.c | 7 ++----- drivers/gpu/drm/i915/gvt/gvt.c | 7 +++++-- drivers/gpu/drm/i915/gvt/gvt.h | 21 +++++++++++++++++++-- drivers/gpu/drm/i915/gvt/hypercall.h | 9 --------- drivers/gpu/drm/i915/gvt/mmio.c | 6 ++---- drivers/gpu/drm/i915/gvt/mmio.h | 9 +++++---- drivers/gpu/drm/i915/gvt/vgpu.c | 11 +++++++++++ 7 files changed, 44 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index 865629a5c553..db516382a4d4 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -47,11 +47,9 @@ enum { * Returns: * Zero on success, negative error code if failed. */ -int intel_vgpu_emulate_cfg_read(void *__vgpu, unsigned int offset, +int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { - struct intel_vgpu *vgpu = __vgpu; - if (WARN_ON(bytes > 4)) return -EINVAL; @@ -234,10 +232,9 @@ static int emulate_pci_bar_write(struct intel_vgpu *vgpu, unsigned int offset, * Returns: * Zero on success, negative error code if failed. */ -int intel_vgpu_emulate_cfg_write(void *__vgpu, unsigned int offset, +int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { - struct intel_vgpu *vgpu = __vgpu; int ret; if (WARN_ON(bytes > 4)) diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 13db29d045a4..7af265ac8d74 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -44,11 +44,14 @@ static const char * const supported_hypervisors[] = { [INTEL_GVT_HYPERVISOR_KVM] = "KVM", }; -struct intel_gvt_io_emulation_ops intel_gvt_io_emulation_ops = { +static const struct intel_gvt_ops intel_gvt_ops = { .emulate_cfg_read = intel_vgpu_emulate_cfg_read, .emulate_cfg_write = intel_vgpu_emulate_cfg_write, .emulate_mmio_read = intel_vgpu_emulate_mmio_read, .emulate_mmio_write = intel_vgpu_emulate_mmio_write, + .vgpu_create = intel_gvt_create_vgpu, + .vgpu_destroy = intel_gvt_destroy_vgpu, + .vgpu_reset = intel_gvt_reset_vgpu, }; /** @@ -278,7 +281,7 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) goto out_clean_thread; ret = intel_gvt_hypervisor_host_init(&dev_priv->drm.pdev->dev, gvt, - &intel_gvt_io_emulation_ops); + &intel_gvt_ops); if (ret) { gvt_err("failed to register gvt-g host device: %d\n", ret); goto out_clean_types; diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 33e6a6447025..5e7b0bd01695 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -356,6 +356,7 @@ void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt); struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, struct intel_vgpu_type *type); void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu); +void intel_gvt_reset_vgpu(struct intel_vgpu *vgpu); /* validating GM functions */ @@ -391,10 +392,10 @@ int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index, int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index, unsigned long *g_index); -int intel_vgpu_emulate_cfg_read(void *__vgpu, unsigned int offset, +int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes); -int intel_vgpu_emulate_cfg_write(void *__vgpu, unsigned int offset, +int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes); void intel_gvt_clean_opregion(struct intel_gvt *gvt); @@ -407,6 +408,22 @@ int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci); int setup_vgpu_mmio(struct intel_vgpu *vgpu); void populate_pvinfo_page(struct intel_vgpu *vgpu); +struct intel_gvt_ops { + int (*emulate_cfg_read)(struct intel_vgpu *, unsigned int, void *, + unsigned int); + int (*emulate_cfg_write)(struct intel_vgpu *, unsigned int, void *, + unsigned int); + int (*emulate_mmio_read)(struct intel_vgpu *, u64, void *, + unsigned int); + int (*emulate_mmio_write)(struct intel_vgpu *, u64, void *, + unsigned int); + struct intel_vgpu *(*vgpu_create)(struct intel_gvt *, + struct intel_vgpu_type *); + void (*vgpu_destroy)(struct intel_vgpu *); + void (*vgpu_reset)(struct intel_vgpu *); +}; + + #include "mpt.h" #endif diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index f3e926340983..30e543f5a703 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -33,15 +33,6 @@ #ifndef _GVT_HYPERCALL_H_ #define _GVT_HYPERCALL_H_ -struct intel_gvt_io_emulation_ops { - int (*emulate_cfg_read)(void *, unsigned int, void *, unsigned int); - int (*emulate_cfg_write)(void *, unsigned int, void *, unsigned int); - int (*emulate_mmio_read)(void *, u64, void *, unsigned int); - int (*emulate_mmio_write)(void *, u64, void *, unsigned int); -}; - -extern struct intel_gvt_io_emulation_ops intel_gvt_io_emulation_ops; - /* * Specific GVT-g MPT modules function collections. Currently GVT-g supports * both Xen and KVM by providing dedicated hypervisor-related MPT modules. diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 585b01f63254..09c9450a1946 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -67,10 +67,9 @@ int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa) * Returns: * Zero on success, negative error code if failed */ -int intel_vgpu_emulate_mmio_read(void *__vgpu, uint64_t pa, +int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, void *p_data, unsigned int bytes) { - struct intel_vgpu *vgpu = __vgpu; struct intel_gvt *gvt = vgpu->gvt; struct intel_gvt_mmio_info *mmio; unsigned int offset = 0; @@ -179,10 +178,9 @@ err: * Returns: * Zero on success, negative error code if failed */ -int intel_vgpu_emulate_mmio_write(void *__vgpu, uint64_t pa, +int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, void *p_data, unsigned int bytes) { - struct intel_vgpu *vgpu = __vgpu; struct intel_gvt *gvt = vgpu->gvt; struct intel_gvt_mmio_info *mmio; unsigned int offset = 0; diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h index 9dc739a01892..87d5b5e366a3 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.h +++ b/drivers/gpu/drm/i915/gvt/mmio.h @@ -87,10 +87,11 @@ struct intel_gvt_mmio_info *intel_gvt_find_mmio_info(struct intel_gvt *gvt, }) int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa); -int intel_vgpu_emulate_mmio_read(void *__vgpu, u64 pa, void *p_data, - unsigned int bytes); -int intel_vgpu_emulate_mmio_write(void *__vgpu, u64 pa, void *p_data, - unsigned int bytes); + +int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, u64 pa, + void *p_data, unsigned int bytes); +int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, u64 pa, + void *p_data, unsigned int bytes); bool intel_gvt_mmio_is_cmd_access(struct intel_gvt *gvt, unsigned int offset); bool intel_gvt_mmio_is_unalign(struct intel_gvt *gvt, unsigned int offset); diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 9b09f697862c..3a15feadc1df 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -390,3 +390,14 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, return vgpu; } + +/** + * intel_gvt_reset_vgpu - reset a virtual GPU + * @vgpu: virtual GPU + * + * This function is called when user wants to reset a virtual GPU. + * + */ +void intel_gvt_reset_vgpu(struct intel_vgpu *vgpu) +{ +} From f30437c5e7bfa9d8acc18058040efb4f474907c3 Mon Sep 17 00:00:00 2001 From: Jike Song Date: Wed, 9 Nov 2016 20:30:59 +0800 Subject: [PATCH 8/8] drm/i915/gvt: add KVMGT support KVMGT is the MPT implementation based on VFIO/KVM. It provides a kvmgt_mpt ops to gvt for vGPU access mediation, e.g. to mediate and emulate the MMIO accesses, to inject interrupts to vGPU user, to intercept the GTT writing and replace it with DMA-able address, to write-protect guest PPGTT table for shadowing synchronization, etc. This patch provides the MPT implementation for GVT, not yet functional due to theabsence of mdev. It's built as kvmgt.ko, depends on vfio.ko, kvm.ko and mdev.ko, and being required by i915.ko. To not introduce hard dependency in i915.ko, we used indirect symbol reference. But that means users have to include kvmgt.ko into init ramdisk if their i915.ko is included. Signed-off-by: Kevin Tian Signed-off-by: Xiaoguang Chen Signed-off-by: Jike Song Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/Kconfig | 9 + drivers/gpu/drm/i915/gvt/Makefile | 7 +- drivers/gpu/drm/i915/gvt/gvt.c | 4 +- drivers/gpu/drm/i915/gvt/gvt.h | 14 + drivers/gpu/drm/i915/gvt/kvmgt.c | 601 ++++++++++++++++++++++++++++++ 5 files changed, 632 insertions(+), 3 deletions(-) create mode 100644 drivers/gpu/drm/i915/gvt/kvmgt.c diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index df96aed6975a..beed5c1d2cd7 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -107,6 +107,15 @@ config DRM_I915_GVT If in doubt, say "N". +config DRM_I915_GVT_KVMGT + tristate "Enable KVM/VFIO support for Intel GVT-g" + depends on DRM_I915_GVT + depends on KVM + default n + help + Choose this option if you want to enable KVMGT support for + Intel GVT-g. + menu "drm/i915 Debugging" depends on DRM_I915 depends on EXPERT diff --git a/drivers/gpu/drm/i915/gvt/Makefile b/drivers/gpu/drm/i915/gvt/Makefile index 34ea4776af70..8a46a7f31d53 100644 --- a/drivers/gpu/drm/i915/gvt/Makefile +++ b/drivers/gpu/drm/i915/gvt/Makefile @@ -3,5 +3,8 @@ GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o trace_points.o firmware.o \ interrupt.o gtt.o cfg_space.o opregion.o mmio.o display.o edid.o \ execlist.o scheduler.o sched_policy.o render.o cmd_parser.o -ccflags-y += -I$(src) -I$(src)/$(GVT_DIR) -Wall -i915-y += $(addprefix $(GVT_DIR)/, $(GVT_SOURCE)) +ccflags-y += -I$(src) -I$(src)/$(GVT_DIR) -Wall +i915-y += $(addprefix $(GVT_DIR)/, $(GVT_SOURCE)) + +CFLAGS_kvmgt.o := -Wno-unused-function +obj-$(CONFIG_DRM_I915_GVT_KVMGT) += $(GVT_DIR)/kvmgt.o diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 7af265ac8d74..398877c3d2fd 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -84,10 +84,12 @@ int intel_gvt_init_host(void) symbol_get(xengt_mpt), "xengt"); intel_gvt_host.hypervisor_type = INTEL_GVT_HYPERVISOR_XEN; } else { +#if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT) /* not in Xen. Try KVMGT */ intel_gvt_host.mpt = try_then_request_module( - symbol_get(kvmgt_mpt), "kvm"); + symbol_get(kvmgt_mpt), "kvmgt"); intel_gvt_host.hypervisor_type = INTEL_GVT_HYPERVISOR_KVM; +#endif } /* Fail to load MPT modules - bail out */ diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 5e7b0bd01695..3d4223e8ebe3 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -161,6 +161,20 @@ struct intel_vgpu { DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES); struct i915_gem_context *shadow_ctx; struct notifier_block shadow_ctx_notifier_block; + +#if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT) + struct { + struct device *mdev; + struct vfio_region *region; + int num_regions; + struct eventfd_ctx *intx_trigger; + struct eventfd_ctx *msi_trigger; + struct rb_root cache; + struct mutex cache_lock; + void *vfio_group; + struct notifier_block iommu_notifier; + } vdev; +#endif }; struct intel_gvt_gm { diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c new file mode 100644 index 000000000000..5bf4d73d57d9 --- /dev/null +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -0,0 +1,601 @@ +/* + * KVMGT - the implementation of Intel mediated pass-through framework for KVM + * + * Copyright(c) 2014-2016 Intel Corporation. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Kevin Tian + * Jike Song + * Xiaoguang Chen + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "i915_drv.h" +#include "gvt.h" + +#if IS_ENABLED(CONFIG_VFIO_MDEV) +#include +#else +static inline long vfio_pin_pages(struct device *dev, unsigned long *user_pfn, + long npage, int prot, unsigned long *phys_pfn) +{ + return 0; +} +static inline long vfio_unpin_pages(struct device *dev, unsigned long *pfn, + long npage) +{ + return 0; +} +#endif + +static const struct intel_gvt_ops *intel_gvt_ops; + + +/* helper macros copied from vfio-pci */ +#define VFIO_PCI_OFFSET_SHIFT 40 +#define VFIO_PCI_OFFSET_TO_INDEX(off) (off >> VFIO_PCI_OFFSET_SHIFT) +#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) +#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) + +struct vfio_region { + u32 type; + u32 subtype; + size_t size; + u32 flags; +}; + +struct kvmgt_pgfn { + gfn_t gfn; + struct hlist_node hnode; +}; + +struct kvmgt_guest_info { + struct kvm *kvm; + struct intel_vgpu *vgpu; + struct kvm_page_track_notifier_node track_node; +#define NR_BKT (1 << 18) + struct hlist_head ptable[NR_BKT]; +#undef NR_BKT +}; + +struct gvt_dma { + struct rb_node node; + gfn_t gfn; + kvm_pfn_t pfn; +}; + +static struct gvt_dma *__gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn) +{ + struct rb_node *node = vgpu->vdev.cache.rb_node; + struct gvt_dma *ret = NULL; + + while (node) { + struct gvt_dma *itr = rb_entry(node, struct gvt_dma, node); + + if (gfn < itr->gfn) + node = node->rb_left; + else if (gfn > itr->gfn) + node = node->rb_right; + else { + ret = itr; + goto out; + } + } + +out: + return ret; +} + +static kvm_pfn_t gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn) +{ + struct gvt_dma *entry; + + mutex_lock(&vgpu->vdev.cache_lock); + entry = __gvt_cache_find(vgpu, gfn); + mutex_unlock(&vgpu->vdev.cache_lock); + + return entry == NULL ? 0 : entry->pfn; +} + +static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, kvm_pfn_t pfn) +{ + struct gvt_dma *new, *itr; + struct rb_node **link = &vgpu->vdev.cache.rb_node, *parent = NULL; + + new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL); + if (!new) + return; + + new->gfn = gfn; + new->pfn = pfn; + + mutex_lock(&vgpu->vdev.cache_lock); + while (*link) { + parent = *link; + itr = rb_entry(parent, struct gvt_dma, node); + + if (gfn == itr->gfn) + goto out; + else if (gfn < itr->gfn) + link = &parent->rb_left; + else + link = &parent->rb_right; + } + + rb_link_node(&new->node, parent, link); + rb_insert_color(&new->node, &vgpu->vdev.cache); + mutex_unlock(&vgpu->vdev.cache_lock); + return; + +out: + mutex_unlock(&vgpu->vdev.cache_lock); + kfree(new); +} + +static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu, + struct gvt_dma *entry) +{ + rb_erase(&entry->node, &vgpu->vdev.cache); + kfree(entry); +} + +static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn) +{ + struct device *dev = vgpu->vdev.mdev; + struct gvt_dma *this; + unsigned long pfn; + + mutex_lock(&vgpu->vdev.cache_lock); + this = __gvt_cache_find(vgpu, gfn); + if (!this) { + mutex_unlock(&vgpu->vdev.cache_lock); + return; + } + + pfn = this->pfn; + WARN_ON((vfio_unpin_pages(dev, &pfn, 1) != 1)); + __gvt_cache_remove_entry(vgpu, this); + mutex_unlock(&vgpu->vdev.cache_lock); +} + +static void gvt_cache_init(struct intel_vgpu *vgpu) +{ + vgpu->vdev.cache = RB_ROOT; + mutex_init(&vgpu->vdev.cache_lock); +} + +static void gvt_cache_destroy(struct intel_vgpu *vgpu) +{ + struct gvt_dma *dma; + struct rb_node *node = NULL; + struct device *dev = vgpu->vdev.mdev; + unsigned long pfn; + + mutex_lock(&vgpu->vdev.cache_lock); + while ((node = rb_first(&vgpu->vdev.cache))) { + dma = rb_entry(node, struct gvt_dma, node); + pfn = dma->pfn; + + vfio_unpin_pages(dev, &pfn, 1); + __gvt_cache_remove_entry(vgpu, dma); + } + mutex_unlock(&vgpu->vdev.cache_lock); +} + +static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt, + const char *name) +{ + int i; + struct intel_vgpu_type *t; + const char *driver_name = dev_driver_string( + &gvt->dev_priv->drm.pdev->dev); + + for (i = 0; i < gvt->num_types; i++) { + t = &gvt->types[i]; + if (!strncmp(t->name, name + strlen(driver_name) + 1, + sizeof(t->name))) + return t; + } + + return NULL; +} + +static struct attribute *type_attrs[] = { + NULL, +}; + +static struct attribute_group *intel_vgpu_type_groups[] = { + [0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL, +}; + +static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt) +{ + int i, j; + struct intel_vgpu_type *type; + struct attribute_group *group; + + for (i = 0; i < gvt->num_types; i++) { + type = &gvt->types[i]; + + group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL); + if (WARN_ON(!group)) + goto unwind; + + group->name = type->name; + group->attrs = type_attrs; + intel_vgpu_type_groups[i] = group; + } + + return true; + +unwind: + for (j = 0; j < i; j++) { + group = intel_vgpu_type_groups[j]; + kfree(group); + } + + return false; +} + +static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt) +{ + int i; + struct attribute_group *group; + + for (i = 0; i < gvt->num_types; i++) { + group = intel_vgpu_type_groups[i]; + kfree(group); + } +} + +static void kvmgt_protect_table_init(struct kvmgt_guest_info *info) +{ + hash_init(info->ptable); +} + +static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info) +{ + struct kvmgt_pgfn *p; + struct hlist_node *tmp; + int i; + + hash_for_each_safe(info->ptable, i, tmp, p, hnode) { + hash_del(&p->hnode); + kfree(p); + } +} + +static struct kvmgt_pgfn * +__kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn) +{ + struct kvmgt_pgfn *p, *res = NULL; + + hash_for_each_possible(info->ptable, p, hnode, gfn) { + if (gfn == p->gfn) { + res = p; + break; + } + } + + return res; +} + +static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info, + gfn_t gfn) +{ + struct kvmgt_pgfn *p; + + p = __kvmgt_protect_table_find(info, gfn); + return !!p; +} + +static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn) +{ + struct kvmgt_pgfn *p; + + if (kvmgt_gfn_is_write_protected(info, gfn)) + return; + + p = kmalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC); + if (WARN(!p, "gfn: 0x%llx\n", gfn)) + return; + + p->gfn = gfn; + hash_add(info->ptable, &p->hnode, gfn); +} + +static void kvmgt_protect_table_del(struct kvmgt_guest_info *info, + gfn_t gfn) +{ + struct kvmgt_pgfn *p; + + p = __kvmgt_protect_table_find(info, gfn); + if (p) { + hash_del(&p->hnode); + kfree(p); + } +} + +static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops) +{ + if (!intel_gvt_init_vgpu_type_groups(gvt)) + return -EFAULT; + + intel_gvt_ops = ops; + + /* MDEV is not yet available */ + return -ENODEV; +} + +static void kvmgt_host_exit(struct device *dev, void *gvt) +{ + intel_gvt_cleanup_vgpu_type_groups(gvt); +} + +static int kvmgt_write_protect_add(unsigned long handle, u64 gfn) +{ + struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle; + struct kvm *kvm = info->kvm; + struct kvm_memory_slot *slot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + slot = gfn_to_memslot(kvm, gfn); + + spin_lock(&kvm->mmu_lock); + + if (kvmgt_gfn_is_write_protected(info, gfn)) + goto out; + + kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE); + kvmgt_protect_table_add(info, gfn); + +out: + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); + return 0; +} + +static int kvmgt_write_protect_remove(unsigned long handle, u64 gfn) +{ + struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle; + struct kvm *kvm = info->kvm; + struct kvm_memory_slot *slot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + slot = gfn_to_memslot(kvm, gfn); + + spin_lock(&kvm->mmu_lock); + + if (!kvmgt_gfn_is_write_protected(info, gfn)) + goto out; + + kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE); + kvmgt_protect_table_del(info, gfn); + +out: + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); + return 0; +} + +static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, + const u8 *val, int len, + struct kvm_page_track_notifier_node *node) +{ + struct kvmgt_guest_info *info = container_of(node, + struct kvmgt_guest_info, track_node); + + if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa))) + intel_gvt_ops->emulate_mmio_write(info->vgpu, gpa, + (void *)val, len); +} + +static void kvmgt_page_track_flush_slot(struct kvm *kvm, + struct kvm_memory_slot *slot, + struct kvm_page_track_notifier_node *node) +{ + int i; + gfn_t gfn; + struct kvmgt_guest_info *info = container_of(node, + struct kvmgt_guest_info, track_node); + + spin_lock(&kvm->mmu_lock); + for (i = 0; i < slot->npages; i++) { + gfn = slot->base_gfn + i; + if (kvmgt_gfn_is_write_protected(info, gfn)) { + kvm_slot_page_track_remove_page(kvm, slot, gfn, + KVM_PAGE_TRACK_WRITE); + kvmgt_protect_table_del(info, gfn); + } + } + spin_unlock(&kvm->mmu_lock); +} + +static bool kvmgt_check_guest(void) +{ + unsigned int eax, ebx, ecx, edx; + char s[12]; + unsigned int *i; + + eax = KVM_CPUID_SIGNATURE; + ebx = ecx = edx = 0; + + asm volatile ("cpuid" + : "+a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) + : + : "cc", "memory"); + i = (unsigned int *)s; + i[0] = ebx; + i[1] = ecx; + i[2] = edx; + + return !strncmp(s, "KVMKVMKVM", strlen("KVMKVMKVM")); +} + +/** + * NOTE: + * It's actually impossible to check if we are running in KVM host, + * since the "KVM host" is simply native. So we only dectect guest here. + */ +static int kvmgt_detect_host(void) +{ +#ifdef CONFIG_INTEL_IOMMU + if (intel_iommu_gfx_mapped) { + gvt_err("Hardware IOMMU compatibility not yet supported, try to boot with intel_iommu=igfx_off\n"); + return -ENODEV; + } +#endif + return kvmgt_check_guest() ? -ENODEV : 0; +} + +static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle) +{ + /* nothing to do here */ + return 0; +} + +static void kvmgt_detach_vgpu(unsigned long handle) +{ + /* nothing to do here */ +} + +static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data) +{ + struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle; + struct intel_vgpu *vgpu = info->vgpu; + + if (vgpu->vdev.msi_trigger) + return eventfd_signal(vgpu->vdev.msi_trigger, 1) == 1; + + return false; +} + +static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn) +{ + unsigned long pfn; + struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle; + int rc; + + pfn = gvt_cache_find(info->vgpu, gfn); + if (pfn != 0) + return pfn; + + rc = vfio_pin_pages(info->vgpu->vdev.mdev, &gfn, 1, + IOMMU_READ | IOMMU_WRITE, &pfn); + if (rc != 1) { + gvt_err("vfio_pin_pages failed for gfn: 0x%lx\n", gfn); + return 0; + } + + gvt_cache_add(info->vgpu, gfn, pfn); + return pfn; +} + +static void *kvmgt_gpa_to_hva(unsigned long handle, unsigned long gpa) +{ + unsigned long pfn; + gfn_t gfn = gpa_to_gfn(gpa); + + pfn = kvmgt_gfn_to_pfn(handle, gfn); + if (!pfn) + return NULL; + + return (char *)pfn_to_kaddr(pfn) + offset_in_page(gpa); +} + +static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa, + void *buf, unsigned long len, bool write) +{ + void *hva = NULL; + + hva = kvmgt_gpa_to_hva(handle, gpa); + if (!hva) + return -EFAULT; + + if (write) + memcpy(hva, buf, len); + else + memcpy(buf, hva, len); + + return 0; +} + +static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa, + void *buf, unsigned long len) +{ + return kvmgt_rw_gpa(handle, gpa, buf, len, false); +} + +static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa, + void *buf, unsigned long len) +{ + return kvmgt_rw_gpa(handle, gpa, buf, len, true); +} + +static unsigned long kvmgt_virt_to_pfn(void *addr) +{ + return PFN_DOWN(__pa(addr)); +} + +struct intel_gvt_mpt kvmgt_mpt = { + .detect_host = kvmgt_detect_host, + .host_init = kvmgt_host_init, + .host_exit = kvmgt_host_exit, + .attach_vgpu = kvmgt_attach_vgpu, + .detach_vgpu = kvmgt_detach_vgpu, + .inject_msi = kvmgt_inject_msi, + .from_virt_to_mfn = kvmgt_virt_to_pfn, + .set_wp_page = kvmgt_write_protect_add, + .unset_wp_page = kvmgt_write_protect_remove, + .read_gpa = kvmgt_read_gpa, + .write_gpa = kvmgt_write_gpa, + .gfn_to_mfn = kvmgt_gfn_to_pfn, +}; +EXPORT_SYMBOL_GPL(kvmgt_mpt); + +static int __init kvmgt_init(void) +{ + return 0; +} + +static void __exit kvmgt_exit(void) +{ +} + +module_init(kvmgt_init); +module_exit(kvmgt_exit); + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Intel Corporation");