diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 319c7054905d..7815e4eb9939 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -267,6 +267,19 @@ struct hns_roce_mr { int type; /* MR's register type */ u64 *pbl_buf;/* MR's PBL space */ dma_addr_t pbl_dma_addr; /* MR's PBL space PA */ + u32 pbl_size;/* PA number in the PBL */ + u64 pbl_ba;/* page table address */ + u32 l0_chunk_last_num;/* L0 last number */ + u32 l1_chunk_last_num;/* L1 last number */ + u64 **pbl_bt_l2;/* PBL BT L2 */ + u64 **pbl_bt_l1;/* PBL BT L1 */ + u64 *pbl_bt_l0;/* PBL BT L0 */ + dma_addr_t *pbl_l2_dma_addr;/* PBL BT L2 dma addr */ + dma_addr_t *pbl_l1_dma_addr;/* PBL BT L1 dma addr */ + dma_addr_t pbl_l0_dma_addr;/* PBL BT L0 dma addr */ + u32 pbl_ba_pg_sz;/* BT chunk page size */ + u32 pbl_buf_pg_sz;/* buf chunk page size */ + u32 pbl_hop_num;/* multi-hop number */ }; struct hns_roce_mr_table { @@ -514,6 +527,9 @@ struct hns_roce_caps { int qpc_entry_sz; int irrl_entry_sz; int cqc_entry_sz; + u32 pbl_ba_pg_sz; + u32 pbl_buf_pg_sz; + u32 pbl_hop_num; int aeqe_depth; int ceqe_depth[HNS_ROCE_COMP_VEC_NUM]; enum ib_mtu max_mtu; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index c59d15ec6d5e..542540db45be 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -613,6 +613,9 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->mpt_ba_pg_sz = 0; caps->mpt_buf_pg_sz = 0; caps->mpt_hop_num = HNS_ROCE_CONTEXT_HOP_NUM; + caps->pbl_ba_pg_sz = 0; + caps->pbl_buf_pg_sz = 0; + caps->pbl_hop_num = HNS_ROCE_PBL_HOP_NUM; caps->mtt_ba_pg_sz = 0; caps->mtt_buf_pg_sz = 0; caps->mtt_hop_num = HNS_ROCE_MTT_HOP_NUM; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 212d51135f9d..593d886943f1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -75,6 +75,7 @@ #define HNS_ROCE_CONTEXT_HOP_NUM 1 #define HNS_ROCE_MTT_HOP_NUM 1 #define HNS_ROCE_CQE_HOP_NUM 1 +#define HNS_ROCE_PBL_HOP_NUM 2 #define HNS_ROCE_CMD_FLAG_IN_VALID_SHIFT 0 #define HNS_ROCE_CMD_FLAG_OUT_VALID_SHIFT 1 diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 88b436fb92ee..7e6ce76b32df 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -258,6 +258,239 @@ void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt) } EXPORT_SYMBOL_GPL(hns_roce_mtt_cleanup); +static void hns_roce_loop_free(struct hns_roce_dev *hr_dev, + struct hns_roce_mr *mr, int err_loop_index, + int loop_i, int loop_j) +{ + struct device *dev = hr_dev->dev; + u32 mhop_num; + u32 pbl_bt_sz; + u64 bt_idx; + int i, j; + + pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); + mhop_num = hr_dev->caps.pbl_hop_num; + + i = loop_i; + j = loop_j; + if (mhop_num == 3 && err_loop_index == 2) { + for (; i >= 0; i--) { + dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], + mr->pbl_l1_dma_addr[i]); + + for (j = 0; j < pbl_bt_sz / 8; j++) { + if (i == loop_i && j >= loop_j) + break; + + bt_idx = i * pbl_bt_sz / 8 + j; + dma_free_coherent(dev, pbl_bt_sz, + mr->pbl_bt_l2[bt_idx], + mr->pbl_l2_dma_addr[bt_idx]); + } + } + } else if (mhop_num == 3 && err_loop_index == 1) { + for (i -= 1; i >= 0; i--) { + dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], + mr->pbl_l1_dma_addr[i]); + + for (j = 0; j < pbl_bt_sz / 8; j++) { + bt_idx = i * pbl_bt_sz / 8 + j; + dma_free_coherent(dev, pbl_bt_sz, + mr->pbl_bt_l2[bt_idx], + mr->pbl_l2_dma_addr[bt_idx]); + } + } + } else if (mhop_num == 2 && err_loop_index == 1) { + for (i -= 1; i >= 0; i--) + dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], + mr->pbl_l1_dma_addr[i]); + } else { + dev_warn(dev, "not support: mhop_num=%d, err_loop_index=%d.", + mhop_num, err_loop_index); + return; + } + + dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0, mr->pbl_l0_dma_addr); + mr->pbl_bt_l0 = NULL; + mr->pbl_l0_dma_addr = 0; +} + +/* PBL multi hop addressing */ +static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages, + struct hns_roce_mr *mr) +{ + struct device *dev = hr_dev->dev; + int mr_alloc_done = 0; + int npages_allocated; + int i = 0, j = 0; + u32 pbl_bt_sz; + u32 mhop_num; + u64 pbl_last_bt_num; + u64 pbl_bt_cnt = 0; + u64 bt_idx; + u64 size; + + mhop_num = hr_dev->caps.pbl_hop_num; + pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); + pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8); + + if (mhop_num == HNS_ROCE_HOP_NUM_0) + return 0; + + /* hop_num = 1 */ + if (mhop_num == 1) { + if (npages > pbl_bt_sz / 8) { + dev_err(dev, "npages %d is larger than buf_pg_sz!", + npages); + return -EINVAL; + } + mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, + &(mr->pbl_dma_addr), + GFP_KERNEL); + if (!mr->pbl_buf) + return -ENOMEM; + + mr->pbl_size = npages; + mr->pbl_ba = mr->pbl_dma_addr; + mr->pbl_hop_num = hr_dev->caps.pbl_hop_num; + mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz; + mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz; + return 0; + } + + mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8, + sizeof(*mr->pbl_l1_dma_addr), + GFP_KERNEL); + if (!mr->pbl_l1_dma_addr) + return -ENOMEM; + + mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1), + GFP_KERNEL); + if (!mr->pbl_bt_l1) + goto err_kcalloc_bt_l1; + + if (mhop_num == 3) { + mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num, + sizeof(*mr->pbl_l2_dma_addr), + GFP_KERNEL); + if (!mr->pbl_l2_dma_addr) + goto err_kcalloc_l2_dma; + + mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num, + sizeof(*mr->pbl_bt_l2), + GFP_KERNEL); + if (!mr->pbl_bt_l2) + goto err_kcalloc_bt_l2; + } + + /* alloc L0 BT */ + mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz, + &(mr->pbl_l0_dma_addr), + GFP_KERNEL); + if (!mr->pbl_bt_l0) + goto err_dma_alloc_l0; + + if (mhop_num == 2) { + /* alloc L1 BT */ + for (i = 0; i < pbl_bt_sz / 8; i++) { + if (pbl_bt_cnt + 1 < pbl_last_bt_num) { + size = pbl_bt_sz; + } else { + npages_allocated = i * (pbl_bt_sz / 8); + size = (npages - npages_allocated) * 8; + } + mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size, + &(mr->pbl_l1_dma_addr[i]), + GFP_KERNEL); + if (!mr->pbl_bt_l1[i]) { + hns_roce_loop_free(hr_dev, mr, 1, i, 0); + goto err_dma_alloc_l0; + } + + *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i]; + + pbl_bt_cnt++; + if (pbl_bt_cnt >= pbl_last_bt_num) + break; + } + } else if (mhop_num == 3) { + /* alloc L1, L2 BT */ + for (i = 0; i < pbl_bt_sz / 8; i++) { + mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz, + &(mr->pbl_l1_dma_addr[i]), + GFP_KERNEL); + if (!mr->pbl_bt_l1[i]) { + hns_roce_loop_free(hr_dev, mr, 1, i, 0); + goto err_dma_alloc_l0; + } + + *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i]; + + for (j = 0; j < pbl_bt_sz / 8; j++) { + bt_idx = i * pbl_bt_sz / 8 + j; + + if (pbl_bt_cnt + 1 < pbl_last_bt_num) { + size = pbl_bt_sz; + } else { + npages_allocated = bt_idx * + (pbl_bt_sz / 8); + size = (npages - npages_allocated) * 8; + } + mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent( + dev, size, + &(mr->pbl_l2_dma_addr[bt_idx]), + GFP_KERNEL); + if (!mr->pbl_bt_l2[bt_idx]) { + hns_roce_loop_free(hr_dev, mr, 2, i, j); + goto err_dma_alloc_l0; + } + + *(mr->pbl_bt_l1[i] + j) = + mr->pbl_l2_dma_addr[bt_idx]; + + pbl_bt_cnt++; + if (pbl_bt_cnt >= pbl_last_bt_num) { + mr_alloc_done = 1; + break; + } + } + + if (mr_alloc_done) + break; + } + } + + mr->l0_chunk_last_num = i + 1; + if (mhop_num == 3) + mr->l1_chunk_last_num = j + 1; + + mr->pbl_size = npages; + mr->pbl_ba = mr->pbl_l0_dma_addr; + mr->pbl_hop_num = hr_dev->caps.pbl_hop_num; + mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz; + mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz; + + return 0; + +err_dma_alloc_l0: + kfree(mr->pbl_bt_l2); + mr->pbl_bt_l2 = NULL; + +err_kcalloc_bt_l2: + kfree(mr->pbl_l2_dma_addr); + mr->pbl_l2_dma_addr = NULL; + +err_kcalloc_l2_dma: + kfree(mr->pbl_bt_l1); + mr->pbl_bt_l1 = NULL; + +err_kcalloc_bt_l1: + kfree(mr->pbl_l1_dma_addr); + mr->pbl_l1_dma_addr = NULL; + + return -ENOMEM; +} + static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova, u64 size, u32 access, int npages, struct hns_roce_mr *mr) @@ -282,16 +515,111 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova, mr->type = MR_TYPE_DMA; mr->pbl_buf = NULL; mr->pbl_dma_addr = 0; + /* PBL multi-hop addressing parameters */ + mr->pbl_bt_l2 = NULL; + mr->pbl_bt_l1 = NULL; + mr->pbl_bt_l0 = NULL; + mr->pbl_l2_dma_addr = NULL; + mr->pbl_l1_dma_addr = NULL; + mr->pbl_l0_dma_addr = 0; } else { mr->type = MR_TYPE_MR; - mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, - &(mr->pbl_dma_addr), - GFP_KERNEL); - if (!mr->pbl_buf) - return -ENOMEM; + if (!hr_dev->caps.pbl_hop_num) { + mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, + &(mr->pbl_dma_addr), + GFP_KERNEL); + if (!mr->pbl_buf) + return -ENOMEM; + } else { + ret = hns_roce_mhop_alloc(hr_dev, npages, mr); + } } - return 0; + return ret; +} + +static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev, + struct hns_roce_mr *mr) +{ + struct device *dev = hr_dev->dev; + int npages_allocated; + int npages; + int i, j; + u32 pbl_bt_sz; + u32 mhop_num; + u64 bt_idx; + + npages = ib_umem_page_count(mr->umem); + pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); + mhop_num = hr_dev->caps.pbl_hop_num; + + if (mhop_num == HNS_ROCE_HOP_NUM_0) + return; + + /* hop_num = 1 */ + if (mhop_num == 1) { + dma_free_coherent(dev, (unsigned int)(npages * 8), + mr->pbl_buf, mr->pbl_dma_addr); + return; + } + + dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0, + mr->pbl_l0_dma_addr); + + if (mhop_num == 2) { + for (i = 0; i < mr->l0_chunk_last_num; i++) { + if (i == mr->l0_chunk_last_num - 1) { + npages_allocated = i * (pbl_bt_sz / 8); + + dma_free_coherent(dev, + (npages - npages_allocated) * 8, + mr->pbl_bt_l1[i], + mr->pbl_l1_dma_addr[i]); + + break; + } + + dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], + mr->pbl_l1_dma_addr[i]); + } + } else if (mhop_num == 3) { + for (i = 0; i < mr->l0_chunk_last_num; i++) { + dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], + mr->pbl_l1_dma_addr[i]); + + for (j = 0; j < pbl_bt_sz / 8; j++) { + bt_idx = i * (pbl_bt_sz / 8) + j; + + if ((i == mr->l0_chunk_last_num - 1) + && j == mr->l1_chunk_last_num - 1) { + npages_allocated = bt_idx * + (pbl_bt_sz / 8); + + dma_free_coherent(dev, + (npages - npages_allocated) * 8, + mr->pbl_bt_l2[bt_idx], + mr->pbl_l2_dma_addr[bt_idx]); + + break; + } + + dma_free_coherent(dev, pbl_bt_sz, + mr->pbl_bt_l2[bt_idx], + mr->pbl_l2_dma_addr[bt_idx]); + } + } + } + + kfree(mr->pbl_bt_l1); + kfree(mr->pbl_l1_dma_addr); + mr->pbl_bt_l1 = NULL; + mr->pbl_l1_dma_addr = NULL; + if (mhop_num == 3) { + kfree(mr->pbl_bt_l2); + kfree(mr->pbl_l2_dma_addr); + mr->pbl_bt_l2 = NULL; + mr->pbl_l2_dma_addr = NULL; + } } static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, @@ -310,10 +638,18 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, if (mr->size != ~0ULL) { npages = ib_umem_page_count(mr->umem); - dma_free_coherent(dev, (unsigned int)(npages * 8), mr->pbl_buf, - mr->pbl_dma_addr); + + if (!hr_dev->caps.pbl_hop_num) + dma_free_coherent(dev, (unsigned int)(npages * 8), + mr->pbl_buf, mr->pbl_dma_addr); + else + hns_roce_mhop_free(hr_dev, mr); } + if (mr->enabled) + hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, + key_to_hw_index(mr->key)); + hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, key_to_hw_index(mr->key), BITMAP_NO_RR); } @@ -501,8 +837,8 @@ void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev) struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc) { - int ret = 0; - struct hns_roce_mr *mr = NULL; + struct hns_roce_mr *mr; + int ret; mr = kmalloc(sizeof(*mr), GFP_KERNEL); if (mr == NULL) @@ -571,16 +907,36 @@ out: return ret; } -static int hns_roce_ib_umem_write_mr(struct hns_roce_mr *mr, +static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev, + struct hns_roce_mr *mr, struct ib_umem *umem) { - int i = 0; - int entry; struct scatterlist *sg; + int i = 0, j = 0; + int entry; + + if (hr_dev->caps.pbl_hop_num == HNS_ROCE_HOP_NUM_0) + return 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - mr->pbl_buf[i] = ((u64)sg_dma_address(sg)) >> 12; - i++; + if (!hr_dev->caps.pbl_hop_num) { + mr->pbl_buf[i] = ((u64)sg_dma_address(sg)) >> 12; + i++; + } else if (hr_dev->caps.pbl_hop_num == 1) { + mr->pbl_buf[i] = sg_dma_address(sg); + i++; + } else { + if (hr_dev->caps.pbl_hop_num == 2) + mr->pbl_bt_l1[i][j] = sg_dma_address(sg); + else if (hr_dev->caps.pbl_hop_num == 3) + mr->pbl_bt_l2[i][j] = sg_dma_address(sg); + + j++; + if (j >= (PAGE_SIZE / 8)) { + i++; + j = 0; + } + } } /* Memory barrier */ @@ -595,9 +951,11 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct device *dev = hr_dev->dev; - struct hns_roce_mr *mr = NULL; - int ret = 0; - int n = 0; + struct hns_roce_mr *mr; + int bt_size; + int ret; + int n; + int i; mr = kmalloc(sizeof(*mr), GFP_KERNEL); if (!mr) @@ -618,11 +976,27 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto err_umem; } - if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) { - dev_err(dev, " MR len %lld err. MR is limited to 4G at most!\n", - length); - ret = -EINVAL; - goto err_umem; + if (!hr_dev->caps.pbl_hop_num) { + if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) { + dev_err(dev, + " MR len %lld err. MR is limited to 4G at most!\n", + length); + ret = -EINVAL; + goto err_umem; + } + } else { + int pbl_size = 1; + + bt_size = (1 << PAGE_SHIFT) / 8; + for (i = 0; i < hr_dev->caps.pbl_hop_num; i++) + pbl_size *= bt_size; + if (n > pbl_size) { + dev_err(dev, + " MR len %lld err. MR page num is limited to %d!\n", + length, pbl_size); + ret = -EINVAL; + goto err_umem; + } } ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length, @@ -630,7 +1004,7 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (ret) goto err_umem; - ret = hns_roce_ib_umem_write_mr(mr, mr->umem); + ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem); if (ret) goto err_mr;