diff options
Diffstat (limited to 'drivers/infiniband/hw/hns/hns_roce_mr.c')
-rw-r--r-- | drivers/infiniband/hw/hns/hns_roce_mr.c | 1644 |
1 files changed, 529 insertions, 1115 deletions
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 176f34692f88..4c0bbb12770d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -66,645 +66,89 @@ int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev, HNS_ROCE_CMD_TIMEOUT_MSECS); } -static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order, - unsigned long *seg) +static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, + u32 pd, u64 iova, u64 size, u32 access) { - int o; - u32 m; + struct ib_device *ibdev = &hr_dev->ib_dev; + unsigned long obj = 0; + int err; - spin_lock(&buddy->lock); - - for (o = order; o <= buddy->max_order; ++o) { - if (buddy->num_free[o]) { - m = 1 << (buddy->max_order - o); - *seg = find_first_bit(buddy->bits[o], m); - if (*seg < m) - goto found; - } - } - spin_unlock(&buddy->lock); - return -EINVAL; - - found: - clear_bit(*seg, buddy->bits[o]); - --buddy->num_free[o]; - - while (o > order) { - --o; - *seg <<= 1; - set_bit(*seg ^ 1, buddy->bits[o]); - ++buddy->num_free[o]; - } - - spin_unlock(&buddy->lock); - - *seg <<= order; - return 0; -} - -static void hns_roce_buddy_free(struct hns_roce_buddy *buddy, unsigned long seg, - int order) -{ - seg >>= order; - - spin_lock(&buddy->lock); - - while (test_bit(seg ^ 1, buddy->bits[order])) { - clear_bit(seg ^ 1, buddy->bits[order]); - --buddy->num_free[order]; - seg >>= 1; - ++order; - } - - set_bit(seg, buddy->bits[order]); - ++buddy->num_free[order]; - - spin_unlock(&buddy->lock); -} - -static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order) -{ - int i, s; - - buddy->max_order = max_order; - spin_lock_init(&buddy->lock); - buddy->bits = kcalloc(buddy->max_order + 1, - sizeof(*buddy->bits), - GFP_KERNEL); - buddy->num_free = kcalloc(buddy->max_order + 1, - sizeof(*buddy->num_free), - GFP_KERNEL); - if (!buddy->bits || !buddy->num_free) - goto err_out; - - for (i = 0; i <= buddy->max_order; ++i) { - s = BITS_TO_LONGS(1 << (buddy->max_order - i)); - buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL | - __GFP_NOWARN); - if (!buddy->bits[i]) { - buddy->bits[i] = vzalloc(array_size(s, sizeof(long))); - if (!buddy->bits[i]) - goto err_out_free; - } - } - - set_bit(0, buddy->bits[buddy->max_order]); - buddy->num_free[buddy->max_order] = 1; - - return 0; - -err_out_free: - for (i = 0; i <= buddy->max_order; ++i) - kvfree(buddy->bits[i]); - -err_out: - kfree(buddy->bits); - kfree(buddy->num_free); - return -ENOMEM; -} - -static void hns_roce_buddy_cleanup(struct hns_roce_buddy *buddy) -{ - int i; - - for (i = 0; i <= buddy->max_order; ++i) - kvfree(buddy->bits[i]); - - kfree(buddy->bits); - kfree(buddy->num_free); -} - -static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order, - unsigned long *seg, u32 mtt_type) -{ - struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; - struct hns_roce_hem_table *table; - struct hns_roce_buddy *buddy; - int ret; - - switch (mtt_type) { - case MTT_TYPE_WQE: - buddy = &mr_table->mtt_buddy; - table = &mr_table->mtt_table; - break; - case MTT_TYPE_CQE: - buddy = &mr_table->mtt_cqe_buddy; - table = &mr_table->mtt_cqe_table; - break; - case MTT_TYPE_SRQWQE: - buddy = &mr_table->mtt_srqwqe_buddy; - table = &mr_table->mtt_srqwqe_table; - break; - case MTT_TYPE_IDX: - buddy = &mr_table->mtt_idx_buddy; - table = &mr_table->mtt_idx_table; - break; - default: - dev_err(hr_dev->dev, "Unsupport MTT table type: %d\n", - mtt_type); - return -EINVAL; - } - - ret = hns_roce_buddy_alloc(buddy, order, seg); - if (ret) - return ret; - - ret = hns_roce_table_get_range(hr_dev, table, *seg, - *seg + (1 << order) - 1); - if (ret) { - hns_roce_buddy_free(buddy, *seg, order); - return ret; - } - - return 0; -} - -int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift, - struct hns_roce_mtt *mtt) -{ - int ret; - int i; - - /* Page num is zero, correspond to DMA memory register */ - if (!npages) { - mtt->order = -1; - mtt->page_shift = HNS_ROCE_HEM_PAGE_SHIFT; - return 0; - } - - /* Note: if page_shift is zero, FAST memory register */ - mtt->page_shift = page_shift; - - /* Compute MTT entry necessary */ - for (mtt->order = 0, i = HNS_ROCE_MTT_ENTRY_PER_SEG; i < npages; - i <<= 1) - ++mtt->order; - - /* Allocate MTT entry */ - ret = hns_roce_alloc_mtt_range(hr_dev, mtt->order, &mtt->first_seg, - mtt->mtt_type); - if (ret) - return -ENOMEM; - - return 0; -} - -void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt) -{ - struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; - - if (mtt->order < 0) - return; - - switch (mtt->mtt_type) { - case MTT_TYPE_WQE: - hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg, - mtt->order); - hns_roce_table_put_range(hr_dev, &mr_table->mtt_table, - mtt->first_seg, - mtt->first_seg + (1 << mtt->order) - 1); - break; - case MTT_TYPE_CQE: - hns_roce_buddy_free(&mr_table->mtt_cqe_buddy, mtt->first_seg, - mtt->order); - hns_roce_table_put_range(hr_dev, &mr_table->mtt_cqe_table, - mtt->first_seg, - mtt->first_seg + (1 << mtt->order) - 1); - break; - case MTT_TYPE_SRQWQE: - hns_roce_buddy_free(&mr_table->mtt_srqwqe_buddy, mtt->first_seg, - mtt->order); - hns_roce_table_put_range(hr_dev, &mr_table->mtt_srqwqe_table, - mtt->first_seg, - mtt->first_seg + (1 << mtt->order) - 1); - break; - case MTT_TYPE_IDX: - hns_roce_buddy_free(&mr_table->mtt_idx_buddy, mtt->first_seg, - mtt->order); - hns_roce_table_put_range(hr_dev, &mr_table->mtt_idx_table, - mtt->first_seg, - mtt->first_seg + (1 << mtt->order) - 1); - break; - default: - dev_err(hr_dev->dev, - "Unsupport mtt type %d, clean mtt failed\n", - mtt->mtt_type); - break; - } -} - -static void hns_roce_loop_free(struct hns_roce_dev *hr_dev, - struct hns_roce_mr *mr, int err_loop_index, - int loop_i, int loop_j) -{ - struct device *dev = hr_dev->dev; - u32 mhop_num; - u32 pbl_bt_sz; - u64 bt_idx; - int i, j; - - pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); - mhop_num = hr_dev->caps.pbl_hop_num; - - i = loop_i; - if (mhop_num == 3 && err_loop_index == 2) { - for (; i >= 0; i--) { - dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], - mr->pbl_l1_dma_addr[i]); - - for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) { - if (i == loop_i && j >= loop_j) - break; - - bt_idx = i * pbl_bt_sz / BA_BYTE_LEN + j; - dma_free_coherent(dev, pbl_bt_sz, - mr->pbl_bt_l2[bt_idx], - mr->pbl_l2_dma_addr[bt_idx]); - } - } - } else if (mhop_num == 3 && err_loop_index == 1) { - for (i -= 1; i >= 0; i--) { - dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], - mr->pbl_l1_dma_addr[i]); - - for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) { - bt_idx = i * pbl_bt_sz / BA_BYTE_LEN + j; - dma_free_coherent(dev, pbl_bt_sz, - mr->pbl_bt_l2[bt_idx], - mr->pbl_l2_dma_addr[bt_idx]); - } - } - } else if (mhop_num == 2 && err_loop_index == 1) { - for (i -= 1; i >= 0; i--) - dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], - mr->pbl_l1_dma_addr[i]); - } else { - dev_warn(dev, "not support: mhop_num=%d, err_loop_index=%d.", - mhop_num, err_loop_index); - return; - } - - dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0, mr->pbl_l0_dma_addr); - mr->pbl_bt_l0 = NULL; - mr->pbl_l0_dma_addr = 0; -} -static int pbl_1hop_alloc(struct hns_roce_dev *hr_dev, int npages, - struct hns_roce_mr *mr, u32 pbl_bt_sz) -{ - struct device *dev = hr_dev->dev; - - if (npages > pbl_bt_sz / 8) { - dev_err(dev, "npages %d is larger than buf_pg_sz!", - npages); - return -EINVAL; - } - mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, - &(mr->pbl_dma_addr), - GFP_KERNEL); - if (!mr->pbl_buf) + /* Allocate a key for mr from mr_table */ + err = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &obj); + if (err) { + ibdev_err(ibdev, + "failed to alloc bitmap for MR key, ret = %d.\n", + err); return -ENOMEM; - - mr->pbl_size = npages; - mr->pbl_ba = mr->pbl_dma_addr; - mr->pbl_hop_num = 1; - mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz; - mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz; - return 0; - -} - - -static int pbl_2hop_alloc(struct hns_roce_dev *hr_dev, int npages, - struct hns_roce_mr *mr, u32 pbl_bt_sz) -{ - struct device *dev = hr_dev->dev; - int npages_allocated; - u64 pbl_last_bt_num; - u64 pbl_bt_cnt = 0; - u64 size; - int i; - - pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8); - - /* alloc L1 BT */ - for (i = 0; i < pbl_bt_sz / 8; i++) { - if (pbl_bt_cnt + 1 < pbl_last_bt_num) { - size = pbl_bt_sz; - } else { - npages_allocated = i * (pbl_bt_sz / 8); - size = (npages - npages_allocated) * 8; - } - mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size, - &(mr->pbl_l1_dma_addr[i]), - GFP_KERNEL); - if (!mr->pbl_bt_l1[i]) { - hns_roce_loop_free(hr_dev, mr, 1, i, 0); - return -ENOMEM; - } - - *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i]; - - pbl_bt_cnt++; - if (pbl_bt_cnt >= pbl_last_bt_num) - break; } - mr->l0_chunk_last_num = i + 1; - - return 0; -} - -static int pbl_3hop_alloc(struct hns_roce_dev *hr_dev, int npages, - struct hns_roce_mr *mr, u32 pbl_bt_sz) -{ - struct device *dev = hr_dev->dev; - int mr_alloc_done = 0; - int npages_allocated; - u64 pbl_last_bt_num; - u64 pbl_bt_cnt = 0; - u64 bt_idx; - u64 size; - int i; - int j = 0; - - pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8); - - mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num, - sizeof(*mr->pbl_l2_dma_addr), - GFP_KERNEL); - if (!mr->pbl_l2_dma_addr) - return -ENOMEM; - - mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num, - sizeof(*mr->pbl_bt_l2), - GFP_KERNEL); - if (!mr->pbl_bt_l2) - goto err_kcalloc_bt_l2; - - /* alloc L1, L2 BT */ - for (i = 0; i < pbl_bt_sz / 8; i++) { - mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz, - &(mr->pbl_l1_dma_addr[i]), - GFP_KERNEL); - if (!mr->pbl_bt_l1[i]) { - hns_roce_loop_free(hr_dev, mr, 1, i, 0); - goto err_dma_alloc_l0; - } - - *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i]; - - for (j = 0; j < pbl_bt_sz / 8; j++) { - bt_idx = i * pbl_bt_sz / 8 + j; - - if (pbl_bt_cnt + 1 < pbl_last_bt_num) { - size = pbl_bt_sz; - } else { - npages_allocated = bt_idx * - (pbl_bt_sz / 8); - size = (npages - npages_allocated) * 8; - } - mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent( - dev, size, - &(mr->pbl_l2_dma_addr[bt_idx]), - GFP_KERNEL); - if (!mr->pbl_bt_l2[bt_idx]) { - hns_roce_loop_free(hr_dev, mr, 2, i, j); - goto err_dma_alloc_l0; - } - - *(mr->pbl_bt_l1[i] + j) = - mr->pbl_l2_dma_addr[bt_idx]; - - pbl_bt_cnt++; - if (pbl_bt_cnt >= pbl_last_bt_num) { - mr_alloc_done = 1; - break; - } - } + mr->iova = iova; /* MR va starting addr */ + mr->size = size; /* MR addr range */ + mr->pd = pd; /* MR num */ + mr->access = access; /* MR access permit */ + mr->enabled = 0; /* MR active status */ + mr->key = hw_index_to_key(obj); /* MR key */ - if (mr_alloc_done) - break; + err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table, obj); + if (err) { + ibdev_err(ibdev, "failed to alloc mtpt, ret = %d.\n", err); + goto err_free_bitmap; } - mr->l0_chunk_last_num = i + 1; - mr->l1_chunk_last_num = j + 1; - - return 0; - -err_dma_alloc_l0: - kfree(mr->pbl_bt_l2); - mr->pbl_bt_l2 = NULL; - -err_kcalloc_bt_l2: - kfree(mr->pbl_l2_dma_addr); - mr->pbl_l2_dma_addr = NULL; - - return -ENOMEM; +err_free_bitmap: + hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, obj, BITMAP_NO_RR); + return err; } - -/* PBL multi hop addressing */ -static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages, - struct hns_roce_mr *mr) +static void free_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { - struct device *dev = hr_dev->dev; - u32 pbl_bt_sz; - u32 mhop_num; - - mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num); - pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); - - if (mhop_num == HNS_ROCE_HOP_NUM_0) - return 0; - - if (mhop_num == 1) - return pbl_1hop_alloc(hr_dev, npages, mr, pbl_bt_sz); - - mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8, - sizeof(*mr->pbl_l1_dma_addr), - GFP_KERNEL); - if (!mr->pbl_l1_dma_addr) - return -ENOMEM; - - mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1), - GFP_KERNEL); - if (!mr->pbl_bt_l1) - goto err_kcalloc_bt_l1; - - /* alloc L0 BT */ - mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz, - &(mr->pbl_l0_dma_addr), - GFP_KERNEL); - if (!mr->pbl_bt_l0) - goto err_kcalloc_l2_dma; - - if (mhop_num == 2) { - if (pbl_2hop_alloc(hr_dev, npages, mr, pbl_bt_sz)) - goto err_kcalloc_l2_dma; - } - - if (mhop_num == 3) { - if (pbl_3hop_alloc(hr_dev, npages, mr, pbl_bt_sz)) - goto err_kcalloc_l2_dma; - } - + unsigned long obj = key_to_hw_index(mr->key); - mr->pbl_size = npages; - mr->pbl_ba = mr->pbl_l0_dma_addr; - mr->pbl_hop_num = hr_dev->caps.pbl_hop_num; - mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz; - mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz; - - return 0; - -err_kcalloc_l2_dma: - kfree(mr->pbl_bt_l1); - mr->pbl_bt_l1 = NULL; - -err_kcalloc_bt_l1: - kfree(mr->pbl_l1_dma_addr); - mr->pbl_l1_dma_addr = NULL; - - return -ENOMEM; + hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, obj); + hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, obj, BITMAP_NO_RR); } -static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova, - u64 size, u32 access, int npages, - struct hns_roce_mr *mr) +static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, + size_t length, struct ib_udata *udata, u64 start, + int access) { - struct device *dev = hr_dev->dev; - unsigned long index = 0; - int ret; - - /* Allocate a key for mr from mr_table */ - ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index); - if (ret) - return -ENOMEM; - - mr->iova = iova; /* MR va starting addr */ - mr->size = size; /* MR addr range */ - mr->pd = pd; /* MR num */ - mr->access = access; /* MR access permit */ - mr->enabled = 0; /* MR active status */ - mr->key = hw_index_to_key(index); /* MR key */ - - if (size == ~0ull) { - mr->pbl_buf = NULL; - mr->pbl_dma_addr = 0; - /* PBL multi-hop addressing parameters */ - mr->pbl_bt_l2 = NULL; - mr->pbl_bt_l1 = NULL; - mr->pbl_bt_l0 = NULL; - mr->pbl_l2_dma_addr = NULL; - mr->pbl_l1_dma_addr = NULL; - mr->pbl_l0_dma_addr = 0; - } else { - if (!hr_dev->caps.pbl_hop_num) { - mr->pbl_buf = dma_alloc_coherent(dev, - npages * BA_BYTE_LEN, - &(mr->pbl_dma_addr), - GFP_KERNEL); - if (!mr->pbl_buf) - return -ENOMEM; - } else { - ret = hns_roce_mhop_alloc(hr_dev, npages, mr); - } - } + struct ib_device *ibdev = &hr_dev->ib_dev; + bool is_fast = mr->type == MR_TYPE_FRMR; + struct hns_roce_buf_attr buf_attr = {}; + int err; + + mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num; + buf_attr.page_shift = is_fast ? PAGE_SHIFT : + hr_dev->caps.pbl_buf_pg_sz + HNS_HW_PAGE_SHIFT; + buf_attr.region[0].size = length; + buf_attr.region[0].hopnum = mr->pbl_hop_num; + buf_attr.region_count = 1; + buf_attr.fixed_page = true; + buf_attr.user_access = access; + /* fast MR's buffer is alloced before mapping, not at creation */ + buf_attr.mtt_only = is_fast; + + err = hns_roce_mtr_create(hr_dev, &mr->pbl_mtr, &buf_attr, + hr_dev->caps.pbl_ba_pg_sz + HNS_HW_PAGE_SHIFT, + udata, start); + if (err) + ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err); + else + mr->npages = mr->pbl_mtr.hem_cfg.buf_pg_count; - return ret; + return err; } -static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev, - struct hns_roce_mr *mr) +static void free_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { - struct device *dev = hr_dev->dev; - int npages_allocated; - int npages; - int i, j; - u32 pbl_bt_sz; - u32 mhop_num; - u64 bt_idx; - - npages = mr->pbl_size; - pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); - mhop_num = (mr->type == MR_TYPE_FRMR) ? 1 : hr_dev->caps.pbl_hop_num; - - if (mhop_num == HNS_ROCE_HOP_NUM_0) - return; - - if (mhop_num == 1) { - dma_free_coherent(dev, (unsigned int)(npages * BA_BYTE_LEN), - mr->pbl_buf, mr->pbl_dma_addr); - return; - } - - dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0, - mr->pbl_l0_dma_addr); - - if (mhop_num == 2) { - for (i = 0; i < mr->l0_chunk_last_num; i++) { - if (i == mr->l0_chunk_last_num - 1) { - npages_allocated = - i * (pbl_bt_sz / BA_BYTE_LEN); - - dma_free_coherent(dev, - (npages - npages_allocated) * BA_BYTE_LEN, - mr->pbl_bt_l1[i], - mr->pbl_l1_dma_addr[i]); - - break; - } - - dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], - mr->pbl_l1_dma_addr[i]); - } - } else if (mhop_num == 3) { - for (i = 0; i < mr->l0_chunk_last_num; i++) { - dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], - mr->pbl_l1_dma_addr[i]); - - for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) { - bt_idx = i * (pbl_bt_sz / BA_BYTE_LEN) + j; - - if ((i == mr->l0_chunk_last_num - 1) - && j == mr->l1_chunk_last_num - 1) { - npages_allocated = bt_idx * - (pbl_bt_sz / BA_BYTE_LEN); - - dma_free_coherent(dev, - (npages - npages_allocated) * - BA_BYTE_LEN, - mr->pbl_bt_l2[bt_idx], - mr->pbl_l2_dma_addr[bt_idx]); - - break; - } - - dma_free_coherent(dev, pbl_bt_sz, - mr->pbl_bt_l2[bt_idx], - mr->pbl_l2_dma_addr[bt_idx]); - } - } - } - - kfree(mr->pbl_bt_l1); - kfree(mr->pbl_l1_dma_addr); - mr->pbl_bt_l1 = NULL; - mr->pbl_l1_dma_addr = NULL; - if (mhop_num == 3) { - kfree(mr->pbl_bt_l2); - kfree(mr->pbl_l2_dma_addr); - mr->pbl_bt_l2 = NULL; - mr->pbl_l2_dma_addr = NULL; - } + hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr); } static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { - struct device *dev = hr_dev->dev; - int npages = 0; + struct ib_device *ibdev = &hr_dev->ib_dev; int ret; if (mr->enabled) { @@ -712,27 +156,12 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1)); if (ret) - dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret); + ibdev_warn(ibdev, "failed to destroy mpt, ret = %d.\n", + ret); } - if (mr->size != ~0ULL) { - if (mr->type == MR_TYPE_MR) - npages = ib_umem_page_count(mr->umem); - - if (!hr_dev->caps.pbl_hop_num) - dma_free_coherent(dev, - (unsigned int)(npages * BA_BYTE_LEN), - mr->pbl_buf, mr->pbl_dma_addr); - else - hns_roce_mhop_free(hr_dev, mr); - } - - if (mr->enabled) - hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, - key_to_hw_index(mr->key)); - - hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, - key_to_hw_index(mr->key), BITMAP_NO_RR); + free_mr_pbl(hr_dev, mr); + free_mr_key(hr_dev, mr); } static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, @@ -742,18 +171,12 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, unsigned long mtpt_idx = key_to_hw_index(mr->key); struct device *dev = hr_dev->dev; struct hns_roce_cmd_mailbox *mailbox; - struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; - - /* Prepare HEM entry memory */ - ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx); - if (ret) - return ret; /* Allocate mailbox memory */ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR(mailbox)) { ret = PTR_ERR(mailbox); - goto err_table; + return ret; } if (mr->type != MR_TYPE_FRMR) @@ -780,137 +203,6 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, err_page: hns_roce_free_cmd_mailbox(hr_dev, mailbox); -err_table: - hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx); - return ret; -} - -static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt, u32 start_index, - u32 npages, u64 *page_list) -{ - struct hns_roce_hem_table *table; - dma_addr_t dma_handle; - __le64 *mtts; - u32 bt_page_size; - u32 i; - - switch (mtt->mtt_type) { - case MTT_TYPE_WQE: - table = &hr_dev->mr_table.mtt_table; - bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT); - break; - case MTT_TYPE_CQE: - table = &hr_dev->mr_table.mtt_cqe_table; - bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT); - break; - case MTT_TYPE_SRQWQE: - table = &hr_dev->mr_table.mtt_srqwqe_table; - bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT); - break; - case MTT_TYPE_IDX: - table = &hr_dev->mr_table.mtt_idx_table; - bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT); - break; - default: - return -EINVAL; - } - - /* All MTTs must fit in the same page */ - if (start_index / (bt_page_size / sizeof(u64)) != - (start_index + npages - 1) / (bt_page_size / sizeof(u64))) - return -EINVAL; - - if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1)) - return -EINVAL; - - mtts = hns_roce_table_find(hr_dev, table, - mtt->first_seg + - start_index / HNS_ROCE_MTT_ENTRY_PER_SEG, - &dma_handle); - if (!mtts) - return -ENOMEM; - - /* Save page addr, low 12 bits : 0 */ - for (i = 0; i < npages; ++i) { - if (!hr_dev->caps.mtt_hop_num) - mtts[i] = cpu_to_le64(page_list[i] >> PAGE_ADDR_SHIFT); - else - mtts[i] = cpu_to_le64(page_list[i]); - } - - return 0; -} - -static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt, u32 start_index, - u32 npages, u64 *page_list) -{ - int chunk; - int ret; - u32 bt_page_size; - - if (mtt->order < 0) - return -EINVAL; - - switch (mtt->mtt_type) { - case MTT_TYPE_WQE: - bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT); - break; - case MTT_TYPE_CQE: - bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT); - break; - case MTT_TYPE_SRQWQE: - bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT); - break; - case MTT_TYPE_IDX: - bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT); - break; - default: - dev_err(hr_dev->dev, - "Unsupport mtt type %d, write mtt failed\n", - mtt->mtt_type); - return -EINVAL; - } - - while (npages > 0) { - chunk = min_t(int, bt_page_size / sizeof(u64), npages); - - ret = hns_roce_write_mtt_chunk(hr_dev, mtt, start_index, chunk, - page_list); - if (ret) - return ret; - - npages -= chunk; - start_index += chunk; - page_list += chunk; - } - - return 0; -} - -int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt, struct hns_roce_buf *buf) -{ - u64 *page_list; - int ret; - u32 i; - - page_list = kmalloc_array(buf->npages, sizeof(*page_list), GFP_KERNEL); - if (!page_list) - return -ENOMEM; - - for (i = 0; i < buf->npages; ++i) { - if (buf->nbufs == 1) - page_list[i] = buf->direct.map + (i << buf->page_shift); - else - page_list[i] = buf->page_list[i].map; - - } - ret = hns_roce_write_mtt(hr_dev, mtt, 0, buf->npages, page_list); - - kfree(page_list); - return ret; } @@ -923,50 +215,6 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev) hr_dev->caps.num_mtpts, hr_dev->caps.num_mtpts - 1, hr_dev->caps.reserved_mrws, 0); - if (ret) - return ret; - - ret = hns_roce_buddy_init(&mr_table->mtt_buddy, - ilog2(hr_dev->caps.num_mtt_segs)); - if (ret) - goto err_buddy; - - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) { - ret = hns_roce_buddy_init(&mr_table->mtt_cqe_buddy, - ilog2(hr_dev->caps.num_cqe_segs)); - if (ret) - goto err_buddy_cqe; - } - - if (hr_dev->caps.num_srqwqe_segs) { - ret = hns_roce_buddy_init(&mr_table->mtt_srqwqe_buddy, - ilog2(hr_dev->caps.num_srqwqe_segs)); - if (ret) - goto err_buddy_srqwqe; - } - - if (hr_dev->caps.num_idx_segs) { - ret = hns_roce_buddy_init(&mr_table->mtt_idx_buddy, - ilog2(hr_dev->caps.num_idx_segs)); - if (ret) - goto err_buddy_idx; - } - - return 0; - -err_buddy_idx: - if (hr_dev->caps.num_srqwqe_segs) - hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy); - -err_buddy_srqwqe: - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy); - -err_buddy_cqe: - hns_roce_buddy_cleanup(&mr_table->mtt_buddy); - -err_buddy: - hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap); return ret; } @@ -974,30 +222,24 @@ void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev) { struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; - if (hr_dev->caps.num_idx_segs) - hns_roce_buddy_cleanup(&mr_table->mtt_idx_buddy); - if (hr_dev->caps.num_srqwqe_segs) - hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy); - hns_roce_buddy_cleanup(&mr_table->mtt_buddy); - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy); hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap); } struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc) { + struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct hns_roce_mr *mr; int ret; - mr = kmalloc(sizeof(*mr), GFP_KERNEL); + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (mr == NULL) return ERR_PTR(-ENOMEM); mr->type = MR_TYPE_DMA; /* Allocate memory region key */ - ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0, - ~0ULL, acc, 0, mr); + hns_roce_hem_list_init(&mr->pbl_mtr.hem_list); + ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, 0, acc); if (ret) goto err_free; @@ -1006,203 +248,52 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc) goto err_mr; mr->ibmr.rkey = mr->ibmr.lkey = mr->key; - mr->umem = NULL; return &mr->ibmr; - err_mr: - hns_roce_mr_free(to_hr_dev(pd->device), mr); + free_mr_key(hr_dev, mr); err_free: kfree(mr); return ERR_PTR(ret); } -int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt, struct ib_umem *umem) -{ - struct device *dev = hr_dev->dev; - struct sg_dma_page_iter sg_iter; - unsigned int order; - int npage = 0; - int ret = 0; - int i; - u64 page_addr; - u64 *pages; - u32 bt_page_size; - u32 n; - - switch (mtt->mtt_type) { - case MTT_TYPE_WQE: - order = hr_dev->caps.mtt_ba_pg_sz; - break; - case MTT_TYPE_CQE: - order = hr_dev->caps.cqe_ba_pg_sz; - break; - case MTT_TYPE_SRQWQE: - order = hr_dev->caps.srqwqe_ba_pg_sz; - break; - case MTT_TYPE_IDX: - order = hr_dev->caps.idx_ba_pg_sz; - break; - default: - dev_err(dev, "Unsupport mtt type %d, write mtt failed\n", - mtt->mtt_type); - return -EINVAL; - } - - bt_page_size = 1 << (order + PAGE_SHIFT); - - pages = (u64 *) __get_free_pages(GFP_KERNEL, order); - if (!pages) - return -ENOMEM; - - i = n = 0; - - for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { - page_addr = sg_page_iter_dma_address(&sg_iter); - if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) { - if (page_addr & ((1 << mtt->page_shift) - 1)) { - dev_err(dev, - "page_addr is not page_shift %d alignment!\n", - mtt->page_shift); - ret = -EINVAL; - goto out; - } - pages[i++] = page_addr; - } - npage++; - if (i == bt_page_size / sizeof(u64)) { - ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages); - if (ret) - goto out; - n += i; - i = 0; - } - } - - if (i) - ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages); - -out: - free_pages((unsigned long) pages, order); - return ret; -} - -static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev, - struct hns_roce_mr *mr, - struct ib_umem *umem) -{ - struct sg_dma_page_iter sg_iter; - int i = 0, j = 0; - u64 page_addr; - u32 pbl_bt_sz; - - if (hr_dev->caps.pbl_hop_num == HNS_ROCE_HOP_NUM_0) - return 0; - - pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); - for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { - page_addr = sg_page_iter_dma_address(&sg_iter); - if (!hr_dev->caps.pbl_hop_num) { - /* for hip06, page addr is aligned to 4K */ - mr->pbl_buf[i++] = page_addr >> 12; - } else if (hr_dev->caps.pbl_hop_num == 1) { - mr->pbl_buf[i++] = page_addr; - } else { - if (hr_dev->caps.pbl_hop_num == 2) - mr->pbl_bt_l1[i][j] = page_addr; - else if (hr_dev->caps.pbl_hop_num == 3) - mr->pbl_bt_l2[i][j] = page_addr; - - j++; - if (j >= (pbl_bt_sz / BA_BYTE_LEN)) { - i++; - j = 0; - } - } - } - - /* Memory barrier */ - mb(); - - return 0; -} - struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); - struct device *dev = hr_dev->dev; struct hns_roce_mr *mr; - int bt_size; int ret; - int n; - int i; - mr = kmalloc(sizeof(*mr), GFP_KERNEL); + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(pd->device, start, length, access_flags); - if (IS_ERR(mr->umem)) { - ret = PTR_ERR(mr->umem); - goto err_free; - } - - n = ib_umem_page_count(mr->umem); - - if (!hr_dev->caps.pbl_hop_num) { - if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) { - dev_err(dev, - " MR len %lld err. MR is limited to 4G at most!\n", - length); - ret = -EINVAL; - goto err_umem; - } - } else { - u64 pbl_size = 1; - - bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) / - BA_BYTE_LEN; - for (i = 0; i < hr_dev->caps.pbl_hop_num; i++) - pbl_size *= bt_size; - if (n > pbl_size) { - dev_err(dev, - " MR len %lld err. MR page num is limited to %lld!\n", - length, pbl_size); - ret = -EINVAL; - goto err_umem; - } - } - mr->type = MR_TYPE_MR; - - ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length, - access_flags, n, mr); + ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, virt_addr, length, + access_flags); if (ret) - goto err_umem; + goto err_alloc_mr; - ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem); + ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, access_flags); if (ret) - goto err_mr; + goto err_alloc_key; ret = hns_roce_mr_enable(hr_dev, mr); if (ret) - goto err_mr; + goto err_alloc_pbl; mr->ibmr.rkey = mr->ibmr.lkey = mr->key; + mr->ibmr.length = length; return &mr->ibmr; -err_mr: - hns_roce_mr_free(hr_dev, mr); - -err_umem: - ib_umem_release(mr->umem); - -err_free: +err_alloc_pbl: + free_mr_pbl(hr_dev, mr); +err_alloc_key: + free_mr_key(hr_dev, mr); +err_alloc_mr: kfree(mr); return ERR_PTR(ret); } @@ -1214,84 +305,36 @@ static int rereg_mr_trans(struct ib_mr *ibmr, int flags, u32 pdn, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_mr *mr = to_hr_mr(ibmr); - struct device *dev = hr_dev->dev; - int npages; int ret; - if (mr->size != ~0ULL) { - npages = ib_umem_page_count(mr->umem); - - if (hr_dev->caps.pbl_hop_num) - hns_roce_mhop_free(hr_dev, mr); - else - dma_free_coherent(dev, npages * 8, - mr->pbl_buf, mr->pbl_dma_addr); - } - ib_umem_release(mr->umem); - - mr->umem = ib_umem_get(ibmr->device, start, length, mr_access_flags); - if (IS_ERR(mr->umem)) { - ret = PTR_ERR(mr->umem); - mr->umem = NULL; - return -ENOMEM; - } - npages = ib_umem_page_count(mr->umem); - - if (hr_dev->caps.pbl_hop_num) { - ret = hns_roce_mhop_alloc(hr_dev, npages, mr); - if (ret) - goto release_umem; - } else { - mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, - &(mr->pbl_dma_addr), - GFP_KERNEL); - if (!mr->pbl_buf) { - ret = -ENOMEM; - goto release_umem; - } + free_mr_pbl(hr_dev, mr); + ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, mr_access_flags); + if (ret) { + ibdev_err(ibdev, "failed to create mr PBL, ret = %d.\n", ret); + return ret; } ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn, mr_access_flags, virt_addr, length, mailbox->buf); - if (ret) - goto release_umem; - - - ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem); if (ret) { - if (mr->size != ~0ULL) { - npages = ib_umem_page_count(mr->umem); - - if (hr_dev->caps.pbl_hop_num) - hns_roce_mhop_free(hr_dev, mr); - else - dma_free_coherent(dev, npages * 8, - mr->pbl_buf, - mr->pbl_dma_addr); - } - - goto release_umem; + ibdev_err(ibdev, "failed to write mtpt, ret = %d.\n", ret); + free_mr_pbl(hr_dev, mr); } - return 0; - -release_umem: - ib_umem_release(mr->umem); return ret; - } - int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); + struct ib_device *ib_dev = &hr_dev->ib_dev; struct hns_roce_mr *mr = to_hr_mr(ibmr); struct hns_roce_cmd_mailbox *mailbox; - struct device *dev = hr_dev->dev; unsigned long mtpt_idx; u32 pdn = 0; int ret; @@ -1312,7 +355,7 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, mtpt_idx); if (ret) - dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret); + ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret); mr->enabled = 0; @@ -1336,8 +379,7 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx); if (ret) { - dev_err(dev, "CREATE_MPT failed (%d)\n", ret); - ib_umem_release(mr->umem); + ibdev_err(ib_dev, "failed to create MPT, ret = %d.\n", ret); goto free_cmd_mbox; } @@ -1365,8 +407,6 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) ret = hr_dev->hw->dereg_mr(hr_dev, mr, udata); } else { hns_roce_mr_free(hr_dev, mr); - - ib_umem_release(mr->umem); kfree(mr); } @@ -1380,12 +420,8 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, struct device *dev = hr_dev->dev; struct hns_roce_mr *mr; u64 length; - u32 page_size; int ret; - page_size = 1 << (hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT); - length = max_num_sg * page_size; - if (mr_type != IB_MR_TYPE_MEM_REG) return ERR_PTR(-EINVAL); @@ -1402,23 +438,28 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, mr->type = MR_TYPE_FRMR; /* Allocate memory region key */ - ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, 0, length, - 0, max_num_sg, mr); + length = max_num_sg * (1 << PAGE_SHIFT); + ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, length, 0); if (ret) goto err_free; + ret = alloc_mr_pbl(hr_dev, mr, length, NULL, 0, 0); + if (ret) + goto err_key; + ret = hns_roce_mr_enable(hr_dev, mr); if (ret) - goto err_mr; + goto err_pbl; mr->ibmr.rkey = mr->ibmr.lkey = mr->key; - mr->umem = NULL; + mr->ibmr.length = length; return &mr->ibmr; -err_mr: - hns_roce_mr_free(to_hr_dev(pd->device), mr); - +err_key: + free_mr_key(hr_dev, mr); +err_pbl: + free_mr_pbl(hr_dev, mr); err_free: kfree(mr); return ERR_PTR(ret); @@ -1428,19 +469,54 @@ static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr) { struct hns_roce_mr *mr = to_hr_mr(ibmr); - mr->pbl_buf[mr->npages++] = addr; + if (likely(mr->npages < mr->pbl_mtr.hem_cfg.buf_pg_count)) { + mr->page_list[mr->npages++] = addr; + return 0; + } - return 0; + return -ENOBUFS; } int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) { + struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_mr *mr = to_hr_mr(ibmr); + struct hns_roce_mtr *mtr = &mr->pbl_mtr; + int ret = 0; mr->npages = 0; + mr->page_list = kvcalloc(mr->pbl_mtr.hem_cfg.buf_pg_count, + sizeof(dma_addr_t), GFP_KERNEL); + if (!mr->page_list) + return ret; + + ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page); + if (ret < 1) { + ibdev_err(ibdev, "failed to store sg pages %d %d, cnt = %d.\n", + mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, ret); + goto err_page_list; + } + + mtr->hem_cfg.region[0].offset = 0; + mtr->hem_cfg.region[0].count = mr->npages; + mtr->hem_cfg.region[0].hopnum = mr->pbl_hop_num; + mtr->hem_cfg.region_count = 1; + ret = hns_roce_mtr_map(hr_dev, mtr, mr->page_list, mr->npages); + if (ret) { + ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret); + ret = 0; + } else { + mr->pbl_mtr.hem_cfg.buf_pg_shift = ilog2(ibmr->page_size); + ret = mr->npages; + } + +err_page_list: + kvfree(mr->page_list); + mr->page_list = NULL; - return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page); + return ret; } static void hns_roce_mw_free(struct hns_roce_dev *hr_dev, @@ -1564,32 +640,23 @@ int hns_roce_dealloc_mw(struct ib_mw *ibmw) return 0; } -void hns_roce_mtr_init(struct hns_roce_mtr *mtr, int bt_pg_shift, - int buf_pg_shift) -{ - hns_roce_hem_list_init(&mtr->hem_list, bt_pg_shift); - mtr->buf_pg_shift = buf_pg_shift; -} - -void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev, - struct hns_roce_mtr *mtr) -{ - hns_roce_hem_list_release(hr_dev, &mtr->hem_list); -} - -static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev, - struct hns_roce_mtr *mtr, dma_addr_t *bufs, - struct hns_roce_buf_region *r) +static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + dma_addr_t *pages, struct hns_roce_buf_region *region) { + __le64 *mtts; int offset; int count; int npage; - u64 *mtts; + u64 addr; int end; int i; - offset = r->offset; - end = offset + r->count; + /* if hopnum is 0, buffer cannot store BAs, so skip write mtt */ + if (!region->hopnum) + return 0; + + offset = region->offset; + end = offset + region->count; npage = 0; while (offset < end) { mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, @@ -1597,13 +664,13 @@ static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev, if (!mtts) return -ENOBUFS; - /* Save page addr, low 12 bits : 0 */ for (i = 0; i < count; i++) { if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) - mtts[i] = bufs[npage] >> PAGE_ADDR_SHIFT; + addr = to_hr_hw_page_addr(pages[npage]); else - mtts[i] = bufs[npage]; + addr = pages[npage]; + mtts[i] = cpu_to_le64(addr); npage++; } offset += count; @@ -1612,69 +679,416 @@ static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev, return 0; } -int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t **bufs, struct hns_roce_buf_region *regions, - int region_cnt) +static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr) { - struct hns_roce_buf_region *r; - int ret; int i; - ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, regions, - region_cnt); - if (ret) - return ret; + for (i = 0; i < attr->region_count; i++) + if (attr->region[i].hopnum != HNS_ROCE_HOP_NUM_0 && + attr->region[i].hopnum > 0) + return true; - for (i = 0; i < region_cnt; i++) { - r = ®ions[i]; - ret = hns_roce_write_mtr(hr_dev, mtr, bufs[i], r); + /* because the mtr only one root base address, when hopnum is 0 means + * root base address equals the first buffer address, thus all alloced + * memory must in a continuous space accessed by direct mode. + */ + return false; +} + +static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr) +{ + size_t size = 0; + int i; + + for (i = 0; i < attr->region_count; i++) + size += attr->region[i].size; + + return size; +} + +static inline int mtr_umem_page_count(struct ib_umem *umem, + unsigned int page_shift) +{ + int count = ib_umem_page_count(umem); + + if (page_shift >= PAGE_SHIFT) + count >>= page_shift - PAGE_SHIFT; + else + count <<= PAGE_SHIFT - page_shift; + + return count; +} + +static inline size_t mtr_kmem_direct_size(bool is_direct, size_t alloc_size, + unsigned int page_shift) +{ + if (is_direct) + return ALIGN(alloc_size, 1 << page_shift); + else + return HNS_HW_DIRECT_PAGE_COUNT << page_shift; +} + +/* + * check the given pages in continuous address space + * Returns 0 on success, or the error page num. + */ +static inline int mtr_check_direct_pages(dma_addr_t *pages, int page_count, + unsigned int page_shift) +{ + size_t page_size = 1 << page_shift; + int i; + + for (i = 1; i < page_count; i++) + if (pages[i] - pages[i - 1] != page_size) + return i; + + return 0; +} + +static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) +{ + /* release user buffers */ + if (mtr->umem) { + ib_umem_release(mtr->umem); + mtr->umem = NULL; + } + + /* release kernel buffers */ + if (mtr->kmem) { + hns_roce_buf_free(hr_dev, mtr->kmem); + kfree(mtr->kmem); + mtr->kmem = NULL; + } +} + +static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_attr *buf_attr, bool is_direct, + struct ib_udata *udata, unsigned long user_addr) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + unsigned int max_pg_shift = buf_attr->page_shift; + unsigned int best_pg_shift = 0; + int all_pg_count = 0; + size_t direct_size; + size_t total_size; + unsigned long tmp; + int ret = 0; + + total_size = mtr_bufs_size(buf_attr); + if (total_size < 1) { + ibdev_err(ibdev, "Failed to check mtr size\n"); + return -EINVAL; + } + + if (udata) { + mtr->kmem = NULL; + mtr->umem = ib_umem_get(ibdev, user_addr, total_size, + buf_attr->user_access); + if (IS_ERR_OR_NULL(mtr->umem)) { + ibdev_err(ibdev, "Failed to get umem, ret %ld\n", + PTR_ERR(mtr->umem)); + return -ENOMEM; + } + if (buf_attr->fixed_page) { + best_pg_shift = max_pg_shift; + } else { + tmp = GENMASK(max_pg_shift, 0); + ret = ib_umem_find_best_pgsz(mtr->umem, tmp, user_addr); + best_pg_shift = (ret <= PAGE_SIZE) ? + PAGE_SHIFT : ilog2(ret); + } + all_pg_count = mtr_umem_page_count(mtr->umem, best_pg_shift); + ret = 0; + } else { + mtr->umem = NULL; + mtr->kmem = kzalloc(sizeof(*mtr->kmem), GFP_KERNEL); + if (!mtr->kmem) { + ibdev_err(ibdev, "Failed to alloc kmem\n"); + return -ENOMEM; + } + direct_size = mtr_kmem_direct_size(is_direct, total_size, + max_pg_shift); + ret = hns_roce_buf_alloc(hr_dev, total_size, direct_size, + mtr->kmem, max_pg_shift); if (ret) { - dev_err(hr_dev->dev, - "write mtr[%d/%d] err %d,offset=%d.\n", - i, region_cnt, ret, r->offset); - goto err_write; + ibdev_err(ibdev, "Failed to alloc kmem, ret %d\n", ret); + goto err_alloc_mem; + } else { + best_pg_shift = max_pg_shift; + all_pg_count = mtr->kmem->npages; } } - return 0; + /* must bigger than minimum hardware page shift */ + if (best_pg_shift < HNS_HW_PAGE_SHIFT || all_pg_count < 1) { + ret = -EINVAL; + ibdev_err(ibdev, "Failed to check mtr page shift %d count %d\n", + best_pg_shift, all_pg_count); + goto err_alloc_mem; + } -err_write: - hns_roce_hem_list_release(hr_dev, &mtr->hem_list); + mtr->hem_cfg.buf_pg_shift = best_pg_shift; + mtr->hem_cfg.buf_pg_count = all_pg_count; + return 0; +err_alloc_mem: + mtr_free_bufs(hr_dev, mtr); return ret; } +static int mtr_get_pages(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + dma_addr_t *pages, int count, unsigned int page_shift) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + int npage; + int err; + + if (mtr->umem) + npage = hns_roce_get_umem_bufs(hr_dev, pages, count, 0, + mtr->umem, page_shift); + else + npage = hns_roce_get_kmem_bufs(hr_dev, pages, count, 0, + mtr->kmem); + + if (mtr->hem_cfg.is_direct && npage > 1) { + err = mtr_check_direct_pages(pages, npage, page_shift); + if (err) { + ibdev_err(ibdev, "Failed to check %s direct page-%d\n", + mtr->umem ? "user" : "kernel", err); + npage = err; + } + } + + return npage; +} + +int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + dma_addr_t *pages, int page_cnt) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_buf_region *r; + int err; + int i; + + for (i = 0; i < mtr->hem_cfg.region_count; i++) { + r = &mtr->hem_cfg.region[i]; + if (r->offset + r->count > page_cnt) { + err = -EINVAL; + ibdev_err(ibdev, + "Failed to check mtr%d end %d + %d, max %d\n", + i, r->offset, r->count, page_cnt); + return err; + } + + err = mtr_map_region(hr_dev, mtr, &pages[r->offset], r); + if (err) { + ibdev_err(ibdev, + "Failed to map mtr%d offset %d, err %d\n", + i, r->offset, err); + return err; + } + } + + return 0; +} + int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr) { - u64 *mtts = mtt_buf; int mtt_count; int total = 0; - u64 *addr; + __le64 *mtts; int npage; + u64 addr; int left; - if (mtts == NULL || mtt_max < 1) + if (!mtt_buf || mtt_max < 1) goto done; + /* no mtt memory in direct mode, so just return the buffer address */ + if (mtr->hem_cfg.is_direct) { + npage = offset; + for (total = 0; total < mtt_max; total++, npage++) { + addr = mtr->hem_cfg.root_ba + + (npage << mtr->hem_cfg.buf_pg_shift); + + if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) + mtt_buf[total] = to_hr_hw_page_addr(addr); + else + mtt_buf[total] = addr; + } + + goto done; + } + left = mtt_max; while (left > 0) { mtt_count = 0; - addr = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, + mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, offset + total, &mtt_count, NULL); - if (!addr || !mtt_count) + if (!mtts || !mtt_count) goto done; npage = min(mtt_count, left); - memcpy(&mtts[total], addr, BA_BYTE_LEN * npage); left -= npage; - total += npage; + for (mtt_count = 0; mtt_count < npage; mtt_count++) + mtt_buf[total++] = le64_to_cpu(mtts[mtt_count]); } done: if (base_addr) - *base_addr = mtr->hem_list.root_ba; + *base_addr = mtr->hem_cfg.root_ba; return total; } + +/* convert buffer size to page index and page count */ +static unsigned int mtr_init_region(struct hns_roce_buf_attr *attr, + int page_cnt, + struct hns_roce_buf_region *regions, + int region_cnt, unsigned int page_shift) +{ + unsigned int page_size = 1 << page_shift; + int max_region = attr->region_count; + struct hns_roce_buf_region *r; + unsigned int i = 0; + int page_idx = 0; + + for (; i < region_cnt && i < max_region && page_idx < page_cnt; i++) { + r = ®ions[i]; + r->hopnum = attr->region[i].hopnum == HNS_ROCE_HOP_NUM_0 ? + 0 : attr->region[i].hopnum; + r->offset = page_idx; + r->count = DIV_ROUND_UP(attr->region[i].size, page_size); + page_idx += r->count; + } + + return i; +} + +/** + * hns_roce_mtr_create - Create hns memory translate region. + * + * @mtr: memory translate region + * @init_attr: init attribute for creating mtr + * @page_shift: page shift for multi-hop base address table + * @udata: user space context, if it's NULL, means kernel space + * @user_addr: userspace virtual address to start at + * @buf_alloced: mtr has private buffer, true means need to alloc + */ +int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_attr *buf_attr, + unsigned int page_shift, struct ib_udata *udata, + unsigned long user_addr) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + dma_addr_t *pages = NULL; + int region_cnt = 0; + int all_pg_cnt; + int get_pg_cnt; + bool has_mtt; + int err = 0; + + has_mtt = mtr_has_mtt(buf_attr); + /* if buffer only need mtt, just init the hem cfg */ + if (buf_attr->mtt_only) { + mtr->hem_cfg.buf_pg_shift = buf_attr->page_shift; + mtr->hem_cfg.buf_pg_count = mtr_bufs_size(buf_attr) >> + buf_attr->page_shift; + mtr->umem = NULL; + mtr->kmem = NULL; + } else { + err = mtr_alloc_bufs(hr_dev, mtr, buf_attr, !has_mtt, udata, + user_addr); + if (err) { + ibdev_err(ibdev, "Failed to alloc mtr bufs, err %d\n", + err); + return err; + } + } + + /* alloc mtt memory */ + all_pg_cnt = mtr->hem_cfg.buf_pg_count; + hns_roce_hem_list_init(&mtr->hem_list); + mtr->hem_cfg.is_direct = !has_mtt; + mtr->hem_cfg.ba_pg_shift = page_shift; + mtr->hem_cfg.region_count = 0; + region_cnt = mtr_init_region(buf_attr, all_pg_cnt, + mtr->hem_cfg.region, + ARRAY_SIZE(mtr->hem_cfg.region), + mtr->hem_cfg.buf_pg_shift); + if (region_cnt < 1) { + err = -ENOBUFS; + ibdev_err(ibdev, "failed to init mtr region %d\n", region_cnt); + goto err_alloc_bufs; + } + + mtr->hem_cfg.region_count = region_cnt; + + if (has_mtt) { + err = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, + mtr->hem_cfg.region, region_cnt, + page_shift); + if (err) { + ibdev_err(ibdev, "Failed to request mtr hem, err %d\n", + err); + goto err_alloc_bufs; + } + mtr->hem_cfg.root_ba = mtr->hem_list.root_ba; + } + + /* no buffer to map */ + if (buf_attr->mtt_only) + return 0; + + /* alloc a tmp array to store buffer's dma address */ + pages = kvcalloc(all_pg_cnt, sizeof(dma_addr_t), GFP_KERNEL); + if (!pages) { + err = -ENOMEM; + ibdev_err(ibdev, "Failed to alloc mtr page list %d\n", + all_pg_cnt); + goto err_alloc_hem_list; + } + + get_pg_cnt = mtr_get_pages(hr_dev, mtr, pages, all_pg_cnt, + mtr->hem_cfg.buf_pg_shift); + if (get_pg_cnt != all_pg_cnt) { + ibdev_err(ibdev, "Failed to get mtr page %d != %d\n", + get_pg_cnt, all_pg_cnt); + err = -ENOBUFS; + goto err_alloc_page_list; + } + + if (!has_mtt) { + mtr->hem_cfg.root_ba = pages[0]; + } else { + /* write buffer's dma address to BA table */ + err = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt); + if (err) { + ibdev_err(ibdev, "Failed to map mtr pages, err %d\n", + err); + goto err_alloc_page_list; + } + } + + /* drop tmp array */ + kvfree(pages); + return 0; +err_alloc_page_list: + kvfree(pages); +err_alloc_hem_list: + hns_roce_hem_list_release(hr_dev, &mtr->hem_list); +err_alloc_bufs: + mtr_free_bufs(hr_dev, mtr); + return err; +} + +void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) +{ + /* release multi-hop addressing resource */ + hns_roce_hem_list_release(hr_dev, &mtr->hem_list); + + /* free buffers */ + mtr_free_bufs(hr_dev, mtr); +} |