From ceb26569af5bc33a8f47b755fd64f12c81801ce8 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 12 May 2017 09:19:36 -0700 Subject: IB/hfi1: Remove unnecessary initialization from tx request The tx request is unnecessarily initialized in the hot code path with memset(), however, there's no need to do this as most fields are initialized later on. this initialization shows to be costly in the profile. Remove unnecessary initialization from tx request and make sure all variables are initialized properly. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index d55339f5d73b..16fd519216dc 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -607,12 +607,19 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, hfi1_cdbg(SDMA, "[%u:%u:%u] Using req/comp entry %u\n", dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx); req = pq->reqs + info.comp_idx; - memset(req, 0, sizeof(*req)); req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */ + req->data_len = 0; req->pq = pq; req->cq = cq; req->status = -1; req->ahg_idx = -1; + req->iov_idx = 0; + req->sent = 0; + req->seqnum = 0; + req->seqcomp = 0; + req->seqsubmitted = 0; + req->flags = 0; + req->tids = NULL; INIT_LIST_HEAD(&req->txps); memcpy(&req->info, &info, sizeof(info)); @@ -701,12 +708,14 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, /* Save all the IO vector structures */ for (i = 0; i < req->data_iovs; i++) { + req->iovs[i].offset = 0; INIT_LIST_HEAD(&req->iovs[i].list); memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(req->iovs[i].iov)); ret = pin_vector_pages(req, &req->iovs[i]); if (ret) { + req->data_iovs = i; req->status = ret; goto free_req; } @@ -749,6 +758,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, } req->tids = tmp; req->n_tids = ntids; + req->tididx = 0; idx++; } -- cgit v1.2.1 From aa560df381f7199fafa4e7f71382de28f0400270 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 12 May 2017 09:19:47 -0700 Subject: IB/hfi1: Remove unused mk_qpn function Leftover function that is not used. Remove it. Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/qp.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 650305cc0373..e91be05062e6 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -73,12 +73,6 @@ static void iowait_wakeup(struct iowait *wait, int reason); static void iowait_sdma_drained(struct iowait *wait); static void qp_pio_drain(struct rvt_qp *qp); -static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, - struct rvt_qpn_map *map, unsigned off) -{ - return (map - qpt->map) * RVT_BITS_PER_PAGE + off; -} - const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = { [IB_WR_RDMA_WRITE] = { .length = sizeof(struct ib_rdma_wr), -- cgit v1.2.1 From 7dafbab3753fcf59bc81748e5b2c5bf04e1c62c7 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 12 May 2017 09:19:55 -0700 Subject: IB/hfi1: Add functions to parse BTH/IB headers Improve code readablity by adding inline functions to read specific BTH/IB fields without knowledge of byte offsets. Reviewed-by: Brian Welty Reviewed-by: Dasaratharaman Chandramouli Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/driver.c | 6 +-- drivers/infiniband/hw/hfi1/rc.c | 8 ++-- drivers/infiniband/hw/hfi1/uc.c | 2 +- drivers/infiniband/hw/hfi1/ud.c | 6 +-- drivers/infiniband/hw/hfi1/verbs.c | 6 +-- include/rdma/ib_hdrs.h | 84 +++++++++++++++++++++++++++++++++++++ include/rdma/ib_verbs.h | 2 + include/rdma/rdmavt_qp.h | 2 +- 8 files changed, 101 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index a50870e455a3..0583479f2576 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -286,7 +286,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, goto drop; } /* Get the destination QP number. */ - qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; + qp_num = ib_bth_get_qpn(ohdr); if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) { struct rvt_qp *qp; unsigned long flags; @@ -438,7 +438,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, case IB_QPT_GSI: case IB_QPT_UD: rlid = ib_get_slid(hdr); - rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; + rqpn = ib_get_sqpn(ohdr); svc_type = IB_CC_SVCTYPE_UD; is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); @@ -461,7 +461,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, bth1 = be32_to_cpu(ohdr->bth[1]); if (do_cnp && (bth1 & IB_FECN_SMASK)) { - u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]); + u16 pkey = ib_bth_get_pkey(ohdr); return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh); } diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 1080778a1f7c..66e6843aab48 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -765,7 +765,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, ohdr->u.aeth = rvt_compute_aeth(qp); sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)]; /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT); + pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); lrh0 |= (sc5 & 0xf) << 12 | (rdma_ah_get_sl(&qp->remote_ah_attr) & 0xf) << 4; hdr.lrh[0] = cpu_to_be16(lrh0); @@ -1009,7 +1009,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) return; } - psn = be32_to_cpu(ohdr->bth[2]); + psn = ib_bth_get_psn(ohdr); reset_sending_psn(qp, psn); /* @@ -1943,7 +1943,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) is_fecn = process_ecn(qp, packet, false); - psn = be32_to_cpu(ohdr->bth[2]); + psn = ib_bth_get_psn(ohdr); opcode = ib_bth_get_opcode(ohdr); /* @@ -2388,7 +2388,7 @@ void hfi1_rc_hdrerr( if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0)) return; - psn = be32_to_cpu(ohdr->bth[2]); + psn = ib_bth_get_psn(ohdr); opcode = ib_bth_get_opcode(ohdr); /* Only deal with RDMA Writes for now */ diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 5da1e4546543..2a5650f8aee0 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -319,7 +319,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) process_ecn(qp, packet, true); - psn = be32_to_cpu(ohdr->bth[2]); + psn = ib_bth_get_psn(ohdr); opcode = ib_bth_get_opcode(ohdr); /* Compare the PSN verses the expected PSN. */ diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 6a4e95cefae5..49fe179ad3ae 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -549,7 +549,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, hdr.lrh[3] = cpu_to_be16(slid); plen = 2 /* PBC */ + hwords; - pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; + pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); vl = sc_to_vlt(ppd->dd, sc5); pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); if (ctxt) { @@ -689,8 +689,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) u16 slid; u8 extra_bytes; - qkey = be32_to_cpu(ohdr->u.ud.deth[0]); - src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; + qkey = ib_get_qkey(ohdr); + src_qp = ib_get_sqpn(ohdr); dlid = ib_get_dlid(hdr); bth1 = be32_to_cpu(ohdr->bth[1]); slid = ib_get_slid(hdr); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 90e7b77d68e8..128d2917a2d9 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -595,7 +595,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) inc_opstats(tlen, &rcd->opstats->stats[opcode]); /* Get the destination QP number. */ - qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK; + qp_num = ib_bth_get_qpn(packet->ohdr); lid = ib_get_dlid(hdr); if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) { @@ -863,7 +863,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, /* No vl15 here */ /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; + pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false))) pbc = hfi1_fault_tx(qp, opcode, pbc); @@ -999,7 +999,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u8 opcode = get_opcode(&tx->phdr.hdr); /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; + pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false))) pbc = hfi1_fault_tx(qp, opcode, pbc); pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h index 5519f31f043a..c124d515f7d5 100644 --- a/include/rdma/ib_hdrs.h +++ b/include/rdma/ib_hdrs.h @@ -193,8 +193,12 @@ static inline void put_ib_ateth_compare(u64 val, struct ib_atomic_eth *ateth) #define IB_LNH_MASK 3 #define IB_SC_MASK 0xf #define IB_SC_SHIFT 12 +#define IB_SC5_MASK 0x10 #define IB_SL_MASK 0xf #define IB_SL_SHIFT 4 +#define IB_SL_SHIFT 4 +#define IB_LVER_MASK 0xf +#define IB_LVER_SHIFT 8 static inline u8 ib_get_lnh(struct ib_header *hdr) { @@ -206,6 +210,11 @@ static inline u8 ib_get_sc(struct ib_header *hdr) return ((be16_to_cpu(hdr->lrh[0]) >> IB_SC_SHIFT) & IB_SC_MASK); } +static inline bool ib_is_sc5(u16 sc5) +{ + return !!(sc5 & IB_SC5_MASK); +} + static inline u8 ib_get_sl(struct ib_header *hdr) { return ((be16_to_cpu(hdr->lrh[0]) >> IB_SL_SHIFT) & IB_SL_MASK); @@ -221,6 +230,27 @@ static inline u16 ib_get_slid(struct ib_header *hdr) return (be16_to_cpu(hdr->lrh[3])); } +static inline u8 ib_get_lver(struct ib_header *hdr) +{ + return (u8)((be16_to_cpu(hdr->lrh[0]) >> IB_LVER_SHIFT) & + IB_LVER_MASK); +} + +static inline u16 ib_get_len(struct ib_header *hdr) +{ + return (u16)(be16_to_cpu(hdr->lrh[2])); +} + +static inline u32 ib_get_qkey(struct ib_other_headers *ohdr) +{ + return be32_to_cpu(ohdr->u.ud.deth[0]); +} + +static inline u32 ib_get_sqpn(struct ib_other_headers *ohdr) +{ + return ((be32_to_cpu(ohdr->u.ud.deth[1])) & IB_QPN_MASK); +} + /* * BTH */ @@ -229,6 +259,14 @@ static inline u16 ib_get_slid(struct ib_header *hdr) #define IB_BTH_PAD_MASK 3 #define IB_BTH_PKEY_MASK 0xffff #define IB_BTH_PAD_SHIFT 20 +#define IB_BTH_A_MASK 1 +#define IB_BTH_A_SHIFT 31 +#define IB_BTH_M_MASK 1 +#define IB_BTH_M_SHIFT 22 +#define IB_BTH_SE_MASK 1 +#define IB_BTH_SE_SHIFT 23 +#define IB_BTH_TVER_MASK 0xf +#define IB_BTH_TVER_SHIFT 16 static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr) { @@ -247,4 +285,50 @@ static inline u8 ib_bth_get_opcode(struct ib_other_headers *ohdr) IB_BTH_OPCODE_MASK); } +static inline u8 ib_bth_get_ackreq(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[2]) >> IB_BTH_A_SHIFT) & + IB_BTH_A_MASK); +} + +static inline u8 ib_bth_get_migreq(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_M_SHIFT) & + IB_BTH_M_MASK); +} + +static inline u8 ib_bth_get_se(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_SE_SHIFT) & + IB_BTH_SE_MASK); +} + +static inline u32 ib_bth_get_psn(struct ib_other_headers *ohdr) +{ + return (u32)(be32_to_cpu(ohdr->bth[2])); +} + +static inline u32 ib_bth_get_qpn(struct ib_other_headers *ohdr) +{ + return (u32)((be32_to_cpu(ohdr->bth[1])) & IB_QPN_MASK); +} + +static inline u8 ib_bth_get_becn(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[1]) >> IB_BECN_SHIFT) & + IB_BECN_MASK); +} + +static inline u8 ib_bth_get_fecn(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[1]) >> IB_FECN_SHIFT) & + IB_FECN_MASK); +} + +static inline u8 ib_bth_get_tver(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_TVER_SHIFT) & + IB_BTH_TVER_MASK); +} + #endif /* IB_HDRS_H */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index ba8314ec5768..8f1ce4e27bbd 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -664,6 +664,8 @@ union rdma_network_hdr { }; }; +#define IB_QPN_MASK 0xFFFFFF + enum { IB_MULTICAST_QPN = 0xffffff }; diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index be6472e5b06b..13f43b3527a8 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -396,7 +396,7 @@ struct rvt_srq { #define RVT_QPNMAP_ENTRIES (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) #define RVT_BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) #define RVT_BITS_PER_PAGE_MASK (RVT_BITS_PER_PAGE - 1) -#define RVT_QPN_MASK 0xFFFFFF +#define RVT_QPN_MASK IB_QPN_MASK /* * QPN-map pages start out as NULL, they get allocated upon -- cgit v1.2.1 From 228d2af1b723deedee38f03d144b7d25b39f6f86 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 12 May 2017 09:20:08 -0700 Subject: IB/hfi1: Separate input/output header tracing Calls to trace incoming packets will now receive the packet context as parameter. This enables trace support for future packet types. Header trace output is in the format : which makes parsing easier. input_ibhdr trace before change: -0 [001] d.h. 5904.250925: input_ibhdr: [0000:05:00.0] vl 0 lver 0 sl 0 lnh 2,LRH_BTH dlid 0002 len 18 slid 0001 op 0x64,UD_SEND_ONLY se 0 m 0 pad 0 tver 0 pkey 0xffff f 0 b 0 qpn 0x000001 a 0 psn 0x000001b2 deth qkey 0x80010000 sqpn 0x000001 input_ibhdr trace after change: -0 [001] d.h. 6655.714488: input_ibhdr: [0000:05:00.0] (IB) len:124 sc:0 dlid:0x0001 slid:0x0002 lnh:2,LRH_BTH lver:0 sl:0 age:0 becn:0 fecn:0 l4:0 rc:0 entropy:0 op:0x64,UD_SEND_ONLY se:0 m:0 pad:0 tver:0 pkey:0x7fff f:0 b:0 qpn:0x000001 a:0 psn:0x00000036 hlen:8 deth qkey:0x80010000 sqpn:0x000001 Reviewed-by: Dasaratharaman Chandramouli Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 49 ----- drivers/infiniband/hw/hfi1/rc.c | 3 +- drivers/infiniband/hw/hfi1/trace.c | 58 +++++- drivers/infiniband/hw/hfi1/trace_ibhdrs.h | 322 ++++++++++++++++++++---------- drivers/infiniband/hw/hfi1/trace_rx.h | 9 + drivers/infiniband/hw/hfi1/verbs.c | 7 +- 6 files changed, 279 insertions(+), 169 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 414a04a481c2..3b76631cbcbd 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -2086,53 +2086,4 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); #define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev)) #define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev)) - -#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype } -#define show_packettype(etype) \ -__print_symbolic(etype, \ - packettype_name(EXPECTED), \ - packettype_name(EAGER), \ - packettype_name(IB), \ - packettype_name(ERROR), \ - packettype_name(BYPASS)) - -#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode } -#define show_ib_opcode(opcode) \ -__print_symbolic(opcode, \ - ib_opcode_name(RC_SEND_FIRST), \ - ib_opcode_name(RC_SEND_MIDDLE), \ - ib_opcode_name(RC_SEND_LAST), \ - ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(RC_SEND_ONLY), \ - ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(RC_RDMA_WRITE_FIRST), \ - ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \ - ib_opcode_name(RC_RDMA_WRITE_LAST), \ - ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(RC_RDMA_WRITE_ONLY), \ - ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(RC_RDMA_READ_REQUEST), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \ - ib_opcode_name(RC_ACKNOWLEDGE), \ - ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \ - ib_opcode_name(RC_COMPARE_SWAP), \ - ib_opcode_name(RC_FETCH_ADD), \ - ib_opcode_name(UC_SEND_FIRST), \ - ib_opcode_name(UC_SEND_MIDDLE), \ - ib_opcode_name(UC_SEND_LAST), \ - ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(UC_SEND_ONLY), \ - ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(UC_RDMA_WRITE_FIRST), \ - ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \ - ib_opcode_name(UC_RDMA_WRITE_LAST), \ - ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(UC_RDMA_WRITE_ONLY), \ - ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(UD_SEND_ONLY), \ - ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(CNP)) #endif /* _HFI1_KERNEL_H */ diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 66e6843aab48..b443c1e01543 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -798,7 +798,8 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, goto queue_ack; } - trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &hdr); + trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), + &hdr, ib_is_sc5(sc5)); /* write the pbc and data */ ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords); diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index eafae487face..b80b74d0c252 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -47,7 +47,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" -u8 ibhdr_exhdr_len(struct ib_header *hdr) +u8 hfi1_trace_ib_hdr_len(struct ib_header *hdr) { struct ib_other_headers *ohdr; u8 opcode; @@ -61,13 +61,18 @@ u8 ibhdr_exhdr_len(struct ib_header *hdr) 0 : hdr_len_by_opcode[opcode] - (12 + 8); } -#define IMM_PRN "imm %d" -#define RETH_PRN "reth vaddr 0x%.16llx rkey 0x%.8x dlen 0x%.8x" -#define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x" -#define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x" -#define IETH_PRN "ieth rkey 0x%.8x" -#define ATOMICACKETH_PRN "origdata %llx" -#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %llx cdata %llx" +const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet) +{ + return "IB"; +} + +#define IMM_PRN "imm:%d" +#define RETH_PRN "reth vaddr:0x%.16llx rkey:0x%.8x dlen:0x%.8x" +#define AETH_PRN "aeth syn:0x%.2x %s msn:0x%.8x" +#define DETH_PRN "deth qkey:0x%.8x sqpn:0x%.6x" +#define IETH_PRN "ieth rkey:0x%.8x" +#define ATOMICACKETH_PRN "origdata:%llx" +#define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx" #define OP(transport, op) IB_OPCODE_## transport ## _ ## op @@ -84,6 +89,43 @@ static const char *parse_syndrome(u8 syndrome) return ""; } +void hfi1_trace_parse_bth(struct ib_other_headers *ohdr, + u8 *ack, u8 *becn, u8 *fecn, u8 *mig, + u8 *se, u8 *pad, u8 *opcode, u8 *tver, + u16 *pkey, u32 *psn, u32 *qpn) +{ + *ack = ib_bth_get_ackreq(ohdr); + *becn = ib_bth_get_becn(ohdr); + *fecn = ib_bth_get_fecn(ohdr); + *mig = ib_bth_get_migreq(ohdr); + *se = ib_bth_get_se(ohdr); + *pad = ib_bth_get_pad(ohdr); + *opcode = ib_bth_get_opcode(ohdr); + *tver = ib_bth_get_tver(ohdr); + *pkey = ib_bth_get_pkey(ohdr); + *psn = ib_bth_get_psn(ohdr); + *qpn = ib_bth_get_qpn(ohdr); +} + +void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5, + struct ib_other_headers **ohdr, + u8 *lnh, u8 *lver, u8 *sl, u8 *sc, + u16 *len, u32 *dlid, u32 *slid) +{ + *lnh = ib_get_lnh(hdr); + *lver = ib_get_lver(hdr); + *sl = ib_get_sl(hdr); + *sc = ib_get_sc(hdr) | (sc5 << 4); + *len = ib_get_len(hdr); + *dlid = ib_get_dlid(hdr); + *slid = ib_get_slid(hdr); + + if (*lnh == HFI1_LRH_BTH) + *ohdr = &hdr->u.oth; + else + *ohdr = &hdr->u.l.oth; +} + const char *parse_everbs_hdrs( struct trace_seq *p, u8 opcode, diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index 090f6b506953..0f2d2da057ec 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -55,8 +55,57 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_ibhdrs -u8 ibhdr_exhdr_len(struct ib_header *hdr); +#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode } +#define show_ib_opcode(opcode) \ +__print_symbolic(opcode, \ + ib_opcode_name(RC_SEND_FIRST), \ + ib_opcode_name(RC_SEND_MIDDLE), \ + ib_opcode_name(RC_SEND_LAST), \ + ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(RC_SEND_ONLY), \ + ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(RC_RDMA_WRITE_FIRST), \ + ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \ + ib_opcode_name(RC_RDMA_WRITE_LAST), \ + ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(RC_RDMA_WRITE_ONLY), \ + ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(RC_RDMA_READ_REQUEST), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \ + ib_opcode_name(RC_ACKNOWLEDGE), \ + ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \ + ib_opcode_name(RC_COMPARE_SWAP), \ + ib_opcode_name(RC_FETCH_ADD), \ + ib_opcode_name(UC_SEND_FIRST), \ + ib_opcode_name(UC_SEND_MIDDLE), \ + ib_opcode_name(UC_SEND_LAST), \ + ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(UC_SEND_ONLY), \ + ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(UC_RDMA_WRITE_FIRST), \ + ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \ + ib_opcode_name(UC_RDMA_WRITE_LAST), \ + ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(UC_RDMA_WRITE_ONLY), \ + ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(UD_SEND_ONLY), \ + ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(CNP)) + const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs); +u8 hfi1_trace_ib_hdr_len(struct ib_header *hdr); +const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet); +void hfi1_trace_parse_bth(struct ib_other_headers *ohdr, + u8 *ack, u8 *becn, u8 *fecn, u8 *mig, + u8 *se, u8 *pad, u8 *opcode, u8 *tver, + u16 *pkey, u32 *psn, u32 *qpn); +void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5, + struct ib_other_headers **ohdr, + u8 *lnh, u8 *lver, u8 *sl, u8 *sc, + u16 *len, u32 *dlid, u32 *slid); #define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs) @@ -66,139 +115,198 @@ __print_symbolic(lrh, \ lrh_name(LRH_BTH), \ lrh_name(LRH_GRH)) -#define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x" +#define LRH_PRN "len:%d sc:%d dlid:0x%.4x slid:0x%.4x" +#define LRH_9B_PRN "lnh:%d,%s lver:%d sl:%d " #define BTH_PRN \ - "op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \ - "f %d b %d qpn 0x%.6x a %d psn 0x%.8x" -#define EHDR_PRN "%s" + "op:0x%.2x,%s se:%d m:%d pad:%d tver:%d pkey:0x%.4x " \ + "f:%d b:%d qpn:0x%.6x a:%d psn:0x%.8x" +#define EHDR_PRN "hlen:%d %s" -DECLARE_EVENT_CLASS(hfi1_ibhdr_template, +DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, TP_PROTO(struct hfi1_devdata *dd, - struct ib_header *hdr), - TP_ARGS(dd, hdr), + struct hfi1_packet *packet, + bool sc5), + TP_ARGS(dd, packet, sc5), TP_STRUCT__entry( DD_DEV_ENTRY(dd) - /* LRH */ - __field(u8, vl) + __field(u8, lnh) __field(u8, lver) __field(u8, sl) + __field(u16, len) + __field(u32, dlid) + __field(u8, sc) + __field(u32, slid) + __field(u8, opcode) + __field(u8, se) + __field(u8, mig) + __field(u8, pad) + __field(u8, tver) + __field(u16, pkey) + __field(u8, fecn) + __field(u8, becn) + __field(u32, qpn) + __field(u8, ack) + __field(u32, psn) + /* extended headers */ + __dynamic_array(u8, ehdrs, + hfi1_trace_ib_hdr_len(packet->hdr)) + ), + TP_fast_assign( + struct ib_other_headers *ohdr; + + DD_DEV_ASSIGN(dd); + + hfi1_trace_parse_9b_hdr(packet->hdr, sc5, + &ohdr, + &__entry->lnh, + &__entry->lver, + &__entry->sl, + &__entry->sc, + &__entry->len, + &__entry->dlid, + &__entry->slid); + + hfi1_trace_parse_bth(ohdr, &__entry->ack, + &__entry->becn, &__entry->fecn, + &__entry->mig, &__entry->se, + &__entry->pad, &__entry->opcode, + &__entry->tver, &__entry->pkey, + &__entry->psn, &__entry->qpn); + /* extended headers */ + memcpy(__get_dynamic_array(ehdrs), &ohdr->u, + __get_dynamic_array_len(ehdrs)); + ), + TP_printk("[%s] (IB) " LRH_PRN " " LRH_9B_PRN " " + BTH_PRN " " EHDR_PRN, + __get_str(dev), + __entry->len, + __entry->sc, + __entry->dlid, + __entry->slid, + __entry->lnh, show_lnh(__entry->lnh), + __entry->lver, + __entry->sl, + /* BTH */ + __entry->opcode, show_ib_opcode(__entry->opcode), + __entry->se, + __entry->mig, + __entry->pad, + __entry->tver, + __entry->pkey, + __entry->fecn, + __entry->becn, + __entry->qpn, + __entry->ack, + __entry->psn, + /* extended headers */ + __get_dynamic_array_len(ehdrs), + __parse_ib_ehdrs( + __entry->opcode, + (void *)__get_dynamic_array(ehdrs)) + ) +); + +DEFINE_EVENT(hfi1_input_ibhdr_template, input_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, + struct hfi1_packet *packet, bool sc5), + TP_ARGS(dd, packet, sc5)); + +DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, + TP_PROTO(struct hfi1_devdata *dd, + struct ib_header *hdr, + bool sc5), + TP_ARGS(dd, hdr, sc5), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd) __field(u8, lnh) - __field(u16, dlid) + __field(u8, lver) + __field(u8, sl) __field(u16, len) - __field(u16, slid) - /* BTH */ + __field(u32, dlid) + __field(u8, sc) + __field(u32, slid) __field(u8, opcode) __field(u8, se) - __field(u8, m) + __field(u8, mig) __field(u8, pad) __field(u8, tver) __field(u16, pkey) - __field(u8, f) - __field(u8, b) + __field(u8, fecn) + __field(u8, becn) __field(u32, qpn) - __field(u8, a) + __field(u8, ack) __field(u32, psn) /* extended headers */ - __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr)) + __dynamic_array(u8, ehdrs, + hfi1_trace_ib_hdr_len(hdr)) ), - TP_fast_assign( + TP_fast_assign( struct ib_other_headers *ohdr; DD_DEV_ASSIGN(dd); - /* LRH */ - __entry->vl = - (u8)(be16_to_cpu(hdr->lrh[0]) >> 12); - __entry->lver = - (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf; - __entry->sl = - (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; - __entry->lnh = - (u8)(be16_to_cpu(hdr->lrh[0]) & 3); - __entry->dlid = - be16_to_cpu(hdr->lrh[1]); - /* allow for larger len */ - __entry->len = - be16_to_cpu(hdr->lrh[2]); - __entry->slid = - be16_to_cpu(hdr->lrh[3]); - /* BTH */ - if (__entry->lnh == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else - ohdr = &hdr->u.l.oth; - __entry->opcode = - (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; - __entry->se = - (be32_to_cpu(ohdr->bth[0]) >> 23) & 1; - __entry->m = - (be32_to_cpu(ohdr->bth[0]) >> 22) & 1; - __entry->pad = - (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - __entry->tver = - (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf; - __entry->pkey = - be32_to_cpu(ohdr->bth[0]) & 0xffff; - __entry->f = - (be32_to_cpu(ohdr->bth[1]) >> IB_FECN_SHIFT) & - IB_FECN_MASK; - __entry->b = - (be32_to_cpu(ohdr->bth[1]) >> IB_BECN_SHIFT) & - IB_BECN_MASK; - __entry->qpn = - be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; - __entry->a = - (be32_to_cpu(ohdr->bth[2]) >> 31) & 1; - /* allow for larger PSN */ - __entry->psn = - be32_to_cpu(ohdr->bth[2]) & 0x7fffffff; + + hfi1_trace_parse_9b_hdr(hdr, sc5, + &ohdr, &__entry->lnh, + &__entry->lver, &__entry->sl, + &__entry->sc, &__entry->len, + &__entry->dlid, &__entry->slid); + + hfi1_trace_parse_bth(ohdr, &__entry->ack, + &__entry->becn, &__entry->fecn, + &__entry->mig, &__entry->se, + &__entry->pad, &__entry->opcode, + &__entry->tver, &__entry->pkey, + &__entry->psn, &__entry->qpn); + /* extended headers */ - memcpy(__get_dynamic_array(ehdrs), &ohdr->u, - ibhdr_exhdr_len(hdr)); - ), - TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN, - __get_str(dev), - /* LRH */ - __entry->vl, - __entry->lver, - __entry->sl, - __entry->lnh, show_lnh(__entry->lnh), - __entry->dlid, - __entry->len, - __entry->slid, - /* BTH */ - __entry->opcode, show_ib_opcode(__entry->opcode), - __entry->se, - __entry->m, - __entry->pad, - __entry->tver, - __entry->pkey, - __entry->f, - __entry->b, - __entry->qpn, - __entry->a, - __entry->psn, - /* extended headers */ - __parse_ib_ehdrs( - __entry->opcode, - (void *)__get_dynamic_array(ehdrs)) - ) + memcpy(__get_dynamic_array(ehdrs), + &ohdr->u, __get_dynamic_array_len(ehdrs)); + ), + TP_printk("[%s] (IB) " LRH_PRN " " LRH_9B_PRN " " + BTH_PRN " " EHDR_PRN, + __get_str(dev), + __entry->len, + __entry->sc, + __entry->dlid, + __entry->slid, + __entry->lnh, show_lnh(__entry->lnh), + __entry->lver, + __entry->sl, + /* BTH */ + __entry->opcode, show_ib_opcode(__entry->opcode), + __entry->se, + __entry->mig, + __entry->pad, + __entry->tver, + __entry->pkey, + __entry->fecn, + __entry->becn, + __entry->qpn, + __entry->ack, + __entry->psn, + /* extended headers */ + __get_dynamic_array_len(ehdrs), + __parse_ib_ehdrs( + __entry->opcode, + (void *)__get_dynamic_array(ehdrs)) + ) ); -DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), - TP_ARGS(dd, hdr)); +DEFINE_EVENT(hfi1_output_ibhdr_template, pio_output_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, + struct ib_header *hdr, bool sc5), + TP_ARGS(dd, hdr, sc5)); -DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), - TP_ARGS(dd, hdr)); +DEFINE_EVENT(hfi1_output_ibhdr_template, ack_output_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, + struct ib_header *hdr, bool sc5), + TP_ARGS(dd, hdr, sc5)); -DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), - TP_ARGS(dd, hdr)); +DEFINE_EVENT(hfi1_output_ibhdr_template, sdma_output_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, + struct ib_header *hdr, bool sc5), + TP_ARGS(dd, hdr, sc5)); -DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), - TP_ARGS(dd, hdr)); #endif /* __HFI1_TRACE_IBHDRS_H */ diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index f77e59fb43fe..05fc6d68ffe8 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -55,6 +55,15 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_rx +#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype } +#define show_packettype(etype) \ +__print_symbolic(etype, \ + packettype_name(EXPECTED), \ + packettype_name(EAGER), \ + packettype_name(IB), \ + packettype_name(ERROR), \ + packettype_name(BYPASS)) + TRACE_EVENT(hfi1_rcvhdr, TP_PROTO(struct hfi1_devdata *dd, u32 ctxt, diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 128d2917a2d9..5f4be35f31b6 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -589,8 +589,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) goto drop; } - trace_input_ibhdr(rcd->dd, hdr); - + trace_input_ibhdr(rcd->dd, packet, !!(packet->rhf & RHF_DC_INFO_SMASK)); opcode = ib_bth_get_opcode(packet->ohdr); inc_opstats(tlen, &rcd->opstats->stats[opcode]); @@ -885,7 +884,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, return ret; } trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device), - &ps->s_txreq->phdr.hdr); + &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5)); return ret; bail_ecomm: @@ -1058,7 +1057,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, } trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device), - &ps->s_txreq->phdr.hdr); + &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5)); pio_bail: if (qp->s_wqe) { -- cgit v1.2.1 From 9039746cdf39dcbf2ddfcc4a68f729cbbbc853df Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 12 May 2017 09:20:20 -0700 Subject: IB/hfi1: Setup common IB fields in hfi1_packet struct We move many common IB fields into the hfi1_packet structure and set them up in a single function. This allows us to set the fields in a single place and not deal with them throughout the driver. Reviewed-by: Brian Welty Reviewed-by: Dasaratharaman Chandramouli Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 9 -- drivers/infiniband/hw/hfi1/chip.h | 2 - drivers/infiniband/hw/hfi1/common.h | 1 + drivers/infiniband/hw/hfi1/driver.c | 158 +++++++++++++++++++++++++----------- drivers/infiniband/hw/hfi1/hfi.h | 21 ++++- drivers/infiniband/hw/hfi1/rc.c | 33 +++----- drivers/infiniband/hw/hfi1/ruc.c | 89 ++++++++++---------- drivers/infiniband/hw/hfi1/uc.c | 16 +--- drivers/infiniband/hw/hfi1/ud.c | 23 ++---- drivers/infiniband/hw/hfi1/verbs.c | 89 +++++++++----------- drivers/infiniband/hw/hfi1/verbs.h | 6 +- 11 files changed, 236 insertions(+), 211 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 2ba00b89df6a..5dbee3c1bd45 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9810,15 +9810,6 @@ void hfi1_clear_tids(struct hfi1_ctxtdata *rcd) hfi1_put_tid(dd, i, PT_INVALID, 0, 0); } -struct ib_header *hfi1_get_msgheader( - struct hfi1_devdata *dd, __le32 *rhf_addr) -{ - u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr)); - - return (struct ib_header *) - (rhf_addr - dd->rhf_offset + offset); -} - static const char * const ib_cfg_name_strings[] = { "HFI1_IB_CFG_LIDLMC", "HFI1_IB_CFG_LWID_DG_ENB", diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index cbe455d9ab8b..0b4f418ba0ac 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1347,8 +1347,6 @@ enum { u64 get_all_cpu_total(u64 __percpu *cntr); void hfi1_start_cleanup(struct hfi1_devdata *dd); void hfi1_clear_tids(struct hfi1_ctxtdata *rcd); -struct ib_header *hfi1_get_msgheader( - struct hfi1_devdata *dd, __le32 *rhf_addr); void hfi1_init_ctxt(struct send_context *sc); void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, u32 type, unsigned long pa, u16 order); diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 995d62c7f9a7..ba9ab971ced9 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -325,6 +325,7 @@ struct diag_pkt { #define HFI1_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */ /* misc. */ +#define SC15_PACKET 0xF #define SIZE_OF_CRC 1 #define LIM_MGMT_P_KEY 0x7FFF diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 0583479f2576..2a1022e374a5 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -224,6 +224,20 @@ static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf, (offset * RCV_BUF_BLOCK_SIZE)); } +static inline void *hfi1_get_header(struct hfi1_devdata *dd, + __le32 *rhf_addr) +{ + u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr)); + + return (void *)(rhf_addr - dd->rhf_offset + offset); +} + +static inline struct ib_header *hfi1_get_msgheader(struct hfi1_devdata *dd, + __le32 *rhf_addr) +{ + return (struct ib_header *)hfi1_get_header(dd, rhf_addr); +} + /* * Validate and encode the a given RcvArray Buffer size. * The function will check whether the given size falls within @@ -249,7 +263,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, { struct ib_header *rhdr = packet->hdr; u32 rte = rhf_rcv_type_err(packet->rhf); - int lnh = ib_get_lnh(rhdr); + u8 lnh = ib_get_lnh(rhdr); + bool has_grh = false; struct hfi1_ibport *ibp = rcd_to_iport(rcd); struct hfi1_devdata *dd = ppd->dd; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; @@ -257,37 +272,42 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR)) return; + if (lnh == HFI1_LRH_BTH) { + packet->ohdr = &rhdr->u.oth; + } else if (lnh == HFI1_LRH_GRH) { + has_grh = true; + packet->ohdr = &rhdr->u.l.oth; + packet->grh = &rhdr->u.l.grh; + } else { + goto drop; + } + if (packet->rhf & RHF_TID_ERR) { /* For TIDERR and RC QPs preemptively schedule a NAK */ - struct ib_other_headers *ohdr = NULL; u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */ - u16 lid = ib_get_dlid(rhdr); + u32 dlid = ib_get_dlid(rhdr); u32 qp_num; - u32 rcv_flags = 0; + u32 mlid_base = be16_to_cpu(IB_MULTICAST_LID_BASE); /* Sanity check packet */ if (tlen < 24) goto drop; /* Check for GRH */ - if (lnh == HFI1_LRH_BTH) { - ohdr = &rhdr->u.oth; - } else if (lnh == HFI1_LRH_GRH) { + if (has_grh) { u32 vtf; + struct ib_grh *grh = packet->grh; - ohdr = &rhdr->u.l.oth; - if (rhdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR) + if (grh->next_hdr != IB_GRH_NEXT_HDR) goto drop; - vtf = be32_to_cpu(rhdr->u.l.grh.version_tclass_flow); + vtf = be32_to_cpu(grh->version_tclass_flow); if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) goto drop; - rcv_flags |= HFI1_HAS_GRH; - } else { - goto drop; } + /* Get the destination QP number. */ - qp_num = ib_bth_get_qpn(ohdr); - if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) { + qp_num = ib_bth_get_qpn(packet->ohdr); + if (dlid < mlid_base) { struct rvt_qp *qp; unsigned long flags; @@ -312,11 +332,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, switch (qp->ibqp.qp_type) { case IB_QPT_RC: - hfi1_rc_hdrerr( - rcd, - rhdr, - rcv_flags, - qp); + hfi1_rc_hdrerr(rcd, packet, qp); break; default: /* For now don't handle any other QP types */ @@ -332,9 +348,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, switch (rte) { case RHF_RTE_ERROR_OP_CODE_ERR: { - u32 opcode; void *ebuf = NULL; - __be32 *bth = NULL; + u8 opcode; if (rhf_use_egr_bfr(packet->rhf)) ebuf = packet->ebuf; @@ -342,16 +357,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, if (!ebuf) goto drop; /* this should never happen */ - if (lnh == HFI1_LRH_BTH) - bth = (__be32 *)ebuf; - else if (lnh == HFI1_LRH_GRH) - bth = (__be32 *)((char *)ebuf + sizeof(struct ib_grh)); - else - goto drop; - - opcode = be32_to_cpu(bth[0]) >> 24; - opcode &= 0xff; - + opcode = ib_bth_get_opcode(packet->ohdr); if (opcode == IB_OPCODE_CNP) { /* * Only in pre-B0 h/w is the CNP_OPCODE handled @@ -365,7 +371,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, sc5 = hfi1_9B_get_sc5(rhdr, packet->rhf); sl = ibp->sc_to_sl[sc5]; - lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK; + lqpn = ib_bth_get_qpn(packet->ohdr); rcu_read_lock(); qp = rvt_lookup_qpn(rdi, &ibp->rvp, lqpn); if (!qp) { @@ -415,7 +421,6 @@ static inline void init_packet(struct hfi1_ctxtdata *rcd, packet->rhf = rhf_to_cpu(packet->rhf_addr); packet->rhqoff = rcd->head; packet->numpkt = 0; - packet->rcv_flags = 0; } void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, @@ -424,15 +429,12 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct ib_header *hdr = pkt->hdr; struct ib_other_headers *ohdr = pkt->ohdr; - struct ib_grh *grh = NULL; + struct ib_grh *grh = pkt->grh; u32 rqpn = 0, bth1; u16 rlid, dlid = ib_get_dlid(hdr); u8 sc, svc_type; bool is_mcast = false; - if (pkt->rcv_flags & HFI1_HAS_GRH) - grh = &hdr->u.l.grh; - switch (qp->ibqp.qp_type) { case IB_QPT_SMI: case IB_QPT_GSI: @@ -591,9 +593,10 @@ static void __prescan_rxq(struct hfi1_packet *packet) if (lnh == HFI1_LRH_BTH) { packet->ohdr = &hdr->u.oth; + packet->grh = NULL; } else if (lnh == HFI1_LRH_GRH) { packet->ohdr = &hdr->u.l.oth; - packet->rcv_flags |= HFI1_HAS_GRH; + packet->grh = &hdr->u.l.grh; } else { goto next; /* just in case */ } @@ -698,10 +701,9 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread) { int ret; - packet->hdr = hfi1_get_msgheader(packet->rcd->dd, - packet->rhf_addr); - packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; packet->etype = rhf_rcv_type(packet->rhf); + + packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; /* total length */ packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */ /* retrieve eager buffer details */ @@ -759,7 +761,7 @@ static inline void process_rcv_update(int last, struct hfi1_packet *packet) packet->etail, 0, 0); packet->updegr = 0; } - packet->rcv_flags = 0; + packet->grh = NULL; } static inline void finish_packet(struct hfi1_packet *packet) @@ -896,12 +898,15 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, struct hfi1_devdata *dd) { struct work_struct *lsaw = &rcd->ppd->linkstate_active_work; - struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd, - packet->rhf_addr); u8 etype = rhf_rcv_type(packet->rhf); + u8 sc = SC15_PACKET; - if (etype == RHF_RCV_TYPE_IB && - hfi1_9B_get_sc5(hdr, packet->rhf) != 0xf) { + if (etype == RHF_RCV_TYPE_IB) { + struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd, + packet->rhf_addr); + sc = hfi1_9B_get_sc5(hdr, packet->rhf); + } + if (sc != SC15_PACKET) { int hwstate = read_logical_state(dd); if (hwstate != LSTATE_ACTIVE) { @@ -1321,6 +1326,58 @@ bail: return ret; } +static inline void hfi1_setup_ib_header(struct hfi1_packet *packet) +{ + packet->hdr = (struct hfi1_ib_message_header *) + hfi1_get_msgheader(packet->rcd->dd, + packet->rhf_addr); + packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; +} + +static int hfi1_setup_9B_packet(struct hfi1_packet *packet) +{ + struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); + struct ib_header *hdr; + u8 lnh; + + hfi1_setup_ib_header(packet); + hdr = packet->hdr; + + lnh = ib_get_lnh(hdr); + if (lnh == HFI1_LRH_BTH) { + packet->ohdr = &hdr->u.oth; + packet->grh = NULL; + } else if (lnh == HFI1_LRH_GRH) { + u32 vtf; + + packet->ohdr = &hdr->u.l.oth; + packet->grh = &hdr->u.l.grh; + if (packet->grh->next_hdr != IB_GRH_NEXT_HDR) + goto drop; + vtf = be32_to_cpu(packet->grh->version_tclass_flow); + if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) + goto drop; + } else { + goto drop; + } + + /* Query commonly used fields from packet header */ + packet->opcode = ib_bth_get_opcode(packet->ohdr); + packet->slid = ib_get_slid(hdr); + packet->dlid = ib_get_dlid(hdr); + packet->sl = ib_get_sl(hdr); + packet->sc = hfi1_9B_get_sc5(hdr, packet->rhf); + packet->pad = ib_bth_get_pad(packet->ohdr); + packet->extra_byte = 0; + packet->fecn = ib_bth_get_fecn(packet->ohdr); + packet->becn = ib_bth_get_becn(packet->ohdr); + + return 0; +drop: + ibp->rvp.n_pkt_drops++; + return -EINVAL; +} + void handle_eflags(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; @@ -1351,6 +1408,9 @@ int process_receive_ib(struct hfi1_packet *packet) if (unlikely(hfi1_dbg_fault_packet(packet))) return RHF_RCV_CONTINUE; + if (hfi1_setup_9B_packet(packet)) + return RHF_RCV_CONTINUE; + trace_hfi1_rcvhdr(packet->rcd->ppd->dd, packet->rcd->ctxt, rhf_err_flags(packet->rhf), @@ -1422,6 +1482,7 @@ int process_receive_error(struct hfi1_packet *packet) rhf_rcv_type_err(packet->rhf) == 3)) return RHF_RCV_CONTINUE; + hfi1_setup_ib_header(packet); handle_eflags(packet); if (unlikely(rhf_err_flags(packet->rhf))) @@ -1435,6 +1496,8 @@ int kdeth_process_expected(struct hfi1_packet *packet) { if (unlikely(hfi1_dbg_fault_packet(packet))) return RHF_RCV_CONTINUE; + + hfi1_setup_ib_header(packet); if (unlikely(rhf_err_flags(packet->rhf))) handle_eflags(packet); @@ -1445,6 +1508,7 @@ int kdeth_process_expected(struct hfi1_packet *packet) int kdeth_process_eager(struct hfi1_packet *packet) { + hfi1_setup_ib_header(packet); if (unlikely(rhf_err_flags(packet->rhf))) handle_eflags(packet); if (unlikely(hfi1_dbg_fault_packet(packet))) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 3b76631cbcbd..9c6c73448461 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -356,17 +356,26 @@ struct hfi1_packet { __le32 *rhf_addr; struct rvt_qp *qp; struct ib_other_headers *ohdr; + struct ib_grh *grh; u64 rhf; u32 maxcnt; u32 rhqoff; + u32 dlid; + u32 slid; u16 tlen; s16 etail; u8 hlen; u8 numpkt; u8 rsize; u8 updegr; - u8 rcv_flags; u8 etype; + u8 extra_byte; + u8 pad; + u8 sc; + u8 sl; + u8 opcode; + bool becn; + bool fecn; }; struct rvt_sge_state; @@ -2086,4 +2095,14 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); #define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev)) #define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev)) + +/* + * hfi1_check_mcast- Check if the given lid is + * in the IB multicast range. + */ +static inline bool hfi1_check_mcast(u16 lid) +{ + return ((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && + (lid != be16_to_cpu(IB_LID_PERMISSIVE))); +} #endif /* _HFI1_KERNEL_H */ diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index b443c1e01543..baa67bf0772b 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1916,17 +1916,16 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, void hfi1_rc_rcv(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; - struct ib_header *hdr = packet->hdr; - u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; struct hfi1_ibport *ibp = rcd_to_iport(rcd); struct ib_other_headers *ohdr = packet->ohdr; - u32 bth0, opcode; + u32 bth0; + u32 opcode = packet->opcode; u32 hdrsize = packet->hlen; u32 psn; - u32 pad; + u32 pad = packet->pad; struct ib_wc wc; u32 pmtu = qp->pmtu; int diff; @@ -1938,14 +1937,13 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) u32 rkey; lockdep_assert_held(&qp->r_lock); + bth0 = be32_to_cpu(ohdr->bth[0]); - if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0)) + if (hfi1_ruc_check_hdr(ibp, packet)) return; is_fecn = process_ecn(qp, packet, false); - psn = ib_bth_get_psn(ohdr); - opcode = ib_bth_get_opcode(ohdr); /* * Process responses (ACKs) before anything else. Note that the @@ -2075,8 +2073,6 @@ no_immediate_data: wc.wc_flags = 0; wc.ex.imm_data = 0; send_last: - /* Get the number of bytes the message was padded by. */ - pad = ib_bth_get_pad(ohdr); /* Check for invalid length. */ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) @@ -2369,28 +2365,19 @@ send_ack: void hfi1_rc_hdrerr( struct hfi1_ctxtdata *rcd, - struct ib_header *hdr, - u32 rcv_flags, + struct hfi1_packet *packet, struct rvt_qp *qp) { - int has_grh = rcv_flags & HFI1_HAS_GRH; - struct ib_other_headers *ohdr; struct hfi1_ibport *ibp = rcd_to_iport(rcd); int diff; u32 opcode; - u32 psn, bth0; - - /* Check for GRH */ - ohdr = &hdr->u.oth; - if (has_grh) - ohdr = &hdr->u.l.oth; + u32 psn; - bth0 = be32_to_cpu(ohdr->bth[0]); - if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0)) + if (hfi1_ruc_check_hdr(ibp, packet)) return; - psn = ib_bth_get_psn(ohdr); - opcode = ib_bth_get_opcode(ohdr); + psn = ib_bth_get_psn(packet->ohdr); + opcode = ib_bth_get_opcode(packet->ohdr); /* Only deal with RDMA Writes for now */ if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) { diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 3a17daba28a9..9cc9c7be9dd4 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -214,100 +214,95 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) * * The s_lock will be acquired around the hfi1_migrate_qp() call. */ -int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr, - int has_grh, struct rvt_qp *qp, u32 bth0) +int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet) { __be64 guid; unsigned long flags; + struct rvt_qp *qp = packet->qp; u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)]; - - if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) { - if (!has_grh) { + u32 dlid = packet->dlid; + u32 slid = packet->slid; + u32 sl = packet->sl; + int migrated; + u32 bth0, bth1; + + bth0 = be32_to_cpu(packet->ohdr->bth[0]); + bth1 = be32_to_cpu(packet->ohdr->bth[1]); + migrated = bth0 & IB_BTH_MIG_REQ; + + if (qp->s_mig_state == IB_MIG_ARMED && migrated) { + if (!packet->grh) { if (rdma_ah_get_ah_flags(&qp->alt_ah_attr) & IB_AH_GRH) - goto err; + return 1; } else { const struct ib_global_route *grh; if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) & IB_AH_GRH)) - goto err; + return 1; grh = rdma_ah_read_grh(&qp->alt_ah_attr); guid = get_sguid(ibp, grh->sgid_index); - if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix, + if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix, guid)) - goto err; + return 1; if (!gid_ok( - &hdr->u.l.grh.sgid, + &packet->grh->sgid, grh->dgid.global.subnet_prefix, grh->dgid.global.interface_id)) - goto err; + return 1; } - if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5, - ib_get_slid(hdr)))) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, - (u16)bth0, - ib_get_sl(hdr), - 0, qp->ibqp.qp_num, - ib_get_slid(hdr), - ib_get_dlid(hdr)); - goto err; + if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, + sc5, slid))) { + hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, (u16)bth0, sl, + 0, qp->ibqp.qp_num, slid, dlid); + return 1; } /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */ - if (ib_get_slid(hdr) != - rdma_ah_get_dlid(&qp->alt_ah_attr) || + if (slid != rdma_ah_get_dlid(&qp->alt_ah_attr) || ppd_from_ibp(ibp)->port != rdma_ah_get_port_num(&qp->alt_ah_attr)) - goto err; + return 1; spin_lock_irqsave(&qp->s_lock, flags); hfi1_migrate_qp(qp); spin_unlock_irqrestore(&qp->s_lock, flags); } else { - if (!has_grh) { + if (!packet->grh) { if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) - goto err; + return 1; } else { const struct ib_global_route *grh; if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) - goto err; + return 1; grh = rdma_ah_read_grh(&qp->remote_ah_attr); guid = get_sguid(ibp, grh->sgid_index); - if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix, + if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix, guid)) - goto err; + return 1; if (!gid_ok( - &hdr->u.l.grh.sgid, + &packet->grh->sgid, grh->dgid.global.subnet_prefix, grh->dgid.global.interface_id)) - goto err; + return 1; } - if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5, - ib_get_slid(hdr)))) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, - (u16)bth0, - ib_get_sl(hdr), - 0, qp->ibqp.qp_num, - ib_get_slid(hdr), - ib_get_dlid(hdr)); - goto err; + if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, + sc5, slid))) { + hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, (u16)bth0, sl, + 0, qp->ibqp.qp_num, slid, dlid); + return 1; } /* Validate the SLID. See Ch. 9.6.1.5 */ - if (ib_get_slid(hdr) != - rdma_ah_get_dlid(&qp->remote_ah_attr) || + if ((slid != rdma_ah_get_dlid(&qp->remote_ah_attr)) || ppd_from_ibp(ibp)->port != qp->port_num) - goto err; - if (qp->s_mig_state == IB_MIG_REARM && - !(bth0 & IB_BTH_MIG_REQ)) + return 1; + if (qp->s_mig_state == IB_MIG_REARM && !migrated) qp->s_mig_state = IB_MIG_ARMED; } return 0; - -err: - return 1; } /** diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 2a5650f8aee0..76c2451a53d7 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -297,31 +297,25 @@ bail_no_tx: void hfi1_uc_rcv(struct hfi1_packet *packet) { struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); - struct ib_header *hdr = packet->hdr; - u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; struct ib_other_headers *ohdr = packet->ohdr; - u32 bth0, opcode; + u32 opcode = packet->opcode; u32 hdrsize = packet->hlen; u32 psn; - u32 pad; + u32 pad = packet->pad; struct ib_wc wc; u32 pmtu = qp->pmtu; struct ib_reth *reth; - int has_grh = rcv_flags & HFI1_HAS_GRH; int ret; - bth0 = be32_to_cpu(ohdr->bth[0]); - if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0)) + if (hfi1_ruc_check_hdr(ibp, packet)) return; process_ecn(qp, packet, true); psn = ib_bth_get_psn(ohdr); - opcode = ib_bth_get_opcode(ohdr); - /* Compare the PSN verses the expected PSN. */ if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) { /* @@ -432,8 +426,6 @@ no_immediate_data: wc.ex.imm_data = 0; wc.wc_flags = 0; send_last: - /* Get the number of bytes the message was padded by. */ - pad = ib_bth_get_pad(ohdr); /* Check for invalid length. */ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) @@ -527,8 +519,6 @@ rdma_first: rdma_last_imm: wc.wc_flags = IB_WC_WITH_IMM; - /* Get the number of bytes the message was padded by. */ - pad = ib_bth_get_pad(ohdr); /* Check for invalid length. */ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 49fe179ad3ae..c995aa58c36a 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -668,36 +668,31 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, void hfi1_ud_rcv(struct hfi1_packet *packet) { struct ib_other_headers *ohdr = packet->ohdr; - int opcode; u32 hdrsize = packet->hlen; struct ib_wc wc; u32 qkey; u32 src_qp; - u16 dlid, pkey; + u16 pkey; int mgmt_pkey_idx = -1; struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct ib_header *hdr = packet->hdr; - u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; - bool has_grh = rcv_flags & HFI1_HAS_GRH; u8 sc5 = hfi1_9B_get_sc5(hdr, packet->rhf); u32 bth1; - u8 sl_from_sc, sl; - u16 slid; - u8 extra_bytes; + u8 sl_from_sc; + u8 extra_bytes = packet->pad; + u8 opcode = packet->opcode; + u8 sl = packet->sl; + u32 dlid = packet->dlid; + u32 slid = packet->slid; + bth1 = be32_to_cpu(ohdr->bth[1]); qkey = ib_get_qkey(ohdr); src_qp = ib_get_sqpn(ohdr); - dlid = ib_get_dlid(hdr); - bth1 = be32_to_cpu(ohdr->bth[1]); - slid = ib_get_slid(hdr); pkey = ib_bth_get_pkey(ohdr); - opcode = ib_bth_get_opcode(ohdr); - sl = ib_get_sl(hdr); - extra_bytes = ib_bth_get_pad(ohdr); extra_bytes += (SIZE_OF_CRC << 2); sl_from_sc = ibp->sc_to_sl[sc5]; @@ -811,7 +806,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) qp->r_flags |= RVT_R_REUSE_SGE; goto drop; } - if (has_grh) { + if (packet->grh) { hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh, sizeof(struct ib_grh), true, false); wc.wc_flags |= IB_WC_GRH; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 5f4be35f31b6..af54d3f4696a 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -508,13 +508,14 @@ again: /* * Make sure the QP is ready and able to accept the given opcode. */ -static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet) +static inline opcode_handler qp_ok(struct hfi1_packet *packet) { if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK)) return NULL; - if (((opcode & RVT_OPCODE_QP_MASK) == packet->qp->allowed_ops) || - (opcode == IB_OPCODE_CNP)) - return opcode_handler_tbl[opcode]; + if (((packet->opcode & RVT_OPCODE_QP_MASK) == + packet->qp->allowed_ops) || + (packet->opcode == IB_OPCODE_CNP)) + return opcode_handler_tbl[packet->opcode]; return NULL; } @@ -548,68 +549,34 @@ static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc) return pbc; } -/** - * hfi1_ib_rcv - process an incoming packet - * @packet: data packet information - * - * This is called to process an incoming packet at interrupt level. - * - * Tlen is the length of the header + data + CRC in bytes. - */ -void hfi1_ib_rcv(struct hfi1_packet *packet) +static inline void hfi1_handle_packet(struct hfi1_packet *packet, + bool is_mcast) { + u32 qp_num; struct hfi1_ctxtdata *rcd = packet->rcd; - struct ib_header *hdr = packet->hdr; - u32 tlen = packet->tlen; struct hfi1_pportdata *ppd = rcd->ppd; struct hfi1_ibport *ibp = rcd_to_iport(rcd); struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi; opcode_handler packet_handler; unsigned long flags; - u32 qp_num; - int lnh; - u8 opcode; - u16 lid; - - /* Check for GRH */ - lnh = ib_get_lnh(hdr); - if (lnh == HFI1_LRH_BTH) { - packet->ohdr = &hdr->u.oth; - } else if (lnh == HFI1_LRH_GRH) { - u32 vtf; - - packet->ohdr = &hdr->u.l.oth; - if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR) - goto drop; - vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow); - if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) - goto drop; - packet->rcv_flags |= HFI1_HAS_GRH; - } else { - goto drop; - } - trace_input_ibhdr(rcd->dd, packet, !!(packet->rhf & RHF_DC_INFO_SMASK)); - opcode = ib_bth_get_opcode(packet->ohdr); - inc_opstats(tlen, &rcd->opstats->stats[opcode]); + inc_opstats(packet->tlen, &rcd->opstats->stats[packet->opcode]); - /* Get the destination QP number. */ - qp_num = ib_bth_get_qpn(packet->ohdr); - lid = ib_get_dlid(hdr); - if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && - (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) { + if (unlikely(is_mcast)) { struct rvt_mcast *mcast; struct rvt_mcast_qp *p; - if (lnh != HFI1_LRH_GRH) + if (!packet->grh) goto drop; - mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid, lid); + mcast = rvt_mcast_find(&ibp->rvp, + &packet->grh->dgid, + packet->dlid); if (!mcast) goto drop; list_for_each_entry_rcu(p, &mcast->qp_list, list) { packet->qp = p->qp; spin_lock_irqsave(&packet->qp->r_lock, flags); - packet_handler = qp_ok(opcode, packet); + packet_handler = qp_ok(packet); if (likely(packet_handler)) packet_handler(packet); else @@ -623,19 +590,21 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) if (atomic_dec_return(&mcast->refcount) <= 1) wake_up(&mcast->wait); } else { + /* Get the destination QP number. */ + qp_num = ib_bth_get_qpn(packet->ohdr); rcu_read_lock(); packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); if (!packet->qp) { rcu_read_unlock(); goto drop; } - if (unlikely(hfi1_dbg_fault_opcode(packet->qp, opcode, + if (unlikely(hfi1_dbg_fault_opcode(packet->qp, packet->opcode, true))) { rcu_read_unlock(); goto drop; } spin_lock_irqsave(&packet->qp->r_lock, flags); - packet_handler = qp_ok(opcode, packet); + packet_handler = qp_ok(packet); if (likely(packet_handler)) packet_handler(packet); else @@ -644,11 +613,29 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) rcu_read_unlock(); } return; - drop: ibp->rvp.n_pkt_drops++; } +/** + * hfi1_ib_rcv - process an incoming packet + * @packet: data packet information + * + * This is called to process an incoming packet at interrupt level. + */ +void hfi1_ib_rcv(struct hfi1_packet *packet) +{ + struct hfi1_ctxtdata *rcd = packet->rcd; + bool is_mcast = false; + + if (unlikely(hfi1_check_mcast(packet->dlid))) + is_mcast = true; + + trace_input_ibhdr(rcd->dd, packet, + !!(packet->rhf & RHF_DC_INFO_SMASK)); + hfi1_handle_packet(packet, is_mcast); +} + /* * This is called from a timer to check for QPs * which need kernel memory in order to send a packet. diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index cd635d0c1d3b..17b38cd5f654 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -307,8 +307,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet); void hfi1_rc_hdrerr( struct hfi1_ctxtdata *rcd, - struct ib_header *hdr, - u32 rcv_flags, + struct hfi1_packet *packet, struct rvt_qp *qp); u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr); @@ -346,8 +345,7 @@ static inline u8 get_opcode(struct ib_header *h) return be32_to_cpu(h->u.l.oth.bth[0]) >> 24; } -int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr, - int has_grh, struct rvt_qp *qp, u32 bth0); +int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet); u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, const struct ib_global_route *grh, u32 hwords, u32 nwords); -- cgit v1.2.1 From 14fe13fcd3afb96b06809f280b586be1c998332c Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 12 May 2017 09:20:31 -0700 Subject: IB/rdmavt: Compress adjacent SGEs in rvt_lkey_ok() SGEs that are contiguous needlessly consume driver dependent TX resources. The lkey validation logic is enhanced to compress the SGE that ends up in the send wqe when consecutive addresses are detected. The lkey validation API used to return 1 (success) or 0 (fail). The return value is now an -errno, 0 (compressed), or 1 (uncompressed). A additional argument is added to pass the last SQE for the compression. Loopback callers always pass a NULL to last_sge since the optimization is of little benefit in that situation. Reviewed-by: Dennis Dalessandro Signed-off-by: Brian Welty Signed-off-by: Venkata Sandeep Dhanalakota Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/ruc.c | 2 +- drivers/infiniband/hw/qib/qib_ruc.c | 2 +- drivers/infiniband/sw/rdmavt/mr.c | 51 +++++++++++++++++++++++---- drivers/infiniband/sw/rdmavt/qp.c | 23 ++++++------ drivers/infiniband/sw/rdmavt/trace_mr.h | 62 +++++++++++++++++++++++++++++++++ drivers/infiniband/sw/rdmavt/trace_tx.h | 11 +++--- include/rdma/rdma_vt.h | 3 +- 7 files changed, 130 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 9cc9c7be9dd4..476fe5da2992 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -75,7 +75,7 @@ static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) continue; /* Check LKEY */ if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, - &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) + NULL, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) goto bad_lkey; qp->r_len += wqe->sg_list[i].length; j++; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index bd09de7c6e56..88d84cbf7e5a 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -59,7 +59,7 @@ static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) continue; /* Check LKEY */ if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, - &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) + NULL, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) goto bad_lkey; qp->r_len += wqe->sg_list[i].length; j++; diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index aa5f9ea318e4..ea95672d9675 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -777,24 +777,55 @@ out: return ret; } +/** + * rvt_sge_adjacent - is isge compressible + * @isge: outgoing internal SGE + * @last_sge: last outgoing SGE written + * @sge: SGE to check + * + * If adjacent will update last_sge to add length. + * + * Return: true if isge is adjacent to last sge + */ +static inline bool rvt_sge_adjacent(struct rvt_sge *isge, + struct rvt_sge *last_sge, + struct ib_sge *sge) +{ + if (last_sge && sge->lkey == last_sge->mr->lkey && + ((uint64_t)(last_sge->vaddr + last_sge->length) == sge->addr)) { + if (sge->lkey) { + if (unlikely((sge->addr - last_sge->mr->user_base + + sge->length > last_sge->mr->length))) + return false; /* overrun, caller will catch */ + } else { + last_sge->length += sge->length; + } + last_sge->sge_length += sge->length; + trace_rvt_sge_adjacent(last_sge, sge); + return true; + } + return false; +} + /** * rvt_lkey_ok - check IB SGE for validity and initialize * @rkt: table containing lkey to check SGE against * @pd: protection domain * @isge: outgoing internal SGE + * @last_sge: last outgoing SGE written * @sge: SGE to check * @acc: access flags * * Check the IB SGE for validity and initialize our internal version * of it. * - * Return: 1 if valid and successful, otherwise returns 0. - * - * increments the reference count upon success + * Increments the reference count when a new sge is stored. * + * Return: 0 if compressed, 1 if added , otherwise returns -errno. */ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, - struct rvt_sge *isge, struct ib_sge *sge, int acc) + struct rvt_sge *isge, struct rvt_sge *last_sge, + struct ib_sge *sge, int acc) { struct rvt_mregion *mr; unsigned n, m; @@ -804,12 +835,14 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, * We use LKEY == zero for kernel virtual addresses * (see rvt_get_dma_mr() and dma_virt_ops). */ - rcu_read_lock(); if (sge->lkey == 0) { struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); if (pd->user) - goto bail; + return -EINVAL; + if (rvt_sge_adjacent(isge, last_sge, sge)) + return 0; + rcu_read_lock(); mr = rcu_dereference(dev->dma_mr); if (!mr) goto bail; @@ -824,6 +857,9 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, isge->n = 0; goto ok; } + if (rvt_sge_adjacent(isge, last_sge, sge)) + return 0; + rcu_read_lock(); mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]); if (!mr) goto bail; @@ -874,12 +910,13 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, isge->m = m; isge->n = n; ok: + trace_rvt_sge_new(isge, sge); return 1; bail_unref: rvt_put_mr(mr); bail: rcu_read_unlock(); - return 0; + return -EINVAL; } EXPORT_SYMBOL(rvt_lkey_ok); diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 727e81cc2c8f..a3dd1e536860 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1646,7 +1646,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct rvt_pd *pd; struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); u8 log_pmtu; - int ret; + int ret, incr; size_t cplen; bool reserved_op; int local_ops_delayed = 0; @@ -1719,22 +1719,23 @@ static int rvt_post_one_wr(struct rvt_qp *qp, wqe->length = 0; j = 0; if (wr->num_sge) { + struct rvt_sge *last_sge = NULL; + acc = wr->opcode >= IB_WR_RDMA_READ ? IB_ACCESS_LOCAL_WRITE : 0; for (i = 0; i < wr->num_sge; i++) { u32 length = wr->sg_list[i].length; - int ok; if (length == 0) continue; - ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], - &wr->sg_list[i], acc); - if (!ok) { - ret = -EINVAL; - goto bail_inval_free; - } + incr = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], last_sge, + &wr->sg_list[i], acc); + if (unlikely(incr < 0)) + goto bail_lkey_error; wqe->length += length; - j++; + if (incr) + last_sge = &wqe->sg_list[j]; + j += incr; } wqe->wr.num_sge = j; } @@ -1781,12 +1782,14 @@ static int rvt_post_one_wr(struct rvt_qp *qp, wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED; qp->s_avail--; } - trace_rvt_post_one_wr(qp, wqe); + trace_rvt_post_one_wr(qp, wqe, wr->num_sge); smp_wmb(); /* see request builders */ qp->s_head = next; return 0; +bail_lkey_error: + ret = incr; bail_inval_free: /* release mr holds */ while (j) { diff --git a/drivers/infiniband/sw/rdmavt/trace_mr.h b/drivers/infiniband/sw/rdmavt/trace_mr.h index 3318a6c36373..976e482930a3 100644 --- a/drivers/infiniband/sw/rdmavt/trace_mr.h +++ b/drivers/infiniband/sw/rdmavt/trace_mr.h @@ -103,6 +103,68 @@ DEFINE_EVENT( TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len), TP_ARGS(mr, m, n, v, len)); +DECLARE_EVENT_CLASS( + rvt_sge_template, + TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge), + TP_ARGS(sge, isge), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(sge->mr->pd->device)) + __field(struct rvt_mregion *, mr) + __field(struct rvt_sge *, sge) + __field(struct ib_sge *, isge) + __field(void *, vaddr) + __field(u64, ivaddr) + __field(u32, lkey) + __field(u32, sge_length) + __field(u32, length) + __field(u32, ilength) + __field(int, user) + __field(u16, m) + __field(u16, n) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(sge->mr->pd->device)); + __entry->mr = sge->mr; + __entry->sge = sge; + __entry->isge = isge; + __entry->vaddr = sge->vaddr; + __entry->ivaddr = isge->addr; + __entry->lkey = sge->mr->lkey; + __entry->sge_length = sge->sge_length; + __entry->length = sge->length; + __entry->ilength = isge->length; + __entry->m = sge->m; + __entry->n = sge->m; + __entry->user = ibpd_to_rvtpd(sge->mr->pd)->user; + ), + TP_printk( + "[%s] mr %p sge %p isge %p vaddr %p ivaddr %llx lkey %x sge_length %u length %u ilength %u m %u n %u user %u", + __get_str(dev), + __entry->mr, + __entry->sge, + __entry->isge, + __entry->vaddr, + __entry->ivaddr, + __entry->lkey, + __entry->sge_length, + __entry->length, + __entry->ilength, + __entry->m, + __entry->n, + __entry->user + ) +); + +DEFINE_EVENT( + rvt_sge_template, rvt_sge_adjacent, + TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge), + TP_ARGS(sge, isge)); + +DEFINE_EVENT( + rvt_sge_template, rvt_sge_new, + TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge), + TP_ARGS(sge, isge)); + #endif /* __RVT_TRACE_MR_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h index a613a2223751..0ef25fc49f25 100644 --- a/drivers/infiniband/sw/rdmavt/trace_tx.h +++ b/drivers/infiniband/sw/rdmavt/trace_tx.h @@ -84,12 +84,12 @@ __print_symbolic(opcode, \ wr_opcode_name(RESERVED10)) #define POS_PRN \ -"[%s] wqe %p wr_id %llx send_flags %x qpn %x qpt %u psn %x lpsn %x ssn %x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u pid %u num_sge %u" +"[%s] wqe %p wr_id %llx send_flags %x qpn %x qpt %u psn %x lpsn %x ssn %x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u pid %u num_sge %u wr_num_sge %u" TRACE_EVENT( rvt_post_one_wr, - TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe), - TP_ARGS(qp, wqe), + TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe, int wr_num_sge), + TP_ARGS(qp, wqe, wr_num_sge), TP_STRUCT__entry( RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) __field(u64, wr_id) @@ -108,6 +108,7 @@ TRACE_EVENT( __field(int, send_flags) __field(pid_t, pid) __field(int, num_sge) + __field(int, wr_num_sge) ), TP_fast_assign( RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) @@ -127,6 +128,7 @@ TRACE_EVENT( __entry->ssn = wqe->ssn; __entry->send_flags = wqe->wr.send_flags; __entry->num_sge = wqe->wr.num_sge; + __entry->wr_num_sge = wr_num_sge; ), TP_printk( POS_PRN, @@ -146,7 +148,8 @@ TRACE_EVENT( __entry->head, __entry->last, __entry->pid, - __entry->num_sge + __entry->num_sge, + __entry->wr_num_sge ) ); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 4878aaf7bdff..d0b9f91e5f4d 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -515,7 +515,8 @@ int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey); int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, - struct rvt_sge *isge, struct ib_sge *sge, int acc); + struct rvt_sge *isge, struct rvt_sge *last_sge, + struct ib_sge *sge, int acc); struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid, u16 lid); -- cgit v1.2.1 From 7be85676f1d13c77a7e0c72e04903bfd39580d4f Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 26 May 2017 05:35:12 -0700 Subject: IB/hfi1: Don't remove RB entry when not needed. An RB tree is used for the SDMA pinning cache. Cache entries are extracted and reinserted from the tree in case the address range for it changes. However, if the address range for the entry doesn't change, deleting the entry from the RB tree is not necessary. This affects performance since the tree needs to be rebalanced for each insertion, and this happens in the hot path. Optimize RB search by not removing entries when it's not needed. Reviewed-by: Mike Marciniszyn Reviewed-by: Mitko Haralanov Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mmu_rb.c | 14 ++++++++++---- drivers/infiniband/hw/hfi1/mmu_rb.h | 5 +++-- drivers/infiniband/hw/hfi1/user_sdma.c | 23 ++++++++++++++++------- 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index ccbf52c8ff6f..d41fd87a39f2 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -217,21 +217,27 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler, return node; } -struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler, - unsigned long addr, unsigned long len) +bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler, + unsigned long addr, unsigned long len, + struct mmu_rb_node **rb_node) { struct mmu_rb_node *node; unsigned long flags; + bool ret = false; spin_lock_irqsave(&handler->lock, flags); node = __mmu_rb_search(handler, addr, len); if (node) { + if (node->addr == addr && node->len == len) + goto unlock; __mmu_int_rb_remove(node, &handler->root); list_del(&node->list); /* remove from LRU list */ + ret = true; } +unlock: spin_unlock_irqrestore(&handler->lock, flags); - - return node; + *rb_node = node; + return ret; } void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg) diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h index 754f6ebf13fb..f04cec1e99d1 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.h +++ b/drivers/infiniband/hw/hfi1/mmu_rb.h @@ -81,7 +81,8 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler, void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg); void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler, struct mmu_rb_node *mnode); -struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler, - unsigned long addr, unsigned long len); +bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler, + unsigned long addr, unsigned long len, + struct mmu_rb_node **rb_node); #endif /* _HFI1_MMU_RB_H */ diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 16fd519216dc..79450cf2a3d5 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -1165,14 +1165,23 @@ static int pin_vector_pages(struct user_sdma_request *req, struct hfi1_user_sdma_pkt_q *pq = req->pq; struct sdma_mmu_node *node = NULL; struct mmu_rb_node *rb_node; - - rb_node = hfi1_mmu_rb_extract(pq->handler, - (unsigned long)iovec->iov.iov_base, - iovec->iov.iov_len); - if (rb_node) + bool extracted; + + extracted = + hfi1_mmu_rb_remove_unless_exact(pq->handler, + (unsigned long) + iovec->iov.iov_base, + iovec->iov.iov_len, &rb_node); + if (rb_node) { node = container_of(rb_node, struct sdma_mmu_node, rb); - else - rb_node = NULL; + if (!extracted) { + atomic_inc(&node->refcount); + iovec->pages = node->pages; + iovec->npages = node->npages; + iovec->node = node; + return 0; + } + } if (!node) { node = kzalloc(sizeof(*node), GFP_KERNEL); -- cgit v1.2.1 From e3304b7cc4f14d365f46d6847a35563ae8b017f7 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 26 May 2017 05:35:18 -0700 Subject: IB/hfi1: Optimize cachelines for user SDMA request structure The current user SDMA request structure layout has holes. The cachelines can be reduced to improve cacheline trading. Separate fields in the following categories: mostly read, writable and shared with interrupt. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 106 ++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 48 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 79450cf2a3d5..92517cebb4c7 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -117,6 +117,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define AHG_KDETH_INTR_SHIFT 12 #define AHG_KDETH_SH_SHIFT 13 +#define AHG_KDETH_ARRAY_SIZE 9 #define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4) #define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff) @@ -204,25 +205,42 @@ struct evict_data { }; struct user_sdma_request { - struct sdma_req_info info; - struct hfi1_user_sdma_pkt_q *pq; - struct hfi1_user_sdma_comp_q *cq; /* This is the original header from user space */ struct hfi1_pkt_header hdr; + + /* Read mostly fields */ + struct hfi1_user_sdma_pkt_q *pq ____cacheline_aligned_in_smp; + struct hfi1_user_sdma_comp_q *cq; /* * Pointer to the SDMA engine for this request. * Since different request could be on different VLs, * each request will need it's own engine pointer. */ struct sdma_engine *sde; - s8 ahg_idx; - u32 ahg[9]; + struct sdma_req_info info; + /* TID array values copied from the tid_iov vector */ + u32 *tids; + /* total length of the data in the request */ + u32 data_len; + /* number of elements copied to the tids array */ + u16 n_tids; /* - * KDETH.Offset (Eager) field - * We need to remember the initial value so the headers - * can be updated properly. + * We copy the iovs for this request (based on + * info.iovcnt). These are only the data vectors */ - u32 koffset; + u8 data_iovs; + s8 ahg_idx; + + /* Writeable fields shared with interrupt */ + u64 seqcomp ____cacheline_aligned_in_smp; + u64 seqsubmitted; + unsigned long flags; + /* status of the last txreq completed */ + int status; + + /* Send side fields */ + struct list_head txps ____cacheline_aligned_in_smp; + u64 seqnum; /* * KDETH.OFFSET (TID) field * The offset can cover multiple packets, depending on the @@ -230,29 +248,19 @@ struct user_sdma_request { */ u32 tidoffset; /* - * We copy the iovs for this request (based on - * info.iovcnt). These are only the data vectors + * KDETH.Offset (Eager) field + * We need to remember the initial value so the headers + * can be updated properly. */ - unsigned data_iovs; - /* total length of the data in the request */ - u32 data_len; + u32 koffset; + u32 sent; + /* TID index copied from the tid_iov vector */ + u16 tididx; /* progress index moving along the iovs array */ - unsigned iov_idx; + u8 iov_idx; + struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ]; - /* number of elements copied to the tids array */ - u16 n_tids; - /* TID array values copied from the tid_iov vector */ - u32 *tids; - u16 tididx; - u32 sent; - u64 seqnum; - u64 seqcomp; - u64 seqsubmitted; - struct list_head txps; - unsigned long flags; - /* status of the last txreq completed */ - int status; -}; +} ____cacheline_aligned_in_smp; /* * A single txreq could span up to 3 physical pages when the MTU @@ -1034,11 +1042,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) datalen); if (changes < 0) goto free_tx; - sdma_txinit_ahg(&tx->txreq, - SDMA_TXREQ_F_USE_AHG, - datalen, req->ahg_idx, changes, - req->ahg, sizeof(req->hdr), - user_sdma_txreq_cb); } } else { ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) + @@ -1442,21 +1445,22 @@ done: } static int set_txreq_header_ahg(struct user_sdma_request *req, - struct user_sdma_txreq *tx, u32 len) + struct user_sdma_txreq *tx, u32 datalen) { + u32 ahg[AHG_KDETH_ARRAY_SIZE]; int diff = 0; u8 omfactor; /* KDETH.OM */ struct hfi1_user_sdma_pkt_q *pq = req->pq; struct hfi1_pkt_header *hdr = &req->hdr; u16 pbclen = le16_to_cpu(hdr->pbc[0]); - u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(len)); + u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen)); if (PBC2LRH(pbclen) != lrhlen) { /* PBC.PbcLengthDWs */ - AHG_HEADER_SET(req->ahg, diff, 0, 0, 12, + AHG_HEADER_SET(ahg, diff, 0, 0, 12, cpu_to_le16(LRH2PBC(lrhlen))); /* LRH.PktLen (we need the full 16 bits due to byte swap) */ - AHG_HEADER_SET(req->ahg, diff, 3, 0, 16, + AHG_HEADER_SET(ahg, diff, 3, 0, 16, cpu_to_be16(lrhlen >> 2)); } @@ -1468,13 +1472,12 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff); if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK)) val32 |= 1UL << 31; - AHG_HEADER_SET(req->ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16)); - AHG_HEADER_SET(req->ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff)); + AHG_HEADER_SET(ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16)); + AHG_HEADER_SET(ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff)); /* KDETH.Offset */ - AHG_HEADER_SET(req->ahg, diff, 15, 0, 16, + AHG_HEADER_SET(ahg, diff, 15, 0, 16, cpu_to_le16(req->koffset & 0xffff)); - AHG_HEADER_SET(req->ahg, diff, 15, 16, 16, - cpu_to_le16(req->koffset >> 16)); + AHG_HEADER_SET(ahg, diff, 15, 16, 16, cpu_to_le16(req->koffset >> 16)); if (req_opcode(req->info.ctrl) == EXPECTED) { __le16 val; @@ -1492,9 +1495,8 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, * we have to check again. */ if (++req->tididx > req->n_tids - 1 || - !req->tids[req->tididx]) { + !req->tids[req->tididx]) return -EINVAL; - } tidval = req->tids[req->tididx]; } omfactor = ((EXP_TID_GET(tidval, LEN) * @@ -1502,7 +1504,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT : KDETH_OM_SMALL_SHIFT; /* KDETH.OM and KDETH.OFFSET (TID) */ - AHG_HEADER_SET(req->ahg, diff, 7, 0, 16, + AHG_HEADER_SET(ahg, diff, 7, 0, 16, ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 | ((req->tidoffset >> omfactor) & 0x7fff))); @@ -1522,12 +1524,20 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, AHG_KDETH_INTR_SHIFT)); } - AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val); + AHG_HEADER_SET(ahg, diff, 7, 16, 14, val); } + if (diff < 0) + return diff; trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt, req->info.comp_idx, req->sde->this_idx, - req->ahg_idx, req->ahg, diff, tidval); + req->ahg_idx, ahg, diff, tidval); + sdma_txinit_ahg(&tx->txreq, + SDMA_TXREQ_F_USE_AHG, + datalen, req->ahg_idx, diff, + ahg, sizeof(req->hdr), + user_sdma_txreq_cb); + return diff; } -- cgit v1.2.1 From 721c462123b4b53745c1ccd99619b16e8f4e091b Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 26 May 2017 05:35:25 -0700 Subject: IB/hfi1: Name function prototype parameters for affinity module To improve the readability of function prototypes, give the parameters names in the affinity module. Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/affinity.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index e78c7aa094e0..2a1e374169c0 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -75,24 +75,26 @@ struct hfi1_msix_entry; /* Initialize non-HT cpu cores mask */ void init_real_cpu_mask(void); /* Initialize driver affinity data */ -int hfi1_dev_affinity_init(struct hfi1_devdata *); +int hfi1_dev_affinity_init(struct hfi1_devdata *dd); /* * Set IRQ affinity to a CPU. The function will determine the * CPU and set the affinity to it. */ -int hfi1_get_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *); +int hfi1_get_irq_affinity(struct hfi1_devdata *dd, + struct hfi1_msix_entry *msix); /* * Remove the IRQ's CPU affinity. This function also updates * any internal CPU tracking data */ -void hfi1_put_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *); +void hfi1_put_irq_affinity(struct hfi1_devdata *dd, + struct hfi1_msix_entry *msix); /* * Determine a CPU affinity for a user process, if the process does not * have an affinity set yet. */ -int hfi1_get_proc_affinity(int); +int hfi1_get_proc_affinity(int node); /* Release a CPU used by a user process. */ -void hfi1_put_proc_affinity(int); +void hfi1_put_proc_affinity(int cpu); struct hfi1_affinity_node { int node; -- cgit v1.2.1 From bb7dde8784913c06ccd1456bed6dcc5ebd0b3c24 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 26 May 2017 05:35:31 -0700 Subject: IB/hfi1: Replace deprecated pci functions with new API pci_enable_msix_range() and pci_disable_msix() have been deprecated. Updating to the new pci_alloc_irq_vectors() interface. Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/affinity.c | 18 +++--- drivers/infiniband/hw/hfi1/chip.c | 105 +++++++++++++++++----------------- drivers/infiniband/hw/hfi1/hfi.h | 6 +- drivers/infiniband/hw/hfi1/pcie.c | 80 ++++++-------------------- 4 files changed, 78 insertions(+), 131 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index e2cd2cd3b28a..a97055dd4fbd 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -335,10 +335,10 @@ static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu) sde->cpu = cpu; cpumask_clear(&msix->mask); cpumask_set_cpu(cpu, &msix->mask); - dd_dev_dbg(dd, "IRQ vector: %u, type %s engine %u -> cpu: %d\n", - msix->msix.vector, irq_type_names[msix->type], + dd_dev_dbg(dd, "IRQ: %u, type %s engine %u -> cpu: %d\n", + msix->irq, irq_type_names[msix->type], sde->this_idx, cpu); - irq_set_affinity_hint(msix->msix.vector, &msix->mask); + irq_set_affinity_hint(msix->irq, &msix->mask); /* * Set the new cpu in the hfi1_affinity_node and clean @@ -387,7 +387,7 @@ static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix) { struct irq_affinity_notify *notify = &msix->notify; - notify->irq = msix->msix.vector; + notify->irq = msix->irq; notify->notify = hfi1_irq_notifier_notify; notify->release = hfi1_irq_notifier_release; @@ -472,10 +472,10 @@ static int get_irq_affinity(struct hfi1_devdata *dd, } cpumask_set_cpu(cpu, &msix->mask); - dd_dev_info(dd, "IRQ vector: %u, type %s %s -> cpu: %d\n", - msix->msix.vector, irq_type_names[msix->type], + dd_dev_info(dd, "IRQ: %u, type %s %s -> cpu: %d\n", + msix->irq, irq_type_names[msix->type], extra, cpu); - irq_set_affinity_hint(msix->msix.vector, &msix->mask); + irq_set_affinity_hint(msix->irq, &msix->mask); if (msix->type == IRQ_SDMA) { sde->cpu = cpu; @@ -533,7 +533,7 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd, } } - irq_set_affinity_hint(msix->msix.vector, NULL); + irq_set_affinity_hint(msix->irq, NULL); cpumask_clear(&msix->mask); mutex_unlock(&node_affinity.lock); } diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 5dbee3c1bd45..9118618d28e5 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -12800,30 +12800,24 @@ static void clean_up_interrupts(struct hfi1_devdata *dd) for (i = 0; i < dd->num_msix_entries; i++, me++) { if (!me->arg) /* => no irq, no affinity */ continue; - hfi1_put_irq_affinity(dd, &dd->msix_entries[i]); - free_irq(me->msix.vector, me->arg); + hfi1_put_irq_affinity(dd, me); + free_irq(me->irq, me->arg); } + + /* clean structures */ + kfree(dd->msix_entries); + dd->msix_entries = NULL; + dd->num_msix_entries = 0; } else { /* INTx */ if (dd->requested_intx_irq) { free_irq(dd->pcidev->irq, dd); dd->requested_intx_irq = 0; } - } - - /* turn off interrupts */ - if (dd->num_msix_entries) { - /* MSI-X */ - pci_disable_msix(dd->pcidev); - } else { - /* INTx */ disable_intx(dd->pcidev); } - /* clean structures */ - kfree(dd->msix_entries); - dd->msix_entries = NULL; - dd->num_msix_entries = 0; + pci_free_irq_vectors(dd->pcidev); } /* @@ -12972,13 +12966,21 @@ static int request_msix_irqs(struct hfi1_devdata *dd) continue; /* make sure the name is terminated */ me->name[sizeof(me->name) - 1] = 0; + me->irq = pci_irq_vector(dd->pcidev, i); + /* + * On err return me->irq. Don't need to clear this + * because 'arg' has not been set, and cleanup will + * do the right thing. + */ + if (me->irq < 0) + return me->irq; - ret = request_threaded_irq(me->msix.vector, handler, thread, 0, + ret = request_threaded_irq(me->irq, handler, thread, 0, me->name, arg); if (ret) { dd_dev_err(dd, - "unable to allocate %s interrupt, vector %d, index %d, err %d\n", - err_info, me->msix.vector, idx, ret); + "unable to allocate %s interrupt, irq %d, index %d, err %d\n", + err_info, me->irq, idx, ret); return ret; } /* @@ -12989,8 +12991,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) ret = hfi1_get_irq_affinity(dd, me); if (ret) - dd_dev_err(dd, - "unable to pin IRQ %d\n", ret); + dd_dev_err(dd, "unable to pin IRQ %d\n", ret); } return ret; @@ -13009,7 +13010,7 @@ void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd) struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i]; struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr]; - synchronize_irq(me->msix.vector); + synchronize_irq(me->irq); } } @@ -13022,7 +13023,7 @@ void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd) return; hfi1_put_irq_affinity(dd, me); - free_irq(me->msix.vector, me->arg); + free_irq(me->irq, me->arg); me->arg = NULL; } @@ -13050,14 +13051,19 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd) DRIVER_NAME "_%d kctxt%d", dd->unit, idx); me->name[sizeof(me->name) - 1] = 0; me->type = IRQ_RCVCTXT; - + me->irq = pci_irq_vector(dd->pcidev, rcd->msix_intr); + if (me->irq < 0) { + dd_dev_err(dd, "vnic irq vector request (idx %d) fail %d\n", + idx, me->irq); + return; + } remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr); - ret = request_threaded_irq(me->msix.vector, receive_context_interrupt, + ret = request_threaded_irq(me->irq, receive_context_interrupt, receive_context_thread, 0, me->name, arg); if (ret) { - dd_dev_err(dd, "vnic irq request (vector %d, idx %d) fail %d\n", - me->msix.vector, idx, ret); + dd_dev_err(dd, "vnic irq request (irq %d, idx %d) fail %d\n", + me->irq, idx, ret); return; } /* @@ -13070,7 +13076,7 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd) if (ret) { dd_dev_err(dd, "unable to pin IRQ %d\n", ret); - free_irq(me->msix.vector, me->arg); + free_irq(me->irq, me->arg); } } @@ -13093,9 +13099,8 @@ static void reset_interrupts(struct hfi1_devdata *dd) static int set_up_interrupts(struct hfi1_devdata *dd) { - struct hfi1_msix_entry *entries; - u32 total, request; - int i, ret; + u32 total; + int ret, request; int single_interrupt = 0; /* we expect to have all the interrupts */ /* @@ -13107,39 +13112,31 @@ static int set_up_interrupts(struct hfi1_devdata *dd) */ total = 1 + dd->num_sdma + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT; - entries = kcalloc(total, sizeof(*entries), GFP_KERNEL); - if (!entries) { - ret = -ENOMEM; - goto fail; - } - /* 1-1 MSI-X entry assignment */ - for (i = 0; i < total; i++) - entries[i].msix.entry = i; - /* ask for MSI-X interrupts */ - request = total; - request_msix(dd, &request, entries); - - if (request == 0) { + request = request_msix(dd, total); + if (request < 0) { + ret = request; + goto fail; + } else if (request == 0) { /* using INTx */ /* dd->num_msix_entries already zero */ - kfree(entries); single_interrupt = 1; dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n"); + } else if (request < total) { + /* using MSI-X, with reduced interrupts */ + dd_dev_err(dd, "reduced interrupt found, wanted %u, got %u\n", + total, request); + ret = -EINVAL; + goto fail; } else { - /* using MSI-X */ - dd->num_msix_entries = request; - dd->msix_entries = entries; - - if (request != total) { - /* using MSI-X, with reduced interrupts */ - dd_dev_err( - dd, - "cannot handle reduced interrupt case, want %u, got %u\n", - total, request); - ret = -EINVAL; + dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries), + GFP_KERNEL); + if (!dd->msix_entries) { + ret = -ENOMEM; goto fail; } + /* using MSI-X */ + dd->num_msix_entries = total; dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total); } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 9c6c73448461..8f74cf6d6c9a 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -521,7 +521,7 @@ static inline void incr_cntr32(u32 *cntr) #define MAX_NAME_SIZE 64 struct hfi1_msix_entry { enum irq_type type; - struct msix_entry msix; + int irq; void *arg; char name[MAX_NAME_SIZE]; cpumask_t mask; @@ -1838,9 +1838,7 @@ void hfi1_pcie_cleanup(struct pci_dev *pdev); int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev); void hfi1_pcie_ddcleanup(struct hfi1_devdata *); int pcie_speeds(struct hfi1_devdata *dd); -void request_msix(struct hfi1_devdata *dd, u32 *nent, - struct hfi1_msix_entry *entry); -void hfi1_enable_intx(struct pci_dev *pdev); +int request_msix(struct hfi1_devdata *dd, u32 msireq); void restore_pci_variables(struct hfi1_devdata *dd); int do_pcie_gen3_transition(struct hfi1_devdata *dd); int parse_platform_config(struct hfi1_devdata *dd); diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 6a9f6f9819e1..f01841b51946 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -240,50 +240,6 @@ void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd) iounmap(dd->piobase); } -static void msix_setup(struct hfi1_devdata *dd, int pos, u32 *msixcnt, - struct hfi1_msix_entry *hfi1_msix_entry) -{ - int ret; - int nvec = *msixcnt; - struct msix_entry *msix_entry; - int i; - - /* - * We can't pass hfi1_msix_entry array to msix_setup - * so use a dummy msix_entry array and copy the allocated - * irq back to the hfi1_msix_entry array. - */ - msix_entry = kmalloc_array(nvec, sizeof(*msix_entry), GFP_KERNEL); - if (!msix_entry) { - ret = -ENOMEM; - goto do_intx; - } - - for (i = 0; i < nvec; i++) - msix_entry[i] = hfi1_msix_entry[i].msix; - - ret = pci_enable_msix_range(dd->pcidev, msix_entry, 1, nvec); - if (ret < 0) - goto free_msix_entry; - nvec = ret; - - for (i = 0; i < nvec; i++) - hfi1_msix_entry[i].msix = msix_entry[i]; - - kfree(msix_entry); - *msixcnt = nvec; - return; - -free_msix_entry: - kfree(msix_entry); - -do_intx: - dd_dev_err(dd, "pci_enable_msix_range %d vectors failed: %d, falling back to INTx\n", - nvec, ret); - *msixcnt = 0; - hfi1_enable_intx(dd->pcidev); -} - /* return the PCIe link speed from the given link status */ static u32 extract_speed(u16 linkstat) { @@ -364,33 +320,29 @@ int pcie_speeds(struct hfi1_devdata *dd) } /* - * Returns in *nent: - * - actual number of interrupts allocated + * Returns: + * - actual number of interrupts allocated or * - 0 if fell back to INTx. + * - error */ -void request_msix(struct hfi1_devdata *dd, u32 *nent, - struct hfi1_msix_entry *entry) +int request_msix(struct hfi1_devdata *dd, u32 msireq) { - int pos; + int nvec; - pos = dd->pcidev->msix_cap; - if (*nent && pos) { - msix_setup(dd, pos, nent, entry); - /* did it, either MSI-X or INTx */ - } else { - *nent = 0; - hfi1_enable_intx(dd->pcidev); + nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq, + PCI_IRQ_MSIX | PCI_IRQ_LEGACY); + if (nvec < 0) { + dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec); + return nvec; } tune_pcie_caps(dd); -} -void hfi1_enable_intx(struct pci_dev *pdev) -{ - /* first, turn on INTx */ - pci_intx(pdev, 1); - /* then turn off MSI-X */ - pci_disable_msix(pdev); + /* check for legacy IRQ */ + if (nvec == 1 && !dd->pcidev->msix_enabled) + return 0; + + return nvec; } /* restore command and BARs after a reset has wiped them out */ -- cgit v1.2.1 From cb49366f3616fdf197893c24a5b2677b8c26ce29 Mon Sep 17 00:00:00 2001 From: "Vishwanathapura, Niranjana" Date: Thu, 1 Jun 2017 17:04:02 -0700 Subject: IB/core,rdmavt,hfi1,opa-vnic: Send OPA cap_mask3 in trap Provide the ability for IB clients to modify the OPA specific capability mask and include this mask in the subsequent trap data. Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Michael N. Henry Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 7 ++----- drivers/infiniband/hw/hfi1/mad.h | 2 +- drivers/infiniband/hw/hfi1/verbs.c | 6 +++++- drivers/infiniband/sw/rdmavt/vt.c | 9 +++++++-- drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c | 27 ++++++++++++++++++++++++- include/rdma/ib_verbs.h | 3 ++- include/rdma/rdma_vt.h | 1 + 7 files changed, 44 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 5977673a52d4..70831ad621b0 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -260,6 +260,7 @@ void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num) data.issuer_lid = cpu_to_be32(lid); data.ntc_144.lid = data.issuer_lid; data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags); + data.ntc_144.cap_mask3 = cpu_to_be16(ibp->rvp.port_cap3_flags); send_trap(ibp, &data, sizeof(data)); } @@ -704,11 +705,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT; pi->buffer_units = cpu_to_be32(buffer_units); - pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported | - OPA_CAP_MASK3_IsEthOnFabricSupported); - /* Driver does not support mcast/collective configuration */ - pi->opa_cap_mask &= - cpu_to_be16(~OPA_CAP_MASK3_IsAddrRangeConfigSupported); + pi->opa_cap_mask = cpu_to_be16(ibp->rvp.port_cap3_flags); pi->collectivemask_multicastmask = ((HFI1_COLLECTIVE_NR & 0x7) << 3 | (HFI1_MCAST_NR & 0x7)); diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h index 5aa3fd1be653..a4e2506bd5ca 100644 --- a/drivers/infiniband/hw/hfi1/mad.h +++ b/drivers/infiniband/hw/hfi1/mad.h @@ -115,7 +115,7 @@ struct opa_mad_notice_attr { __be32 lid; /* LID where change occurred */ __be32 new_cap_mask; /* new capability mask */ __be16 reserved2; - __be16 cap_mask; + __be16 cap_mask3; __be16 change_flags; /* low 4 bits only */ } __packed ntc_144; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index af54d3f4696a..2d7759f0c6b4 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1537,9 +1537,13 @@ static void init_ibport(struct hfi1_pportdata *ppd) /* Set the prefix to the default value (see ch. 4.1.1) */ ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX; ibp->rvp.sm_lid = 0; - /* Below should only set bits defined in OPA PortInfo.CapabilityMask */ + /* + * Below should only set bits defined in OPA PortInfo.CapabilityMask + * and PortInfo.CapabilityMask3 + */ ibp->rvp.port_cap_flags = IB_PORT_AUTO_MIGR_SUP | IB_PORT_CAP_MASK_NOTICE_SUP; + ibp->rvp.port_cap3_flags = OPA_CAP_MASK3_IsSharedSpaceSupported; ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA; ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA; ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS; diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 0d7c6bb551d9..64bdd442078a 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -202,8 +202,13 @@ static int rvt_modify_port(struct ib_device *ibdev, u8 port_num, return -EINVAL; rvp = rdi->ports[port_index]; - rvp->port_cap_flags |= props->set_port_cap_mask; - rvp->port_cap_flags &= ~props->clr_port_cap_mask; + if (port_modify_mask & IB_PORT_OPA_MASK_CHG) { + rvp->port_cap3_flags |= props->set_port_cap_mask; + rvp->port_cap3_flags &= ~props->clr_port_cap_mask; + } else { + rvp->port_cap_flags |= props->set_port_cap_mask; + rvp->port_cap_flags &= ~props->clr_port_cap_mask; + } if (props->set_port_cap_mask || props->clr_port_cap_mask) rdi->driver_f.cap_mask_chg(rdi, port_num); diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c index 875694f9a7f9..32cdd7a35415 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c @@ -52,7 +52,9 @@ #include #include -#include +#include +#include +#include #include "opa_vnic_internal.h" @@ -979,6 +981,27 @@ static int vema_register(struct opa_vnic_ctrl_port *cport) return 0; } +/** + * opa_vnic_ctrl_config_dev -- This function sends a trap to the EM + * by way of ib_modify_port to indicate support for ethernet on the + * fabric. + * @cport: pointer to control port + * @en: enable or disable ethernet on fabric support + */ +static void opa_vnic_ctrl_config_dev(struct opa_vnic_ctrl_port *cport, bool en) +{ + struct ib_port_modify pm = { 0 }; + int i; + + if (en) + pm.set_port_cap_mask = OPA_CAP_MASK3_IsEthOnFabricSupported; + else + pm.clr_port_cap_mask = OPA_CAP_MASK3_IsEthOnFabricSupported; + + for (i = 1; i <= cport->num_ports; i++) + ib_modify_port(cport->ibdev, i, IB_PORT_OPA_MASK_CHG, &pm); +} + /** * opa_vnic_vema_add_one -- Handle new ib device * @device: ib device pointer @@ -1007,6 +1030,7 @@ static void opa_vnic_vema_add_one(struct ib_device *device) c_info("VNIC client initialized\n"); ib_set_client_data(device, &opa_vnic_client, cport); + opa_vnic_ctrl_config_dev(cport, true); } /** @@ -1025,6 +1049,7 @@ static void opa_vnic_vema_rem_one(struct ib_device *device, return; c_info("removing VNIC client\n"); + opa_vnic_ctrl_config_dev(cport, false); vema_unregister(cport); kfree(cport); } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 8f1ce4e27bbd..9d4d2a74c95e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -577,7 +577,8 @@ struct ib_device_modify { enum ib_port_modify_flags { IB_PORT_SHUTDOWN = 1, IB_PORT_INIT_TYPE = (1<<2), - IB_PORT_RESET_QKEY_CNTR = (1<<3) + IB_PORT_RESET_QKEY_CNTR = (1<<3), + IB_PORT_OPA_MASK_CHG = (1<<4) }; struct ib_port_modify { diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index d0b9f91e5f4d..0f18ffd98dd7 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -75,6 +75,7 @@ struct rvt_ibport { __be64 mkey; u64 tid; u32 port_cap_flags; + u16 port_cap3_flags; u32 pma_sample_start; u32 pma_sample_interval; __be16 pma_counter_select[5]; -- cgit v1.2.1 From b888429c202197916c8c549811a2dd62f090280d Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 26 May 2017 05:35:44 -0700 Subject: IB/hfi1: Remove atomic SDMA_REQ_SEND_DONE bit operation The atomic SDMA_REQ_SEND_DONE bit is set by the process-level code, and then the same process-level code uses the bit to test that all packets have been submitted incurring a costly atomic read. Use a bool type with a READ_ONCE/WRITE_ONCE pairing for this bit, and use the same condition that is used to set the bit to test that all packets have been submitted. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 92517cebb4c7..6fb70f0064a6 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -154,10 +154,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ /* SDMA request flag bits */ -#define SDMA_REQ_FOR_THREAD 1 -#define SDMA_REQ_SEND_DONE 2 -#define SDMA_REQ_HAS_ERROR 3 -#define SDMA_REQ_DONE_ERROR 4 +#define SDMA_REQ_HAS_ERROR 1 +#define SDMA_REQ_DONE_ERROR 2 #define SDMA_PKT_Q_INACTIVE BIT(0) #define SDMA_PKT_Q_ACTIVE BIT(1) @@ -258,6 +256,7 @@ struct user_sdma_request { u16 tididx; /* progress index moving along the iovs array */ u8 iov_idx; + u8 done; struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ]; } ____cacheline_aligned_in_smp; @@ -628,6 +627,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, req->seqsubmitted = 0; req->flags = 0; req->tids = NULL; + req->done = 0; INIT_LIST_HEAD(&req->txps); memcpy(&req->info, &info, sizeof(info)); @@ -809,7 +809,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, * request have been submitted to the SDMA engine. However, it * will not wait for send completions. */ - while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) { + while (req->seqsubmitted != req->info.npkts) { ret = user_sdma_send_pkts(req, pcount); if (ret < 0) { if (ret != -EBUSY) { @@ -1118,7 +1118,7 @@ dosend: ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count); req->seqsubmitted += count; if (req->seqsubmitted == req->info.npkts) { - set_bit(SDMA_REQ_SEND_DONE, &req->flags); + WRITE_ONCE(req->done, 1); /* * The txreq has already been submitted to the HW queue * so we can free the AHG entry now. Corruption will not @@ -1585,7 +1585,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) if (status != SDMA_TXREQ_S_OK) req->status = status; if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) && - (test_bit(SDMA_REQ_SEND_DONE, &req->flags) || + (READ_ONCE(req->done) || test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) { user_sdma_free_request(req, false); pq_update(pq); -- cgit v1.2.1 From e9c48ebd0cb4d31bbaf60ddec2a2fa40227b8cb5 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 26 May 2017 05:35:50 -0700 Subject: IB/hfi1: Remove atomic SDMA_REQ_HAS_ERROR bit operation Atomic bit tests are used to single errors and the completion of request submissions. These operations don't need to be atomic and show to be expensive on the profile. Replace each atomic bit operation with a bool type and a READ_ONCE/WRITE_ONCE pairing. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 6fb70f0064a6..fcadbb9978ca 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -153,10 +153,6 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ #define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ -/* SDMA request flag bits */ -#define SDMA_REQ_HAS_ERROR 1 -#define SDMA_REQ_DONE_ERROR 2 - #define SDMA_PKT_Q_INACTIVE BIT(0) #define SDMA_PKT_Q_ACTIVE BIT(1) #define SDMA_PKT_Q_DEFERRED BIT(2) @@ -232,7 +228,6 @@ struct user_sdma_request { /* Writeable fields shared with interrupt */ u64 seqcomp ____cacheline_aligned_in_smp; u64 seqsubmitted; - unsigned long flags; /* status of the last txreq completed */ int status; @@ -257,6 +252,7 @@ struct user_sdma_request { /* progress index moving along the iovs array */ u8 iov_idx; u8 done; + u8 has_error; struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ]; } ____cacheline_aligned_in_smp; @@ -625,9 +621,9 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, req->seqnum = 0; req->seqcomp = 0; req->seqsubmitted = 0; - req->flags = 0; req->tids = NULL; req->done = 0; + req->has_error = 0; INIT_LIST_HEAD(&req->txps); memcpy(&req->info, &info, sizeof(info)); @@ -814,7 +810,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, if (ret < 0) { if (ret != -EBUSY) { req->status = ret; - set_bit(SDMA_REQ_DONE_ERROR, &req->flags); + WRITE_ONCE(req->has_error, 1); if (ACCESS_ONCE(req->seqcomp) == req->seqsubmitted - 1) goto free_req; @@ -916,10 +912,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) pq = req->pq; /* If tx completion has reported an error, we are done. */ - if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) { - set_bit(SDMA_REQ_DONE_ERROR, &req->flags); + if (READ_ONCE(req->has_error)) return -EFAULT; - } /* * Check if we might have sent the entire request already @@ -942,10 +936,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) * with errors. If so, we are not going to process any * more packets from this request. */ - if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) { - set_bit(SDMA_REQ_DONE_ERROR, &req->flags); + if (READ_ONCE(req->has_error)) return -EFAULT; - } tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL); if (!tx) @@ -1566,7 +1558,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) if (status != SDMA_TXREQ_S_OK) { SDMA_DBG(req, "SDMA completion with error %d", status); - set_bit(SDMA_REQ_HAS_ERROR, &req->flags); + WRITE_ONCE(req->has_error, 1); } req->seqcomp = tx->seqnum; @@ -1586,7 +1578,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) req->status = status; if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) && (READ_ONCE(req->done) || - test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) { + READ_ONCE(req->has_error))) { user_sdma_free_request(req, false); pq_update(pq); set_comp_state(pq, cq, idx, ERROR, req->status); -- cgit v1.2.1 From b4e9e2f0fc87b876a4e2162733c1940e861785b1 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 26 May 2017 05:35:57 -0700 Subject: IB/hfi1: Reclassify type of messages printed for platform config logic Reclassify messages printed out to /var/log/messages into warnings and errors to facilitate debugging in the future for issues related to the platform config logic. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/platform.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 838fe84e285a..cbda9b189216 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -242,7 +242,7 @@ static int qual_power(struct hfi1_pportdata *ppd) if (ppd->offline_disabled_reason == HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY)) { - dd_dev_info( + dd_dev_err( ppd->dd, "%s: Port disabled due to system power restrictions\n", __func__); @@ -268,7 +268,7 @@ static int qual_bitrate(struct hfi1_pportdata *ppd) if (ppd->offline_disabled_reason == HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY)) { - dd_dev_info( + dd_dev_err( ppd->dd, "%s: Cable failed bitrate check, disabling port\n", __func__); @@ -709,15 +709,15 @@ static void apply_tunings( ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS, GENERAL_CONFIG, config_data); if (ret != HCMD_SUCCESS) - dd_dev_info(ppd->dd, - "%s: Failed set ext device config params\n", - __func__); + dd_dev_err(ppd->dd, + "%s: Failed set ext device config params\n", + __func__); } if (tx_preset_index == OPA_INVALID_INDEX) { if (ppd->port_type == PORT_TYPE_QSFP && limiting_active) - dd_dev_info(ppd->dd, "%s: Invalid Tx preset index\n", - __func__); + dd_dev_err(ppd->dd, "%s: Invalid Tx preset index\n", + __func__); return; } @@ -900,7 +900,7 @@ static int tune_qsfp(struct hfi1_pportdata *ppd, case 0xD: /* fallthrough */ case 0xF: default: - dd_dev_info(ppd->dd, "%s: Unknown/unsupported cable\n", + dd_dev_warn(ppd->dd, "%s: Unknown/unsupported cable\n", __func__); break; } @@ -942,7 +942,7 @@ void tune_serdes(struct hfi1_pportdata *ppd) case PORT_TYPE_DISCONNECTED: ppd->offline_disabled_reason = HFI1_ODR_MASK(OPA_LINKDOWN_REASON_DISCONNECTED); - dd_dev_info(dd, "%s: Port disconnected, disabling port\n", + dd_dev_warn(dd, "%s: Port disconnected, disabling port\n", __func__); goto bail; case PORT_TYPE_FIXED: @@ -1027,7 +1027,7 @@ void tune_serdes(struct hfi1_pportdata *ppd) } break; default: - dd_dev_info(ppd->dd, "%s: Unknown port type\n", __func__); + dd_dev_warn(ppd->dd, "%s: Unknown port type\n", __func__); ppd->port_type = PORT_TYPE_UNKNOWN; tuning_method = OPA_UNKNOWN_TUNING; total_atten = 0; -- cgit v1.2.1 From 90dba23e1e30af72dbf4379842a5161e811b26b8 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 26 May 2017 05:36:04 -0700 Subject: IB/hfi1: Fix up sdma_init function comment sdma_init does not take a number of sdma engine parameters, rather it initializes all of the sdma engines. Signed-off-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/sdma.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index bfd0d5187e9b..d82ff57214c5 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1340,10 +1340,8 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) * @dd: hfi1_devdata * @port: port number (currently only zero) * - * sdma_init initializes the specified number of engines. - * - * The code initializes each sde, its csrs. Interrupts - * are not required to be enabled. + * Initializes each sde and its csrs. + * Interrupts are not required to be enabled. * * Returns: * 0 - success, -errno on failure -- cgit v1.2.1 From b2f8a04e77bad520d52b7f321ca776b33c947ad0 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 29 May 2017 17:17:28 -0700 Subject: IB/rdmavt: Remove duplicated functions The free_qpn() function from the hfi1/qib driver which was the basis for rdmavt_free_qpn() function was accidentally left in the code. Remove it. Reviewed-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 37 ++++++++++++++----------------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index a3dd1e536860..a372afbbfbef 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -422,15 +422,6 @@ bail: return ret; } -static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) -{ - struct rvt_qpn_map *map; - - map = qpt->map + qpn / RVT_BITS_PER_PAGE; - if (map->page) - clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); -} - /** * rvt_clear_mr_refs - Drop help mr refs * @qp: rvt qp data structure @@ -646,6 +637,19 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, lockdep_assert_held(&qp->s_lock); } +/** rvt_free_qpn - Free a qpn from the bit map + * @qpt: QP table + * @qpn: queue pair number to free + */ +static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) +{ + struct rvt_qpn_map *map; + + map = qpt->map + qpn / RVT_BITS_PER_PAGE; + if (map->page) + clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); +} + /** * rvt_create_qp - create a queue pair for a device * @ibpd: the protection domain who's device we create the queue pair for @@ -936,7 +940,7 @@ bail_ip: kref_put(&qp->ip->ref, rvt_release_mmap_info); bail_qpn: - free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); + rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); bail_rq_wq: if (!qp->ip) @@ -1325,19 +1329,6 @@ inval: return -EINVAL; } -/** rvt_free_qpn - Free a qpn from the bit map - * @qpt: QP table - * @qpn: queue pair number to free - */ -static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) -{ - struct rvt_qpn_map *map; - - map = qpt->map + qpn / RVT_BITS_PER_PAGE; - if (map->page) - clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); -} - /** * rvt_destroy_qp - destroy a queue pair * @ibqp: the queue pair to destroy -- cgit v1.2.1 From bc54f6714c3a5d1f7ac6e7e5a5f7c390b1a01285 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 29 May 2017 17:18:14 -0700 Subject: IB/hfi1: Ensure dd->gi_mask can not be overflowed As the code stands today the array access in remap_intr() is OK. To future proof the code though we should explicitly check to ensure the index value is not outside of the valid range. This is not a straight forward calculation so err on the side of caution. Reviewed-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 9118618d28e5..d6af715c35bf 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -12832,7 +12832,12 @@ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr) /* clear from the handled mask of the general interrupt */ m = isrc / 64; n = isrc % 64; - dd->gi_mask[m] &= ~((u64)1 << n); + if (likely(m < CCE_NUM_INT_CSRS)) { + dd->gi_mask[m] &= ~((u64)1 << n); + } else { + dd_dev_err(dd, "remap interrupt err\n"); + return; + } /* direct the chip source to the given MSI-X interrupt */ m = isrc / 8; -- cgit v1.2.1 From 67838e64fa63415fe4e0da7149bcb123ed9f5612 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 29 May 2017 17:18:46 -0700 Subject: IB/hfi1: Fix spelling mistake in linkdown reason Spell receive correctly in OPA_LINKDOWN_REASON_RCV_ERROR Reviewed-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index d6af715c35bf..99c29ddcca35 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -6906,7 +6906,7 @@ static void reset_neighbor_info(struct hfi1_pportdata *ppd) static const char * const link_down_reason_strs[] = { [OPA_LINKDOWN_REASON_NONE] = "None", - [OPA_LINKDOWN_REASON_RCV_ERROR_0] = "Recive error 0", + [OPA_LINKDOWN_REASON_RCV_ERROR_0] = "Receive error 0", [OPA_LINKDOWN_REASON_BAD_PKT_LEN] = "Bad packet length", [OPA_LINKDOWN_REASON_PKT_TOO_LONG] = "Packet too long", [OPA_LINKDOWN_REASON_PKT_TOO_SHORT] = "Packet too short", -- cgit v1.2.1 From 6c31e5283cb06b81a13cc88da2f2ad3db594a935 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 29 May 2017 17:19:21 -0700 Subject: IB/hfi1: Use QPN mask to avoid overflow Ensure we can't come up with an array size that is bigger than the array by applying the QPN mask before the divide in the free_qpn function. Reviewed-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index a372afbbfbef..2ce0928dddd6 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -645,7 +645,7 @@ static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) { struct rvt_qpn_map *map; - map = qpt->map + qpn / RVT_BITS_PER_PAGE; + map = qpt->map + (qpn & RVT_QPN_MASK) / RVT_BITS_PER_PAGE; if (map->page) clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); } -- cgit v1.2.1 From 52d86e72c515ebd4fb0d329470aa50698e28fb36 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 29 May 2017 17:19:46 -0700 Subject: IB/hfi1: Remove subtraction of uninitialized value In process_receive_packet the packet header field is used to calculate the length of the packet. However this is not necessarily setup. In fact only if the ECN prescan is enabled will the packet header be valid at this point. The code works as is because we do not do anything with the packet length at this point in the packet processing. The length and header are setup correctly in hfi1_setup_ib_header which is called by the following sequence: process_receive_packet() -> rhf_receieve_function_map[]() --> process_receive_ib() ---> hfi1_setup_9B_packet() ----> hfi1_setup_ib_header() Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/driver.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 2a1022e374a5..9e59430bc55c 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -703,7 +703,6 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread) packet->etype = rhf_rcv_type(packet->rhf); - packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; /* total length */ packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */ /* retrieve eager buffer details */ -- cgit v1.2.1 From f1685179820bb7f9d9c4f5d0ac9bfb269529a919 Mon Sep 17 00:00:00 2001 From: Neel Desai Date: Mon, 29 May 2017 17:20:27 -0700 Subject: IB/hfi1: Add error checking for buffer overrun in OPA aggregate Improve safety of code by checking the size of the data buffer and prevent buffer overrun Reviewed-by: Dennis Dalessandro Reviewed-by: Brian Welty Signed-off-by: Neel Desai Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 296 ++++++++++++++++++++++++--------------- 1 file changed, 185 insertions(+), 111 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 70831ad621b0..b180dff5f2e9 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -59,6 +59,14 @@ #define OPA_LINK_WIDTH_RESET_OLD 0x0fff #define OPA_LINK_WIDTH_RESET 0xffff +static int smp_length_check(u32 data_size, u32 request_len) +{ + if (unlikely(request_len < data_size)) + return -EINVAL; + + return 0; +} + static int reply(struct ib_mad_hdr *smp) { /* @@ -308,11 +316,11 @@ void hfi1_node_desc_chg(struct hfi1_ibport *ibp) static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, - u8 port, u32 *resp_len) + u8 port, u32 *resp_len, u32 max_len) { struct opa_node_description *nd; - if (am) { + if (am || smp_length_check(sizeof(*nd), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -329,7 +337,7 @@ static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am, static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct opa_node_info *ni; struct hfi1_devdata *dd = dd_from_ibdev(ibdev); @@ -339,6 +347,7 @@ static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data, /* GUID 0 is illegal */ if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 || + smp_length_check(sizeof(*ni), max_len) || get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); @@ -520,7 +529,7 @@ void read_ltp_rtt(struct hfi1_devdata *dd) static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { int i; struct hfi1_devdata *dd; @@ -536,7 +545,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, u32 buffer_units; u64 tmp = 0; - if (num_ports != 1) { + if (num_ports != 1 || smp_length_check(sizeof(*pi), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -745,7 +754,7 @@ static int get_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys) static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); u32 n_blocks_req = OPA_AM_NBLK(am); @@ -768,6 +777,11 @@ static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16); + if (smp_length_check(size, max_len)) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + if (start_block + n_blocks_req > n_blocks_avail || n_blocks_req > OPA_NUM_PKEY_BLOCKS_PER_SMP) { pr_warn("OPA Get PKey AM Invalid : s 0x%x; req 0x%x; " @@ -1071,7 +1085,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, */ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct opa_port_info *pi = (struct opa_port_info *)data; struct ib_event event; @@ -1092,7 +1106,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, int ret, i, invalid = 0, call_set_mtu = 0; int call_link_downgrade_policy = 0; - if (num_ports != 1) { + if (num_ports != 1 || + smp_length_check(sizeof(*pi), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1343,7 +1358,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, if (ret) return ret; - ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len); + ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len, + max_len); /* restore re-reg bit per o14-12.2.1 */ pi->clientrereg_subnettimeout |= clientrereg; @@ -1360,7 +1376,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, return ret; get_only: - return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len, + max_len); } /** @@ -1421,7 +1438,7 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys) static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); u32 n_blocks_sent = OPA_AM_NBLK(am); @@ -1431,6 +1448,7 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, int i; u16 n_blocks_avail; unsigned npkeys = hfi1_get_npkeys(dd); + u32 size = 0; if (n_blocks_sent == 0) { pr_warn("OPA Get PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n", @@ -1441,6 +1459,13 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1; + size = sizeof(u16) * (n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE); + + if (smp_length_check(size, max_len)) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + if (start_block + n_blocks_sent > n_blocks_avail || n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) { pr_warn("OPA Set PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n", @@ -1458,7 +1483,8 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); } - return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len, + max_len); } #define ILLEGAL_VL 12 @@ -1519,14 +1545,14 @@ static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data) static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); u8 *p = data; size_t size = ARRAY_SIZE(ibp->sl_to_sc); /* == 32 */ unsigned i; - if (am) { + if (am || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1542,14 +1568,15 @@ static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); u8 *p = data; + size_t size = ARRAY_SIZE(ibp->sl_to_sc); int i; u8 sc; - if (am) { + if (am || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1564,19 +1591,20 @@ static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data, } } - return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len, + max_len); } static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); u8 *p = data; size_t size = ARRAY_SIZE(ibp->sc_to_sl); /* == 32 */ unsigned i; - if (am) { + if (am || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1592,13 +1620,14 @@ static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); + size_t size = ARRAY_SIZE(ibp->sc_to_sl); u8 *p = data; int i; - if (am) { + if (am || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1606,19 +1635,20 @@ static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data, for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++) ibp->sc_to_sl[i] = *p++; - return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len, + max_len); } static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 n_blocks = OPA_AM_NBLK(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); void *vp = (void *)data; size_t size = 4 * sizeof(u64); - if (n_blocks != 1) { + if (n_blocks != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1633,7 +1663,7 @@ static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 n_blocks = OPA_AM_NBLK(am); int async_update = OPA_AM_ASYNC(am); @@ -1641,8 +1671,15 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data, void *vp = (void *)data; struct hfi1_pportdata *ppd; int lstate; + /* + * set_sc2vlt_tables writes the information contained in *data + * to four 64-bit registers SendSC2VLt[0-3]. We need to make + * sure *max_len is not greater than the total size of the four + * SendSC2VLt[0-3] registers. + */ + size_t size = 4 * sizeof(u64); - if (n_blocks != 1 || async_update) { + if (n_blocks != 1 || async_update || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1662,27 +1699,28 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data, set_sc2vlt_tables(dd, vp); - return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len, + max_len); } static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 n_blocks = OPA_AM_NPORT(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd; void *vp = (void *)data; - int size; + int size = sizeof(struct sc2vlnt); - if (n_blocks != 1) { + if (n_blocks != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } ppd = dd->pport + (port - 1); - size = fm_get_table(ppd, FM_TBL_SC2VLNT, vp); + fm_get_table(ppd, FM_TBL_SC2VLNT, vp); if (resp_len) *resp_len += size; @@ -1692,15 +1730,16 @@ static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 n_blocks = OPA_AM_NPORT(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd; void *vp = (void *)data; int lstate; + int size = sizeof(struct sc2vlnt); - if (n_blocks != 1) { + if (n_blocks != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1718,12 +1757,12 @@ static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data, fm_set_table(ppd, FM_TBL_SC2VLNT, vp); return __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); } static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 nports = OPA_AM_NPORT(am); u32 start_of_sm_config = OPA_AM_START_SM_CFG(am); @@ -1732,7 +1771,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, struct hfi1_pportdata *ppd; struct opa_port_state_info *psi = (struct opa_port_state_info *)data; - if (nports != 1) { + if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1765,7 +1804,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 nports = OPA_AM_NPORT(am); u32 start_of_sm_config = OPA_AM_START_SM_CFG(am); @@ -1776,7 +1815,7 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data, struct opa_port_state_info *psi = (struct opa_port_state_info *)data; int ret, invalid = 0; - if (nports != 1) { + if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1806,19 +1845,21 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data, if (invalid) smp->status |= IB_SMP_INVALID_FIELD; - return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len, + max_len); } static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); u32 addr = OPA_AM_CI_ADDR(am); u32 len = OPA_AM_CI_LEN(am) + 1; int ret; - if (dd->pport->port_type != PORT_TYPE_QSFP) { + if (dd->pport->port_type != PORT_TYPE_QSFP || + smp_length_check(len, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1861,21 +1902,22 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data, } static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data, - struct ib_device *ibdev, u8 port, u32 *resp_len) + struct ib_device *ibdev, u8 port, u32 *resp_len, + u32 max_len) { u32 num_ports = OPA_AM_NPORT(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd; struct buffer_control *p = (struct buffer_control *)data; - int size; + int size = sizeof(struct buffer_control); - if (num_ports != 1) { + if (num_ports != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } ppd = dd->pport + (port - 1); - size = fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p); + fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p); trace_bct_get(dd, p); if (resp_len) *resp_len += size; @@ -1884,14 +1926,15 @@ static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data, } static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data, - struct ib_device *ibdev, u8 port, u32 *resp_len) + struct ib_device *ibdev, u8 port, u32 *resp_len, + u32 max_len) { u32 num_ports = OPA_AM_NPORT(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd; struct buffer_control *p = (struct buffer_control *)data; - if (num_ports != 1) { + if (num_ports != 1 || smp_length_check(sizeof(*p), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1902,41 +1945,43 @@ static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); } - return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len, + max_len); } static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port)); u32 num_ports = OPA_AM_NPORT(am); u8 section = (am & 0x00ff0000) >> 16; u8 *p = data; - int size = 0; + int size = 256; - if (num_ports != 1) { + if (num_ports != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } switch (section) { case OPA_VLARB_LOW_ELEMENTS: - size = fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p); + fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p); break; case OPA_VLARB_HIGH_ELEMENTS: - size = fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p); + fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p); break; case OPA_VLARB_PREEMPT_ELEMENTS: - size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p); + fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p); break; case OPA_VLARB_PREEMPT_MATRIX: - size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p); + fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p); break; default: pr_warn("OPA SubnGet(VL Arb) AM Invalid : 0x%x\n", be32_to_cpu(smp->attr_mod)); smp->status |= IB_SMP_INVALID_FIELD; + size = 0; break; } @@ -1948,14 +1993,15 @@ static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port)); u32 num_ports = OPA_AM_NPORT(am); u8 section = (am & 0x00ff0000) >> 16; u8 *p = data; + int size = 256; - if (num_ports != 1) { + if (num_ports != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1983,7 +2029,8 @@ static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data, break; } - return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len, + max_len); } struct opa_pma_mad { @@ -3279,13 +3326,18 @@ struct opa_congestion_info_attr { static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct opa_congestion_info_attr *p = (struct opa_congestion_info_attr *)data; struct hfi1_ibport *ibp = to_iport(ibdev, port); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + if (smp_length_check(sizeof(*p), max_len)) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + p->congestion_info = 0; p->control_table_cap = ppd->cc_max_table_entries; p->congestion_log_length = OPA_CONG_LOG_ELEMS; @@ -3298,7 +3350,7 @@ static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data, static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, - u8 port, u32 *resp_len) + u8 port, u32 *resp_len, u32 max_len) { int i; struct opa_congestion_setting_attr *p = @@ -3308,6 +3360,11 @@ static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am, struct opa_congestion_setting_entry_shadow *entries; struct cc_state *cc_state; + if (smp_length_check(sizeof(*p), max_len)) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + rcu_read_lock(); cc_state = get_cc_state(ppd); @@ -3382,7 +3439,7 @@ static void apply_cc_state(struct hfi1_pportdata *ppd) static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct opa_congestion_setting_attr *p = (struct opa_congestion_setting_attr *)data; @@ -3391,6 +3448,11 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, struct opa_congestion_setting_entry_shadow *entries; int i; + if (smp_length_check(sizeof(*p), max_len)) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + /* * Save details from packet into the ppd. Hold the cc_state_lock so * our information is consistent with anyone trying to apply the state. @@ -3412,12 +3474,12 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, apply_cc_state(ppd); return __subn_get_opa_cong_setting(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); } static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, - u8 port, u32 *resp_len) + u8 port, u32 *resp_len, u32 max_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); @@ -3425,7 +3487,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am, s64 ts; int i; - if (am != 0) { + if (am || smp_length_check(sizeof(*cong_log), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -3483,7 +3545,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am, static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct ib_cc_table_attr *cc_table_attr = (struct ib_cc_table_attr *)data; @@ -3495,9 +3557,10 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, int i, j; u32 sentry, eentry; struct cc_state *cc_state; + u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1); /* sanity check n_blocks, start_block */ - if (n_blocks == 0 || + if (n_blocks == 0 || smp_length_check(size, max_len) || start_block + n_blocks > ppd->cc_max_table_entries) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); @@ -3527,14 +3590,14 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, rcu_read_unlock(); if (resp_len) - *resp_len += sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1); + *resp_len += size; return reply((struct ib_mad_hdr *)smp); } static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data; struct hfi1_ibport *ibp = to_iport(ibdev, port); @@ -3545,9 +3608,10 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, int i, j; u32 sentry, eentry; u16 ccti_limit; + u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1); /* sanity check n_blocks, start_block */ - if (n_blocks == 0 || + if (n_blocks == 0 || smp_length_check(size, max_len) || start_block + n_blocks > ppd->cc_max_table_entries) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); @@ -3578,7 +3642,8 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, /* now apply the information */ apply_cc_state(ppd); - return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len, + max_len); } struct opa_led_info { @@ -3591,7 +3656,7 @@ struct opa_led_info { static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd = dd->pport; @@ -3599,7 +3664,7 @@ static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, u32 nport = OPA_AM_NPORT(am); u32 is_beaconing_active; - if (nport != 1) { + if (nport != 1 || smp_length_check(sizeof(*p), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -3621,14 +3686,14 @@ static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct opa_led_info *p = (struct opa_led_info *)data; u32 nport = OPA_AM_NPORT(am); int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK); - if (nport != 1) { + if (nport != 1 || smp_length_check(sizeof(*p), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -3638,12 +3703,13 @@ static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, else shutdown_led_override(dd->pport); - return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len, + max_len); } static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { int ret; struct hfi1_ibport *ibp = to_iport(ibdev, port); @@ -3651,71 +3717,71 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, switch (attr_id) { case IB_SMP_ATTR_NODE_DESC: ret = __subn_get_opa_nodedesc(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_NODE_INFO: ret = __subn_get_opa_nodeinfo(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_PORT_INFO: ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_PKEY_TABLE: ret = __subn_get_opa_pkeytable(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SL_TO_SC_MAP: ret = __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_SL_MAP: ret = __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_VLT_MAP: ret = __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_VLNT_MAP: ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_PORT_STATE_INFO: ret = __subn_get_opa_psi(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE: ret = __subn_get_opa_bct(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_CABLE_INFO: ret = __subn_get_opa_cable_info(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_VL_ARB_TABLE: ret = __subn_get_opa_vl_arb(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_CONGESTION_INFO: ret = __subn_get_opa_cong_info(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING: ret = __subn_get_opa_cong_setting(smp, am, data, ibdev, - port, resp_len); + port, resp_len, max_len); break; case OPA_ATTRIB_ID_HFI_CONGESTION_LOG: ret = __subn_get_opa_hfi1_cong_log(smp, am, data, ibdev, - port, resp_len); + port, resp_len, max_len); break; case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE: ret = __subn_get_opa_cc_table(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_LED_INFO: ret = __subn_get_opa_led_info(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_SM_INFO: if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED) @@ -3733,7 +3799,7 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { int ret; struct hfi1_ibport *ibp = to_iport(ibdev, port); @@ -3741,51 +3807,51 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, switch (attr_id) { case IB_SMP_ATTR_PORT_INFO: ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_PKEY_TABLE: ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SL_TO_SC_MAP: ret = __subn_set_opa_sl_to_sc(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_SL_MAP: ret = __subn_set_opa_sc_to_sl(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_VLT_MAP: ret = __subn_set_opa_sc_to_vlt(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_VLNT_MAP: ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_PORT_STATE_INFO: ret = __subn_set_opa_psi(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE: ret = __subn_set_opa_bct(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_VL_ARB_TABLE: ret = __subn_set_opa_vl_arb(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING: ret = __subn_set_opa_cong_setting(smp, am, data, ibdev, - port, resp_len); + port, resp_len, max_len); break; case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE: ret = __subn_set_opa_cc_table(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_LED_INFO: ret = __subn_set_opa_led_info(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_SM_INFO: if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED) @@ -3841,7 +3907,10 @@ static int subn_get_opa_aggregate(struct opa_smp *smp, memset(next_smp + sizeof(*agg), 0, agg_data_len); (void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data, - ibdev, port, NULL); + ibdev, port, NULL, (u32)agg_data_len); + + if (smp->status & IB_SMP_INVALID_FIELD) + break; if (smp->status & ~IB_SMP_DIRECTION) { set_aggr_error(agg); return reply((struct ib_mad_hdr *)smp); @@ -3884,7 +3953,9 @@ static int subn_set_opa_aggregate(struct opa_smp *smp, } (void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data, - ibdev, port, NULL); + ibdev, port, NULL, (u32)agg_data_len); + if (smp->status & IB_SMP_INVALID_FIELD) + break; if (smp->status & ~IB_SMP_DIRECTION) { set_aggr_error(agg); return reply((struct ib_mad_hdr *)smp); @@ -3994,12 +4065,13 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, struct opa_smp *smp = (struct opa_smp *)out_mad; struct hfi1_ibport *ibp = to_iport(ibdev, port); u8 *data; - u32 am; + u32 am, data_size; __be16 attr_id; int ret; *out_mad = *in_mad; data = opa_get_smp_data(smp); + data_size = (u32)opa_get_smp_data_size(smp); am = be32_to_cpu(smp->attr_mod); attr_id = smp->attr_id; @@ -4043,7 +4115,8 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, default: clear_opa_smp_data(smp); ret = subn_get_opa_sma(attr_id, smp, am, data, - ibdev, port, resp_len); + ibdev, port, resp_len, + data_size); break; case OPA_ATTRIB_ID_AGGREGATE: ret = subn_get_opa_aggregate(smp, ibdev, port, @@ -4055,7 +4128,8 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, switch (attr_id) { default: ret = subn_set_opa_sma(attr_id, smp, am, data, - ibdev, port, resp_len); + ibdev, port, resp_len, + data_size); break; case OPA_ATTRIB_ID_AGGREGATE: ret = subn_set_opa_aggregate(smp, ibdev, port, -- cgit v1.2.1 From d54389836ac63cb517bf863b9f6c22626c6aec26 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Mon, 29 May 2017 17:20:53 -0700 Subject: IB/core: Allow QP state transition from reset to error Playing with IP-O-IB interface can trigger a warning message: "ib0: Failed to modify QP to ERROR state" to be logged. This happens when the QP is in IB_QPS_RESET state and the stack is trying to transition it to IB_QPS_ERR state in ipoib_ib_dev_stop(). According to the IB spec, Table 91 - "QP State Transition Properties" it looks like the transition from reset to error is valid: Transition: Any State to Error Required Attributes: None Optional Attributes: None allowed Actions: Queue processing is stopped. Work Requests pending or in process are completed in error, when possible. This patch allows the transition and quiets the message. Reviewed-by: Dennis Dalessandro Signed-off-by: Tadeusz Struk Signed-off-by: Dennis Dalessandro Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 4792f5209ac2..b822bedeb4a5 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -872,6 +872,7 @@ static const struct { } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { [IB_QPS_RESET] = { [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, [IB_QPS_INIT] = { .valid = 1, .req_param = { -- cgit v1.2.1 From bec7c79cd8f764ba84c8ec6d8c402b8a7cd3a54f Mon Sep 17 00:00:00 2001 From: "Byczkowski, Jakub" Date: Mon, 29 May 2017 17:21:32 -0700 Subject: IB/hfi1: Modify handling of physical link state by Host Driver Ensure states returned to the Fabric Manager are consistent with the OPA specification by caching the physical state along with the logical state. Reviewed-by: Stuart Summers Reviewed-by: Ira Weiny Reviewed-by: Andrzej Kotlowski Signed-off-by: Jakub Byczkowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 102 +++++++++++++++++++++++++----------- drivers/infiniband/hw/hfi1/chip.h | 2 +- drivers/infiniband/hw/hfi1/driver.c | 8 +-- drivers/infiniband/hw/hfi1/hfi.h | 18 ++++++- drivers/infiniband/hw/hfi1/mad.c | 6 +-- drivers/infiniband/hw/hfi1/verbs.c | 2 +- 6 files changed, 97 insertions(+), 41 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 99c29ddcca35..3fae98d079e4 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1066,6 +1066,8 @@ static int thermal_init(struct hfi1_devdata *dd); static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs); +static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state, + int msecs); static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc); static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr); static void handle_temp_err(struct hfi1_devdata *dd); @@ -10028,28 +10030,6 @@ static void set_lidlmc(struct hfi1_pportdata *ppd) sdma_update_lmc(dd, mask, ppd->lid); } -static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs) -{ - unsigned long timeout; - u32 curr_state; - - timeout = jiffies + msecs_to_jiffies(msecs); - while (1) { - curr_state = read_physical_state(dd); - if (curr_state == state) - break; - if (time_after(jiffies, timeout)) { - dd_dev_err(dd, - "timeout waiting for phy link state 0x%x, current state is 0x%x\n", - state, curr_state); - return -ETIMEDOUT; - } - usleep_range(1950, 2050); /* sleep 2ms-ish */ - } - - return 0; -} - static const char *state_completed_string(u32 completed) { static const char * const state_completed[] = { @@ -10283,7 +10263,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) if (do_wait) { /* it can take a while for the link to go down */ - ret = wait_phy_linkstate(dd, PLS_OFFLINE, 10000); + ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000); if (ret < 0) return ret; } @@ -10536,6 +10516,19 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) goto unexpected; } + /* + * Wait for Link_Up physical state. + * Physical and Logical states should already be + * be transitioned to LinkUp and LinkInit respectively. + */ + ret = wait_physical_linkstate(ppd, PLS_LINKUP, 1000); + if (ret) { + dd_dev_err(dd, + "%s: physical state did not change to LINK-UP\n", + __func__); + break; + } + ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000); if (ret) { dd_dev_err(dd, @@ -10649,6 +10642,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) */ if (ret) goto_offline(ppd, 0); + else + cache_physical_state(ppd); break; case HLS_DN_DISABLE: /* link is disabled */ @@ -10673,6 +10668,13 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) ret = -EINVAL; break; } + ret = wait_physical_linkstate(ppd, PLS_DISABLED, 10000); + if (ret) { + dd_dev_err(dd, + "%s: physical state did not change to DISABLED\n", + __func__); + break; + } dc_shutdown(dd); } ppd->host_link_state = HLS_DN_DISABLE; @@ -10690,6 +10692,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) if (ppd->host_link_state != HLS_DN_POLL) goto unexpected; ppd->host_link_state = HLS_VERIFY_CAP; + cache_physical_state(ppd); break; case HLS_GOING_UP: if (ppd->host_link_state != HLS_VERIFY_CAP) @@ -12663,21 +12666,56 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, return -ETIMEDOUT; } -u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd) +/* + * Read the physical hardware link state and set the driver's cached value + * of it. + */ +void cache_physical_state(struct hfi1_pportdata *ppd) { - u32 pstate; + u32 read_pstate; u32 ib_pstate; - pstate = read_physical_state(ppd->dd); - ib_pstate = chip_to_opa_pstate(ppd->dd, pstate); - if (ppd->last_pstate != ib_pstate) { + read_pstate = read_physical_state(ppd->dd); + ib_pstate = chip_to_opa_pstate(ppd->dd, read_pstate); + /* check if OPA pstate changed */ + if (chip_to_opa_pstate(ppd->dd, ppd->pstate) != ib_pstate) { dd_dev_info(ppd->dd, "%s: physical state changed to %s (0x%x), phy 0x%x\n", __func__, opa_pstate_name(ib_pstate), ib_pstate, - pstate); - ppd->last_pstate = ib_pstate; + read_pstate); + } + ppd->pstate = read_pstate; +} + +/* + * wait_physical_linkstate - wait for an physical link state change to occur + * @ppd: port device + * @state: the state to wait for + * @msecs: the number of milliseconds to wait + * + * Wait up to msecs milliseconds for physical link state change to occur. + * Returns 0 if state reached, otherwise -ETIMEDOUT. + */ +static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state, + int msecs) +{ + unsigned long timeout; + + timeout = jiffies + msecs_to_jiffies(msecs); + while (1) { + cache_physical_state(ppd); + if (ppd->pstate == state) + break; + if (time_after(jiffies, timeout)) { + dd_dev_err(ppd->dd, + "timeout waiting for phy link state 0x%x, current state is 0x%x\n", + state, ppd->pstate); + return -ETIMEDOUT; + } + usleep_range(1950, 2050); /* sleep 2ms-ish */ } - return ib_pstate; + + return 0; } #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \ @@ -14781,7 +14819,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, /* start in offline */ ppd->host_link_state = HLS_DN_OFFLINE; init_vl_arb_caches(ppd); - ppd->last_pstate = 0xff; /* invalid value */ + ppd->pstate = PLS_OFFLINE; } dd->link_default = HLS_DN_POLL; diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 0b4f418ba0ac..3dab3156ba4a 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -744,6 +744,7 @@ int is_bx(struct hfi1_devdata *dd); u32 read_physical_state(struct hfi1_devdata *dd); u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate); u32 get_logical_state(struct hfi1_pportdata *ppd); +void cache_physical_state(struct hfi1_pportdata *ppd); const char *opa_lstate_name(u32 lstate); const char *opa_pstate_name(u32 pstate); u32 driver_physical_state(struct hfi1_pportdata *ppd); @@ -1354,7 +1355,6 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd); void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt); u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp); u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp); -u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd); int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which); int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val); int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 9e59430bc55c..e64e9e28c936 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -906,10 +906,12 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, sc = hfi1_9B_get_sc5(hdr, packet->rhf); } if (sc != SC15_PACKET) { - int hwstate = read_logical_state(dd); + int hwstate = driver_lstate(rcd->ppd); - if (hwstate != LSTATE_ACTIVE) { - dd_dev_info(dd, "Unexpected link state %d\n", hwstate); + if (hwstate != IB_PORT_ACTIVE) { + dd_dev_info(dd, + "Unexpected link state %s\n", + opa_lstate_name(hwstate)); return 0; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 8f74cf6d6c9a..bca781c3b5ac 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -663,7 +663,7 @@ struct hfi1_pportdata { u8 link_enabled; /* link enabled? */ u8 linkinit_reason; u8 local_tx_rate; /* rate given to 8051 firmware */ - u8 last_pstate; /* info only */ + u8 pstate; /* info only */ u8 qsfp_retry_count; /* placeholders for IB MAD packet settings */ @@ -1330,6 +1330,22 @@ static inline u32 driver_lstate(struct hfi1_pportdata *ppd) return ppd->lstate; } +/* return the driver's idea of the physical OPA port state */ +static inline u32 driver_pstate(struct hfi1_pportdata *ppd) +{ + /* + * The driver does some processing from the time the physical + * link state is at LINKUP to the time the SM can be notified + * as such. Return IB_PORTPHYSSTATE_TRAINING until the software + * state is ready. + */ + if (ppd->pstate == PLS_LINKUP && + !(ppd->host_link_state & HLS_UP)) + return IB_PORTPHYSSTATE_TRAINING; + else + return chip_to_opa_pstate(ppd->dd, ppd->pstate); +} + void receive_interrupt_work(struct work_struct *work); /* extract service channel from header and rhf */ diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index b180dff5f2e9..c8daf633212d 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -113,7 +113,7 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) return; /* o14-3.2.1 */ - if (ppd_from_ibp(ibp)->lstate != IB_PORT_ACTIVE) + if (driver_lstate(ppd_from_ibp(ibp)) != IB_PORT_ACTIVE) return; /* o14-2 */ @@ -615,7 +615,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, ppd->offline_disabled_reason; pi->port_states.portphysstate_portstate = - (hfi1_ibphys_portstate(ppd) << 4) | state; + (driver_pstate(ppd) << 4) | state; pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc; @@ -1791,7 +1791,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, ppd->offline_disabled_reason; psi->port_states.portphysstate_portstate = - (hfi1_ibphys_portstate(ppd) << 4) | (lstate & 0xf); + (driver_pstate(ppd) << 4) | (lstate & 0xf); psi->link_width_downgrade_tx_active = cpu_to_be16(ppd->link_width_downgrade_tx_active); psi->link_width_downgrade_rx_active = diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 2d7759f0c6b4..5b53faf47042 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1354,7 +1354,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num, props->lmc = ppd->lmc; /* OPA logical states match IB logical states */ props->state = driver_lstate(ppd); - props->phys_state = hfi1_ibphys_portstate(ppd); + props->phys_state = driver_pstate(ppd); props->gid_tbl_len = HFI1_GUIDS_PER_PORT; props->active_width = (u8)opa_width_to_ib(ppd->link_width_active); /* see rate_show() in ib core/sysfs.c */ -- cgit v1.2.1 From 13d84914db56c1afd1c9bf4f41e9bf91f061a7dd Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 29 May 2017 17:22:01 -0700 Subject: IB/hfi1,qib: Do not send QKey trap for UD qps According to IBTA spec a QKey violation should not result in a bad qkey trap being triggered for UD queue pairs. Also since it is a silent error we do not increment the q_key violation or the dropped packet counters. Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 14 ++++-------- drivers/infiniband/hw/hfi1/ruc.c | 8 +++---- drivers/infiniband/hw/hfi1/ud.c | 37 ++++++++++-------------------- drivers/infiniband/hw/hfi1/verbs.h | 4 ++-- drivers/infiniband/hw/qib/qib_mad.c | 13 ++++------- drivers/infiniband/hw/qib/qib_ruc.c | 20 ++++++++-------- drivers/infiniband/hw/qib/qib_ud.c | 43 ++++++++++++----------------------- drivers/infiniband/hw/qib/qib_verbs.h | 4 ++-- 8 files changed, 54 insertions(+), 89 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index c8daf633212d..a081a98d728a 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -180,10 +180,10 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) } /* - * Send a bad [PQ]_Key trap (ch. 14.3.8). + * Send a bad P_Key trap (ch. 14.3.8). */ -void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl, - u32 qp1, u32 qp2, u16 lid1, u16 lid2) +void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl, + u32 qp1, u32 qp2, u16 lid1, u16 lid2) { struct opa_mad_notice_attr data; u32 lid = ppd_from_ibp(ibp)->lid; @@ -191,17 +191,13 @@ void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl, u32 _lid2 = lid2; memset(&data, 0, sizeof(data)); - - if (trap_num == OPA_TRAP_BAD_P_KEY) - ibp->rvp.pkey_violations++; - else - ibp->rvp.qkey_violations++; ibp->rvp.n_pkt_drops++; + ibp->rvp.pkey_violations++; /* Send violation trap */ data.generic_type = IB_NOTICE_TYPE_SECURITY; data.prod_type_lsb = IB_NOTICE_PROD_CA; - data.trap_num = trap_num; + data.trap_num = OPA_TRAP_BAD_P_KEY; data.issuer_lid = cpu_to_be32(lid); data.ntc_257_258.lid1 = cpu_to_be32(_lid1); data.ntc_257_258.lid2 = cpu_to_be32(_lid2); diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 476fe5da2992..9cf506a9a796 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -254,8 +254,8 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet) } if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5, slid))) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, (u16)bth0, sl, - 0, qp->ibqp.qp_num, slid, dlid); + hfi1_bad_pkey(ibp, (u16)bth0, sl, + 0, qp->ibqp.qp_num, slid, dlid); return 1; } /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */ @@ -290,8 +290,8 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet) } if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5, slid))) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, (u16)bth0, sl, - 0, qp->ibqp.qp_num, slid, dlid); + hfi1_bad_pkey(ibp, (u16)bth0, sl, + 0, qp->ibqp.qp_num, slid, dlid); return 1; } /* Validate the SLID. See Ch. 9.6.1.5 */ diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index c995aa58c36a..6bf7a1b08491 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -110,10 +110,10 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) ((1 << ppd->lmc) - 1)); if (unlikely(ingress_pkey_check(ppd, pkey, sc5, qp->s_pkey_index, slid))) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, pkey, - rdma_ah_get_sl(ah_attr), - sqp->ibqp.qp_num, qp->ibqp.qp_num, - slid, rdma_ah_get_dlid(ah_attr)); + hfi1_bad_pkey(ibp, pkey, + rdma_ah_get_sl(ah_attr), + sqp->ibqp.qp_num, qp->ibqp.qp_num, + slid, rdma_ah_get_dlid(ah_attr)); goto drop; } } @@ -128,18 +128,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) qkey = (int)swqe->ud_wr.remote_qkey < 0 ? sqp->qkey : swqe->ud_wr.remote_qkey; - if (unlikely(qkey != qp->qkey)) { - u16 lid; - - lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & - ((1 << ppd->lmc) - 1)); - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, - rdma_ah_get_sl(ah_attr), - sqp->ibqp.qp_num, qp->ibqp.qp_num, - lid, - rdma_ah_get_dlid(ah_attr)); - goto drop; - } + if (unlikely(qkey != qp->qkey)) + goto drop; /* silently drop per IBTA spec */ } /* @@ -722,10 +712,10 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) * for invalid pkeys is optional according to * IB spec (release 1.3, section 10.9.4) */ - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, - pkey, sl, - src_qp, qp->ibqp.qp_num, - slid, dlid); + hfi1_bad_pkey(ibp, + pkey, sl, + src_qp, qp->ibqp.qp_num, + slid, dlid); return; } } else { @@ -734,12 +724,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) if (mgmt_pkey_idx < 0) goto drop; } - if (unlikely(qkey != qp->qkey)) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, sl, - src_qp, qp->ibqp.qp_num, - slid, dlid); + if (unlikely(qkey != qp->qkey)) /* Silent drop */ return; - } + /* Drop invalid MAD packets (see 13.5.3.1). */ if (unlikely(qp->ibqp.qp_num == 1 && (tlen > 2048 || (sc5 == 0xF)))) diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 17b38cd5f654..fdf1e1fb880c 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -236,8 +236,8 @@ static inline int hfi1_send_ok(struct rvt_qp *qp) /* * This must be called with s_lock held. */ -void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl, - u32 qp1, u32 qp2, u16 lid1, u16 lid2); +void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl, + u32 qp1, u32 qp2, u16 lid1, u16 lid2); void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num); void hfi1_sys_guid_chg(struct hfi1_ibport *ibp); void hfi1_node_desc_chg(struct hfi1_ibport *ibp); diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index da295e0392ed..a4a7f2a76f24 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -134,24 +134,21 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) } /* - * Send a bad [PQ]_Key trap (ch. 14.3.8). + * Send a bad P_Key trap (ch. 14.3.8). */ -void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl, - u32 qp1, u32 qp2, __be16 lid1, __be16 lid2) +void qib_bad_pkey(struct qib_ibport *ibp, u32 key, u32 sl, + u32 qp1, u32 qp2, __be16 lid1, __be16 lid2) { struct ib_mad_notice_attr data; - if (trap_num == IB_NOTICE_TRAP_BAD_PKEY) - ibp->rvp.pkey_violations++; - else - ibp->rvp.qkey_violations++; ibp->rvp.n_pkt_drops++; + ibp->rvp.pkey_violations++; /* Send violation trap */ data.generic_type = IB_NOTICE_TYPE_SECURITY; data.prod_type_msb = 0; data.prod_type_lsb = IB_NOTICE_PROD_CA; - data.trap_num = trap_num; + data.trap_num = IB_NOTICE_TRAP_BAD_PKEY; data.issuer_lid = cpu_to_be16(ppd_from_ibp(ibp)->lid); data.toggle_count = 0; memset(&data.details, 0, sizeof(data.details)); diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 88d84cbf7e5a..28528459a052 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -256,11 +256,11 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr, } if (!qib_pkey_ok((u16)bth0, qib_get_pkey(ibp, qp->s_alt_pkey_index))) { - qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, - (u16)bth0, - (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, - 0, qp->ibqp.qp_num, - hdr->lrh[3], hdr->lrh[1]); + qib_bad_pkey(ibp, + (u16)bth0, + (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, + 0, qp->ibqp.qp_num, + hdr->lrh[3], hdr->lrh[1]); goto err; } /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */ @@ -295,11 +295,11 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr, } if (!qib_pkey_ok((u16)bth0, qib_get_pkey(ibp, qp->s_pkey_index))) { - qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, - (u16)bth0, - (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, - 0, qp->ibqp.qp_num, - hdr->lrh[3], hdr->lrh[1]); + qib_bad_pkey(ibp, + (u16)bth0, + (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, + 0, qp->ibqp.qp_num, + hdr->lrh[3], hdr->lrh[1]); goto err; } /* Validate the SLID. See Ch. 9.6.1.5 */ diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 341a123ee95c..be4907453ac4 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -66,8 +66,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) qp = rvt_lookup_qpn(rdi, &ibp->rvp, swqe->ud_wr.remote_qpn); if (!qp) { ibp->rvp.n_pkt_drops++; - rcu_read_unlock(); - return; + goto drop; } sqptype = sqp->ibqp.qp_type == IB_QPT_GSI ? @@ -94,11 +93,11 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) if (unlikely(!qib_pkey_ok(pkey1, pkey2))) { lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & ((1 << ppd->lmc) - 1)); - qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, pkey1, - rdma_ah_get_sl(ah_attr), - sqp->ibqp.qp_num, qp->ibqp.qp_num, - cpu_to_be16(lid), - cpu_to_be16(rdma_ah_get_dlid(ah_attr))); + qib_bad_pkey(ibp, pkey1, + rdma_ah_get_sl(ah_attr), + sqp->ibqp.qp_num, qp->ibqp.qp_num, + cpu_to_be16(lid), + cpu_to_be16(rdma_ah_get_dlid(ah_attr))); goto drop; } } @@ -113,18 +112,8 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) qkey = (int)swqe->ud_wr.remote_qkey < 0 ? sqp->qkey : swqe->ud_wr.remote_qkey; - if (unlikely(qkey != qp->qkey)) { - u16 lid; - - lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & - ((1 << ppd->lmc) - 1)); - qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey, - rdma_ah_get_sl(ah_attr), - sqp->ibqp.qp_num, qp->ibqp.qp_num, - cpu_to_be16(lid), - cpu_to_be16(rdma_ah_get_dlid(ah_attr))); + if (unlikely(qkey != qp->qkey)) goto drop; - } } /* @@ -487,22 +476,18 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, pkey1 = be32_to_cpu(ohdr->bth[0]); pkey2 = qib_get_pkey(ibp, qp->s_pkey_index); if (unlikely(!qib_pkey_ok(pkey1, pkey2))) { - qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, - pkey1, - (be16_to_cpu(hdr->lrh[0]) >> 4) & + qib_bad_pkey(ibp, + pkey1, + (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, - src_qp, qp->ibqp.qp_num, - hdr->lrh[3], hdr->lrh[1]); + src_qp, qp->ibqp.qp_num, + hdr->lrh[3], hdr->lrh[1]); return; } } - if (unlikely(qkey != qp->qkey)) { - qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey, - (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, - src_qp, qp->ibqp.qp_num, - hdr->lrh[3], hdr->lrh[1]); + if (unlikely(qkey != qp->qkey)) return; - } + /* Drop invalid MAD packets (see 13.5.3.1). */ if (unlikely(qp->ibqp.qp_num == 1 && (tlen != 256 || diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index da0db5485ddc..33d5691a9b2d 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -241,8 +241,8 @@ static inline int qib_pkey_ok(u16 pkey1, u16 pkey2) return p1 && p1 == p2 && ((__s16)pkey1 < 0 || (__s16)pkey2 < 0); } -void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl, - u32 qp1, u32 qp2, __be16 lid1, __be16 lid2); +void qib_bad_pkey(struct qib_ibport *ibp, u32 key, u32 sl, + u32 qp1, u32 qp2, __be16 lid1, __be16 lid2); void qib_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num); void qib_sys_guid_chg(struct qib_ibport *ibp); void qib_node_desc_chg(struct qib_ibport *ibp); -- cgit v1.2.1 From ddbf2efff4ad85135b9fd1cb53b10069a160bd58 Mon Sep 17 00:00:00 2001 From: Bartlomiej Dudek Date: Fri, 9 Jun 2017 15:59:26 -0700 Subject: IB/hfi1: Fix DC 8051 host info flag array Fix info array of host message flags by adding entry for link width downgrade and reverse values for BC SMA and BC PWR_MSG messages Reviewed-by: Jakub Byczkowski Signed-off-by: Bartlomiej Dudek Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 3fae98d079e4..ebcb2c405c5f 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1012,14 +1012,15 @@ static struct flag_table dc8051_info_err_flags[] = { */ static struct flag_table dc8051_info_host_msg_flags[] = { FLAG_ENTRY0("Host request done", 0x0001), - FLAG_ENTRY0("BC SMA message", 0x0002), - FLAG_ENTRY0("BC PWR_MGM message", 0x0004), + FLAG_ENTRY0("BC PWR_MGM message", 0x0002), + FLAG_ENTRY0("BC SMA message", 0x0004), FLAG_ENTRY0("BC Unknown message (BCC)", 0x0008), FLAG_ENTRY0("BC Unknown message (LCB)", 0x0010), FLAG_ENTRY0("External device config request", 0x0020), FLAG_ENTRY0("VerifyCap all frames received", 0x0040), FLAG_ENTRY0("LinkUp achieved", 0x0080), FLAG_ENTRY0("Link going down", 0x0100), + FLAG_ENTRY0("Link width downgraded", 0x0200), }; static u32 encoded_size(u32 size); -- cgit v1.2.1 From 702265fc0002497fd0ddaae4a5bec00a8a40da3e Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Fri, 9 Jun 2017 15:59:33 -0700 Subject: IB/hfi1: Set proper logging levels on QSFP cable error events Change QSFP cable error events logging levels from info to error. Reviewed-by: Jakub Byczkowski Reviewed-by: Dennis Dalessandro Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 64 +++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index ebcb2c405c5f..b8ee0e27dae6 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9376,13 +9376,13 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd, if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) || (qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING)) - dd_dev_info(dd, "%s: QSFP cable temperature too high\n", - __func__); + dd_dev_err(dd, "%s: QSFP cable temperature too high\n", + __func__); if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) || (qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING)) - dd_dev_info(dd, "%s: QSFP cable temperature too low\n", - __func__); + dd_dev_err(dd, "%s: QSFP cable temperature too low\n", + __func__); /* * The remaining alarms/warnings don't matter if the link is down. @@ -9392,75 +9392,75 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd, if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) || (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING)) - dd_dev_info(dd, "%s: QSFP supply voltage too high\n", - __func__); + dd_dev_err(dd, "%s: QSFP supply voltage too high\n", + __func__); if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) || (qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING)) - dd_dev_info(dd, "%s: QSFP supply voltage too low\n", - __func__); + dd_dev_err(dd, "%s: QSFP supply voltage too low\n", + __func__); /* Byte 2 is vendor specific */ if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) || (qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable RX channel 1/2 power too high\n", - __func__); + dd_dev_err(dd, "%s: Cable RX channel 1/2 power too high\n", + __func__); if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) || (qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable RX channel 1/2 power too low\n", - __func__); + dd_dev_err(dd, "%s: Cable RX channel 1/2 power too low\n", + __func__); if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) || (qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable RX channel 3/4 power too high\n", - __func__); + dd_dev_err(dd, "%s: Cable RX channel 3/4 power too high\n", + __func__); if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) || (qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable RX channel 3/4 power too low\n", - __func__); + dd_dev_err(dd, "%s: Cable RX channel 3/4 power too low\n", + __func__); if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) || (qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too high\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 1/2 bias too high\n", + __func__); if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) || (qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too low\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 1/2 bias too low\n", + __func__); if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) || (qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too high\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 3/4 bias too high\n", + __func__); if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) || (qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too low\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 3/4 bias too low\n", + __func__); if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) || (qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 1/2 power too high\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 1/2 power too high\n", + __func__); if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) || (qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 1/2 power too low\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 1/2 power too low\n", + __func__); if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) || (qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 3/4 power too high\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 3/4 power too high\n", + __func__); if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) || (qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 3/4 power too low\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 3/4 power too low\n", + __func__); /* Bytes 9-10 and 11-12 are reserved */ /* Bytes 13-15 are vendor specific */ -- cgit v1.2.1 From 9c1a99c3882beb9e88ed41d914e75bab2d593926 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 9 Jun 2017 15:59:40 -0700 Subject: IB/hfi1: Create common expected receive verbs/PSM code Declarations and code in common between verbs and PSM are now moved to exp_rcv.[ch]. Reviewed-by: Michael J. Ruhl Reviewed-by: Mitko Haralanov Reviewed-by: Ashutosh Dixit Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/Makefile | 2 +- drivers/infiniband/hw/hfi1/exp_rcv.c | 118 +++++++++++++++++++ drivers/infiniband/hw/hfi1/exp_rcv.h | 187 ++++++++++++++++++++++++++++++ drivers/infiniband/hw/hfi1/file_ops.c | 4 +- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 121 ------------------- drivers/infiniband/hw/hfi1/user_exp_rcv.h | 23 +--- drivers/infiniband/hw/hfi1/user_sdma.c | 38 ------ 7 files changed, 309 insertions(+), 184 deletions(-) create mode 100644 drivers/infiniband/hw/hfi1/exp_rcv.c create mode 100644 drivers/infiniband/hw/hfi1/exp_rcv.h diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index 88085f65432e..66d538c033b0 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o hfi1-y := affinity.o chip.o device.o driver.o efivar.o \ - eprom.o file_ops.o firmware.o \ + eprom.o exp_rcv.o file_ops.o firmware.o \ init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \ qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \ uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \ diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c new file mode 100644 index 000000000000..08d13ed1b574 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/exp_rcv.c @@ -0,0 +1,118 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "exp_rcv.h" +#include "trace.h" + +/** + * exp_tid_group_init - initialize exp_tid_set + * @set - the set + */ +void hfi1_exp_tid_group_init(struct exp_tid_set *set) +{ + INIT_LIST_HEAD(&set->list); + set->count = 0; +} + +/** + * alloc_ctxt_rcv_groups - initialize expected receive groups + * @rcd - the context to add the groupings to + */ +int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) +{ + struct hfi1_devdata *dd = rcd->dd; + u32 tidbase; + struct tid_group *grp; + int i; + + hfi1_exp_tid_group_init(&rcd->tid_group_list); + hfi1_exp_tid_group_init(&rcd->tid_used_list); + hfi1_exp_tid_group_init(&rcd->tid_full_list); + + tidbase = rcd->expected_base; + for (i = 0; i < rcd->expected_count / + dd->rcv_entries.group_size; i++) { + grp = kzalloc(sizeof(*grp), GFP_KERNEL); + if (!grp) + goto bail; + grp->size = dd->rcv_entries.group_size; + grp->base = tidbase; + tid_group_add_tail(grp, &rcd->tid_group_list); + tidbase += dd->rcv_entries.group_size; + } + + return 0; +bail: + hfi1_free_ctxt_rcv_groups(rcd); + return -ENOMEM; +} + +/** + * free_ctxt_rcv_groups - free expected receive groups + * @rcd - the context to free + * + * The routine dismantles the expect receive linked + * list and clears any tids associated with the receive + * context. + * + * This should only be called for kernel contexts and the + * a base user context. + */ +void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) +{ + struct tid_group *grp, *gptr; + + WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_full_list)); + WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_used_list)); + + list_for_each_entry_safe(grp, gptr, &rcd->tid_group_list.list, list) { + tid_group_remove(grp, &rcd->tid_group_list); + kfree(grp); + } + + hfi1_clear_tids(rcd); +} diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.h b/drivers/infiniband/hw/hfi1/exp_rcv.h new file mode 100644 index 000000000000..c7d02bcddded --- /dev/null +++ b/drivers/infiniband/hw/hfi1/exp_rcv.h @@ -0,0 +1,187 @@ +#ifndef _HFI1_EXP_RCV_H +#define _HFI1_EXP_RCV_H +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "hfi.h" + +#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list)) + +#define EXP_TID_TIDLEN_MASK 0x7FFULL +#define EXP_TID_TIDLEN_SHIFT 0 +#define EXP_TID_TIDCTRL_MASK 0x3ULL +#define EXP_TID_TIDCTRL_SHIFT 20 +#define EXP_TID_TIDIDX_MASK 0x3FFULL +#define EXP_TID_TIDIDX_SHIFT 22 +#define EXP_TID_GET(tid, field) \ + (((tid) >> EXP_TID_TID##field##_SHIFT) & EXP_TID_TID##field##_MASK) + +#define EXP_TID_SET(field, value) \ + (((value) & EXP_TID_TID##field##_MASK) << \ + EXP_TID_TID##field##_SHIFT) +#define EXP_TID_CLEAR(tid, field) ({ \ + (tid) &= ~(EXP_TID_TID##field##_MASK << \ + EXP_TID_TID##field##_SHIFT); \ + }) +#define EXP_TID_RESET(tid, field, value) do { \ + EXP_TID_CLEAR(tid, field); \ + (tid) |= EXP_TID_SET(field, (value)); \ + } while (0) + +/* + * Define fields in the KDETH header so we can update the header + * template. + */ +#define KDETH_OFFSET_SHIFT 0 +#define KDETH_OFFSET_MASK 0x7fff +#define KDETH_OM_SHIFT 15 +#define KDETH_OM_MASK 0x1 +#define KDETH_TID_SHIFT 16 +#define KDETH_TID_MASK 0x3ff +#define KDETH_TIDCTRL_SHIFT 26 +#define KDETH_TIDCTRL_MASK 0x3 +#define KDETH_INTR_SHIFT 28 +#define KDETH_INTR_MASK 0x1 +#define KDETH_SH_SHIFT 29 +#define KDETH_SH_MASK 0x1 +#define KDETH_KVER_SHIFT 30 +#define KDETH_KVER_MASK 0x3 +#define KDETH_JKEY_SHIFT 0x0 +#define KDETH_JKEY_MASK 0xff +#define KDETH_HCRC_UPPER_SHIFT 16 +#define KDETH_HCRC_UPPER_MASK 0xff +#define KDETH_HCRC_LOWER_SHIFT 24 +#define KDETH_HCRC_LOWER_MASK 0xff + +#define KDETH_GET(val, field) \ + (((le32_to_cpu((val))) >> KDETH_##field##_SHIFT) & KDETH_##field##_MASK) +#define KDETH_SET(dw, field, val) do { \ + u32 dwval = le32_to_cpu(dw); \ + dwval &= ~(KDETH_##field##_MASK << KDETH_##field##_SHIFT); \ + dwval |= (((val) & KDETH_##field##_MASK) << \ + KDETH_##field##_SHIFT); \ + dw = cpu_to_le32(dwval); \ + } while (0) + +#define KDETH_RESET(dw, field, val) ({ dw = 0; KDETH_SET(dw, field, val); }) + +/* KDETH OM multipliers and switch over point */ +#define KDETH_OM_SMALL 4 +#define KDETH_OM_SMALL_SHIFT 2 +#define KDETH_OM_LARGE 64 +#define KDETH_OM_LARGE_SHIFT 6 +#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1)) + +struct tid_group { + struct list_head list; + u32 base; + u8 size; + u8 used; + u8 map; +}; + +/* + * Write an "empty" RcvArray entry. + * This function exists so the TID registaration code can use it + * to write to unused/unneeded entries and still take advantage + * of the WC performance improvements. The HFI will ignore this + * write to the RcvArray entry. + */ +static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index) +{ + /* + * Doing the WC fill writes only makes sense if the device is + * present and the RcvArray has been mapped as WC memory. + */ + if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc) + writeq(0, dd->rcvarray_wc + (index * 8)); +} + +static inline void tid_group_add_tail(struct tid_group *grp, + struct exp_tid_set *set) +{ + list_add_tail(&grp->list, &set->list); + set->count++; +} + +static inline void tid_group_remove(struct tid_group *grp, + struct exp_tid_set *set) +{ + list_del_init(&grp->list); + set->count--; +} + +static inline void tid_group_move(struct tid_group *group, + struct exp_tid_set *s1, + struct exp_tid_set *s2) +{ + tid_group_remove(group, s1); + tid_group_add_tail(group, s2); +} + +static inline struct tid_group *tid_group_pop(struct exp_tid_set *set) +{ + struct tid_group *grp = + list_first_entry(&set->list, struct tid_group, list); + list_del_init(&grp->list); + set->count--; + return grp; +} + +static inline u32 rcventry2tidinfo(u32 rcventry) +{ + u32 pair = rcventry & ~0x1; + + return EXP_TID_SET(IDX, pair >> 1) | + EXP_TID_SET(CTRL, 1 << (rcventry - pair)); +} + +int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd); +void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd); +void hfi1_exp_tid_group_init(struct exp_tid_set *set); + +#endif /* _HFI1_EXP_RCV_H */ diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 3158128d57e8..2dd8758f0644 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -804,7 +804,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) dd->rcd[uctxt->ctxt] = NULL; - hfi1_user_exp_rcv_grp_free(uctxt); + hfi1_free_ctxt_rcv_groups(uctxt); hfi1_clear_ctxt_pkey(dd, uctxt); uctxt->rcvwait_to = 0; @@ -1260,7 +1260,7 @@ static int setup_base_ctxt(struct hfi1_filedata *fd) if (ret) goto setup_failed; - ret = hfi1_user_exp_rcv_grp_init(fd); + ret = hfi1_alloc_ctxt_rcv_groups(uctxt); if (ret) goto setup_failed; diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index a8f0aa4722f6..6318e6ca1b18 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -51,14 +51,6 @@ #include "trace.h" #include "mmu_rb.h" -struct tid_group { - struct list_head list; - u32 base; - u8 size; - u8 used; - u8 map; -}; - struct tid_rb_node { struct mmu_rb_node mmu; unsigned long phys; @@ -75,8 +67,6 @@ struct tid_pageset { u16 count; }; -#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list)) - #define num_user_pages(vaddr, len) \ (1 + (((((unsigned long)(vaddr) + \ (unsigned long)(len) - 1) & PAGE_MASK) - \ @@ -109,88 +99,6 @@ static struct mmu_rb_ops tid_rb_ops = { .invalidate = tid_rb_invalidate }; -static inline u32 rcventry2tidinfo(u32 rcventry) -{ - u32 pair = rcventry & ~0x1; - - return EXP_TID_SET(IDX, pair >> 1) | - EXP_TID_SET(CTRL, 1 << (rcventry - pair)); -} - -static inline void exp_tid_group_init(struct exp_tid_set *set) -{ - INIT_LIST_HEAD(&set->list); - set->count = 0; -} - -static inline void tid_group_remove(struct tid_group *grp, - struct exp_tid_set *set) -{ - list_del_init(&grp->list); - set->count--; -} - -static inline void tid_group_add_tail(struct tid_group *grp, - struct exp_tid_set *set) -{ - list_add_tail(&grp->list, &set->list); - set->count++; -} - -static inline struct tid_group *tid_group_pop(struct exp_tid_set *set) -{ - struct tid_group *grp = - list_first_entry(&set->list, struct tid_group, list); - list_del_init(&grp->list); - set->count--; - return grp; -} - -static inline void tid_group_move(struct tid_group *group, - struct exp_tid_set *s1, - struct exp_tid_set *s2) -{ - tid_group_remove(group, s1); - tid_group_add_tail(group, s2); -} - -int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd) -{ - struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct hfi1_devdata *dd = fd->dd; - u32 tidbase; - u32 i; - struct tid_group *grp, *gptr; - - exp_tid_group_init(&uctxt->tid_group_list); - exp_tid_group_init(&uctxt->tid_used_list); - exp_tid_group_init(&uctxt->tid_full_list); - - tidbase = uctxt->expected_base; - for (i = 0; i < uctxt->expected_count / - dd->rcv_entries.group_size; i++) { - grp = kzalloc(sizeof(*grp), GFP_KERNEL); - if (!grp) - goto grp_failed; - - grp->size = dd->rcv_entries.group_size; - grp->base = tidbase; - tid_group_add_tail(grp, &uctxt->tid_group_list); - tidbase += dd->rcv_entries.group_size; - } - - return 0; - -grp_failed: - list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, - list) { - list_del_init(&grp->list); - kfree(grp); - } - - return -ENOMEM; -} - /* * Initialize context and file private data needed for Expected * receive caching. This needs to be done after the context has @@ -266,18 +174,6 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd) return ret; } -void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt) -{ - struct tid_group *grp, *gptr; - - list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, - list) { - list_del_init(&grp->list); - kfree(grp); - } - hfi1_clear_tids(uctxt); -} - void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) { struct hfi1_ctxtdata *uctxt = fd->uctxt; @@ -302,23 +198,6 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) fd->entry_to_rb = NULL; } -/* - * Write an "empty" RcvArray entry. - * This function exists so the TID registaration code can use it - * to write to unused/unneeded entries and still take advantage - * of the WC performance improvements. The HFI will ignore this - * write to the RcvArray entry. - */ -static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index) -{ - /* - * Doing the WC fill writes only makes sense if the device is - * present and the RcvArray has been mapped as WC memory. - */ - if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc) - writeq(0, dd->rcvarray_wc + (index * 8)); -} - /* * RcvArray entry allocation for Expected Receives is done by the * following algorithm: diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 5250c897298d..1bdc61be53cb 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -49,29 +49,8 @@ #include "hfi.h" -#define EXP_TID_TIDLEN_MASK 0x7FFULL -#define EXP_TID_TIDLEN_SHIFT 0 -#define EXP_TID_TIDCTRL_MASK 0x3ULL -#define EXP_TID_TIDCTRL_SHIFT 20 -#define EXP_TID_TIDIDX_MASK 0x3FFULL -#define EXP_TID_TIDIDX_SHIFT 22 -#define EXP_TID_GET(tid, field) \ - (((tid) >> EXP_TID_TID##field##_SHIFT) & EXP_TID_TID##field##_MASK) +#include "exp_rcv.h" -#define EXP_TID_SET(field, value) \ - (((value) & EXP_TID_TID##field##_MASK) << \ - EXP_TID_TID##field##_SHIFT) -#define EXP_TID_CLEAR(tid, field) ({ \ - (tid) &= ~(EXP_TID_TID##field##_MASK << \ - EXP_TID_TID##field##_SHIFT); \ - }) -#define EXP_TID_RESET(tid, field, value) do { \ - EXP_TID_CLEAR(tid, field); \ - (tid) |= EXP_TID_SET(field, (value)); \ - } while (0) - -void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt); -int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd); int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd); void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd); int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index fcadbb9978ca..8f7cfdd9e9ec 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -94,27 +94,6 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 /* Number of BTH.PSN bits used for sequence number in expected rcvs */ #define BTH_SEQ_MASK 0x7ffull -/* - * Define fields in the KDETH header so we can update the header - * template. - */ -#define KDETH_OFFSET_SHIFT 0 -#define KDETH_OFFSET_MASK 0x7fff -#define KDETH_OM_SHIFT 15 -#define KDETH_OM_MASK 0x1 -#define KDETH_TID_SHIFT 16 -#define KDETH_TID_MASK 0x3ff -#define KDETH_TIDCTRL_SHIFT 26 -#define KDETH_TIDCTRL_MASK 0x3 -#define KDETH_INTR_SHIFT 28 -#define KDETH_INTR_MASK 0x1 -#define KDETH_SH_SHIFT 29 -#define KDETH_SH_MASK 0x1 -#define KDETH_HCRC_UPPER_SHIFT 16 -#define KDETH_HCRC_UPPER_MASK 0xff -#define KDETH_HCRC_LOWER_SHIFT 24 -#define KDETH_HCRC_LOWER_MASK 0xff - #define AHG_KDETH_INTR_SHIFT 12 #define AHG_KDETH_SH_SHIFT 13 #define AHG_KDETH_ARRAY_SIZE 9 @@ -122,16 +101,6 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4) #define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff) -#define KDETH_GET(val, field) \ - (((le32_to_cpu((val))) >> KDETH_##field##_SHIFT) & KDETH_##field##_MASK) -#define KDETH_SET(dw, field, val) do { \ - u32 dwval = le32_to_cpu(dw); \ - dwval &= ~(KDETH_##field##_MASK << KDETH_##field##_SHIFT); \ - dwval |= (((val) & KDETH_##field##_MASK) << \ - KDETH_##field##_SHIFT); \ - dw = cpu_to_le32(dwval); \ - } while (0) - #define AHG_HEADER_SET(arr, idx, dw, bit, width, value) \ do { \ if ((idx) < ARRAY_SIZE((arr))) \ @@ -142,13 +111,6 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 return -ERANGE; \ } while (0) -/* KDETH OM multipliers and switch over point */ -#define KDETH_OM_SMALL 4 -#define KDETH_OM_SMALL_SHIFT 2 -#define KDETH_OM_LARGE 64 -#define KDETH_OM_LARGE_SHIFT 6 -#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1)) - /* Tx request flag bits */ #define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ #define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ -- cgit v1.2.1 From f5114440c5491d4737b061c3a77195088bbaca52 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Fri, 9 Jun 2017 15:59:46 -0700 Subject: IB/hfi1: Remove reading platform configuration from EFI variable Currently, platform configuration can be read from EFI variable for discrete cards. It will happen when reading from EPROM fails. EFI variables should not be queried for platform configuration in any scenario. Reviewed-by: Jakub Byczkowski Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/platform.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index cbda9b189216..41307e474525 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -136,7 +136,6 @@ static void save_platform_config_fields(struct hfi1_devdata *dd) void get_platform_config(struct hfi1_devdata *dd) { int ret = 0; - unsigned long size = 0; u8 *temp_platform_config = NULL; u32 esize; @@ -160,15 +159,6 @@ void get_platform_config(struct hfi1_devdata *dd) dd->platform_config.size = esize; return; } - /* fail, try EFI variable */ - - ret = read_hfi1_efi_var(dd, "configuration", &size, - (void **)&temp_platform_config); - if (!ret) { - dd->platform_config.data = temp_platform_config; - dd->platform_config.size = size; - return; - } } dd_dev_err(dd, "%s: Failed to get platform config, falling back to sub-optimal default file\n", -- cgit v1.2.1 From f523984fb85d16e098aac94642cc16803a4cc61f Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 9 Jun 2017 15:59:53 -0700 Subject: IB/hfi1: Use a template for tid reg/unreg This is the preferred way to add a duplicate trace call. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/trace_rx.h | 46 ++++++++++------------------------- 1 file changed, 13 insertions(+), 33 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index 05fc6d68ffe8..7af593827d37 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -138,7 +138,8 @@ TRACE_EVENT(hfi1_receive_interrupt, ) ); -TRACE_EVENT(hfi1_exp_tid_reg, +DECLARE_EVENT_CLASS( + hfi1_exp_tid_reg_unreg, TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages, unsigned long va, unsigned long pa, dma_addr_t dma), @@ -172,38 +173,17 @@ TRACE_EVENT(hfi1_exp_tid_reg, ) ); -TRACE_EVENT(hfi1_exp_tid_unreg, - TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages, - unsigned long va, unsigned long pa, dma_addr_t dma), - TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma), - TP_STRUCT__entry( - __field(unsigned int, ctxt) - __field(u16, subctxt) - __field(u32, rarr) - __field(u32, npages) - __field(unsigned long, va) - __field(unsigned long, pa) - __field(dma_addr_t, dma) - ), - TP_fast_assign( - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __entry->rarr = rarr; - __entry->npages = npages; - __entry->va = va; - __entry->pa = pa; - __entry->dma = dma; - ), - TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx", - __entry->ctxt, - __entry->subctxt, - __entry->rarr, - __entry->npages, - __entry->pa, - __entry->va, - __entry->dma - ) - ); +DEFINE_EVENT( + hfi1_exp_tid_reg_unreg, hfi1_exp_tid_unreg, + TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages, + unsigned long va, unsigned long pa, dma_addr_t dma), + TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma)); + +DEFINE_EVENT( + hfi1_exp_tid_reg_unreg, hfi1_exp_tid_reg, + TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages, + unsigned long va, unsigned long pa, dma_addr_t dma), + TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma)); TRACE_EVENT(hfi1_exp_tid_inval, TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr, -- cgit v1.2.1 From 8cb1021b806bda3f5fda6f3699e1f98df14245df Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 9 Jun 2017 15:59:59 -0700 Subject: IB/hfi1: Add traces for TID operations This patch adds a trace for putting a TID and for writing the RcvArray CSR. The CSR access template can be easily extended for additional CSR readq/writeq calls. Reviewed-by: Ashutosh Dixit Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 16 ++------------- drivers/infiniband/hw/hfi1/trace_misc.h | 20 +++++++++++++++++++ drivers/infiniband/hw/hfi1/trace_rx.h | 35 +++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index b8ee0e27dae6..937350d9deab 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9745,17 +9745,6 @@ static inline int init_cpu_counters(struct hfi1_devdata *dd) return 0; } -static const char * const pt_names[] = { - "expected", - "eager", - "invalid" -}; - -static const char *pt_name(u32 type) -{ - return type >= ARRAY_SIZE(pt_names) ? "unknown" : pt_names[type]; -} - /* * index is the index into the receive array */ @@ -9777,15 +9766,14 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, type, index); goto done; } - - hfi1_cdbg(TID, "type %s, index 0x%x, pa 0x%lx, bsize 0x%lx", - pt_name(type), index, pa, (unsigned long)order); + trace_hfi1_put_tid(dd, index, type, pa, order); #define RT_ADDR_SHIFT 12 /* 4KB kernel address boundary */ reg = RCV_ARRAY_RT_WRITE_ENABLE_SMASK | (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT | ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK) << RCV_ARRAY_RT_ADDR_SHIFT; + trace_hfi1_write_rcvarray(base + (index * 8), reg); writeq(reg, base + (index * 8)); if (type == PT_EAGER) diff --git a/drivers/infiniband/hw/hfi1/trace_misc.h b/drivers/infiniband/hw/hfi1/trace_misc.h index deac77ddaeab..8db2253523ff 100644 --- a/drivers/infiniband/hw/hfi1/trace_misc.h +++ b/drivers/infiniband/hw/hfi1/trace_misc.h @@ -72,6 +72,26 @@ TRACE_EVENT(hfi1_interrupt, __entry->src) ); +DECLARE_EVENT_CLASS( + hfi1_csr_template, + TP_PROTO(void __iomem *addr, u64 value), + TP_ARGS(addr, value), + TP_STRUCT__entry( + __field(void __iomem *, addr) + __field(u64, value) + ), + TP_fast_assign( + __entry->addr = addr; + __entry->value = value; + ), + TP_printk("addr %p value %llx", __entry->addr, __entry->value) +); + +DEFINE_EVENT( + hfi1_csr_template, hfi1_write_rcvarray, + TP_PROTO(void __iomem *addr, u64 value), + TP_ARGS(addr, value)); + #ifdef CONFIG_FAULT_INJECTION TRACE_EVENT(hfi1_fault_opcode, TP_PROTO(struct rvt_qp *qp, u8 opcode), diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index 7af593827d37..84929578cfe6 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -52,6 +52,13 @@ #include "hfi.h" +#define tidtype_name(type) { PT_##type, #type } +#define show_tidtype(type) \ +__print_symbolic(type, \ + tidtype_name(EXPECTED), \ + tidtype_name(EAGER), \ + tidtype_name(INVALID)) \ + #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_rx @@ -185,6 +192,34 @@ DEFINE_EVENT( unsigned long va, unsigned long pa, dma_addr_t dma), TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma)); +TRACE_EVENT( + hfi1_put_tid, + TP_PROTO(struct hfi1_devdata *dd, + u32 index, u32 type, unsigned long pa, u16 order), + TP_ARGS(dd, index, type, pa, order), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd) + __field(unsigned long, pa); + __field(u32, index); + __field(u32, type); + __field(u16, order); + ), + TP_fast_assign( + DD_DEV_ASSIGN(dd); + __entry->pa = pa; + __entry->index = index; + __entry->type = type; + __entry->order = order; + ), + TP_printk("[%s] type %s pa %lx index %u order %u", + __get_str(dev), + show_tidtype(__entry->type), + __entry->pa, + __entry->index, + __entry->order + ) +); + TRACE_EVENT(hfi1_exp_tid_inval, TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr, u32 npages, dma_addr_t dma), -- cgit v1.2.1 From 581d01aaaca1fbb9df83cf3337c77e85215dcc5b Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 9 Jun 2017 16:00:06 -0700 Subject: IB/qib: Replace deprecated pci functions with new API pci_enable_msix_range() and pci_disable_msix() have been deprecated. Updating to the new pci_alloc_irq_vectors() interface. Reviewed-by: Sebastian Sanchez Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib.h | 8 +- drivers/infiniband/hw/qib/qib_iba6120.c | 6 +- drivers/infiniband/hw/qib/qib_iba7220.c | 7 +- drivers/infiniband/hw/qib/qib_iba7322.c | 48 +++++----- drivers/infiniband/hw/qib/qib_pcie.c | 149 +++++++++++++------------------- 5 files changed, 98 insertions(+), 120 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index a3e21a25cea5..f9e1c69603a5 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1,7 +1,7 @@ #ifndef _QIB_KERNEL_H #define _QIB_KERNEL_H /* - * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2012 - 2017 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * @@ -443,7 +443,7 @@ struct qib_irq_notify; #endif struct qib_msix_entry { - struct msix_entry msix; + int irq; void *arg; #ifdef CONFIG_INFINIBAND_QIB_DCA int dca; @@ -1433,9 +1433,9 @@ int qib_pcie_init(struct pci_dev *, const struct pci_device_id *); int qib_pcie_ddinit(struct qib_devdata *, struct pci_dev *, const struct pci_device_id *); void qib_pcie_ddcleanup(struct qib_devdata *); -int qib_pcie_params(struct qib_devdata *, u32, u32 *, struct qib_msix_entry *); +int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent); int qib_reinit_intr(struct qib_devdata *); -void qib_enable_intx(struct pci_dev *); +void qib_enable_intx(struct qib_devdata *dd); void qib_nomsi(struct qib_devdata *); void qib_nomsix(struct qib_devdata *); void qib_pcie_getcmd(struct qib_devdata *, u16 *, u8 *, u8 *); diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index e423b71e6ea0..46045fc28fa0 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2013 - 2017 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. * All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. @@ -1838,7 +1838,7 @@ static int qib_6120_setup_reset(struct qib_devdata *dd) bail: if (ret) { - if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL)) + if (qib_pcie_params(dd, dd->lbus_width, NULL)) qib_dev_err(dd, "Reset failed to setup PCIe or interrupts; continuing anyway\n"); /* clear the reset error, init error/hwerror mask */ @@ -3562,7 +3562,7 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev, if (qib_mini_init) goto bail; - if (qib_pcie_params(dd, 8, NULL, NULL)) + if (qib_pcie_params(dd, 8, NULL)) qib_dev_err(dd, "Failed to setup PCIe or interrupts; continuing anyway\n"); dd->cspec->irq = pdev->irq; /* save IRQ */ diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index c3679c48e61c..49cd6e3beb72 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2011 - 2017 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. * All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. @@ -2148,7 +2149,7 @@ static int qib_setup_7220_reset(struct qib_devdata *dd) bail: if (ret) { - if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL)) + if (qib_pcie_params(dd, dd->lbus_width, NULL)) qib_dev_err(dd, "Reset failed to setup PCIe or interrupts; continuing anyway\n"); @@ -3309,7 +3310,7 @@ static int qib_7220_intr_fallback(struct qib_devdata *dd) qib_devinfo(dd->pcidev, "MSI interrupt not detected, trying INTx interrupts\n"); qib_7220_free_irq(dd); - qib_enable_intx(dd->pcidev); + qib_enable_intx(dd); /* * Some newer kernels require free_irq before disable_msi, * and irq can be changed during disable and INTx enable @@ -4619,7 +4620,7 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev, minwidth = 8; /* x8 capable boards */ break; } - if (qib_pcie_params(dd, minwidth, NULL, NULL)) + if (qib_pcie_params(dd, minwidth, NULL)) qib_dev_err(dd, "Failed to setup PCIe or interrupts; continuing anyway\n"); diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index bb2439fff8fa..2653064ce9e9 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2012 - 2017 Intel Corporation. All rights reserved. * Copyright (c) 2008 - 2012 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two @@ -2841,10 +2841,10 @@ static void qib_7322_nomsix(struct qib_devdata *dd) reset_dca_notifier(dd, &dd->cspec->msix_entries[i]); #endif irq_set_affinity_hint( - dd->cspec->msix_entries[i].msix.vector, NULL); + dd->cspec->msix_entries[i].irq, NULL); free_cpumask_var(dd->cspec->msix_entries[i].mask); - free_irq(dd->cspec->msix_entries[i].msix.vector, - dd->cspec->msix_entries[i].arg); + free_irq(dd->cspec->msix_entries[i].irq, + dd->cspec->msix_entries[i].arg); } qib_nomsix(dd); } @@ -3336,9 +3336,9 @@ static void reset_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m) qib_devinfo(dd->pcidev, "Disabling notifier on HCA %d irq %d\n", dd->unit, - m->msix.vector); + m->irq); irq_set_affinity_notifier( - m->msix.vector, + m->irq, NULL); m->notifier = NULL; } @@ -3354,7 +3354,7 @@ static void setup_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m) int ret; m->notifier = n; - n->notify.irq = m->msix.vector; + n->notify.irq = m->irq; n->notify.notify = qib_irq_notifier_notify; n->notify.release = qib_irq_notifier_release; n->arg = m->arg; @@ -3500,10 +3500,21 @@ try_intx: - 1, QIB_DRV_NAME "%d (kctx)", dd->unit); } - ret = request_irq( - dd->cspec->msix_entries[msixnum].msix.vector, - handler, 0, dd->cspec->msix_entries[msixnum].name, - arg); + + dd->cspec->msix_entries[msixnum].irq = pci_irq_vector( + dd->pcidev, msixnum); + if (dd->cspec->msix_entries[msixnum].irq < 0) { + qib_dev_err(dd, + "Couldn't get MSIx irq (vec=%d): %d\n", + msixnum, + dd->cspec->msix_entries[msixnum].irq); + qib_7322_nomsix(dd); + goto try_intx; + } + ret = request_irq(dd->cspec->msix_entries[msixnum].irq, + handler, 0, + dd->cspec->msix_entries[msixnum].name, + arg); if (ret) { /* * Shouldn't happen since the enable said we could @@ -3512,7 +3523,7 @@ try_intx: qib_dev_err(dd, "Couldn't setup MSIx interrupt (vec=%d, irq=%d): %d\n", msixnum, - dd->cspec->msix_entries[msixnum].msix.vector, + dd->cspec->msix_entries[msixnum].irq, ret); qib_7322_nomsix(dd); goto try_intx; @@ -3548,7 +3559,7 @@ try_intx: dd->cspec->msix_entries[msixnum].mask); } irq_set_affinity_hint( - dd->cspec->msix_entries[msixnum].msix.vector, + dd->cspec->msix_entries[msixnum].irq, dd->cspec->msix_entries[msixnum].mask); } msixnum++; @@ -3744,7 +3755,6 @@ static int qib_do_7322_reset(struct qib_devdata *dd) if (msix_entries) { /* restore the MSIx vector address and data if saved above */ for (i = 0; i < msix_entries; i++) { - dd->cspec->msix_entries[i].msix.entry = i; if (!msix_vecsave || !msix_vecsave[2 * i]) continue; qib_write_kreg(dd, 2 * i + @@ -3762,8 +3772,7 @@ static int qib_do_7322_reset(struct qib_devdata *dd) write_7322_initregs(dd); if (qib_pcie_params(dd, dd->lbus_width, - &dd->cspec->num_msix_entries, - dd->cspec->msix_entries)) + &dd->cspec->num_msix_entries)) qib_dev_err(dd, "Reset failed to setup PCIe or interrupts; continuing anyway\n"); @@ -5195,7 +5204,7 @@ static int qib_7322_intr_fallback(struct qib_devdata *dd) qib_devinfo(dd->pcidev, "MSIx interrupt not detected, trying INTx interrupts\n"); qib_7322_nomsix(dd); - qib_enable_intx(dd->pcidev); + qib_enable_intx(dd); qib_setup_7322_interrupt(dd, 0); return 1; } @@ -7327,10 +7336,7 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, if (!dd->cspec->msix_entries) tabsize = 0; - for (i = 0; i < tabsize; i++) - dd->cspec->msix_entries[i].msix.entry = i; - - if (qib_pcie_params(dd, 8, &tabsize, dd->cspec->msix_entries)) + if (qib_pcie_params(dd, 8, &tabsize)) qib_dev_err(dd, "Failed to setup PCIe or interrupts; continuing anyway\n"); /* may be less than we wanted, if not enough available */ diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index c379b8342a09..d90403e31a9d 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2010 - 2017 Intel Corporation. All rights reserved. * Copyright (c) 2008, 2009 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two @@ -187,112 +188,84 @@ void qib_pcie_ddcleanup(struct qib_devdata *dd) pci_set_drvdata(dd->pcidev, NULL); } -static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt, - struct qib_msix_entry *qib_msix_entry) -{ - int ret; - int nvec = *msixcnt; - struct msix_entry *msix_entry; - int i; - - ret = pci_msix_vec_count(dd->pcidev); - if (ret < 0) - goto do_intx; - - nvec = min(nvec, ret); - - /* We can't pass qib_msix_entry array to qib_msix_setup - * so use a dummy msix_entry array and copy the allocated - * irq back to the qib_msix_entry array. */ - msix_entry = kcalloc(nvec, sizeof(*msix_entry), GFP_KERNEL); - if (!msix_entry) - goto do_intx; - - for (i = 0; i < nvec; i++) - msix_entry[i] = qib_msix_entry[i].msix; - - ret = pci_enable_msix_range(dd->pcidev, msix_entry, 1, nvec); - if (ret < 0) - goto free_msix_entry; - else - nvec = ret; - - for (i = 0; i < nvec; i++) - qib_msix_entry[i].msix = msix_entry[i]; - - kfree(msix_entry); - *msixcnt = nvec; - return; - -free_msix_entry: - kfree(msix_entry); - -do_intx: - qib_dev_err( - dd, - "pci_enable_msix_range %d vectors failed: %d, falling back to INTx\n", - nvec, ret); - *msixcnt = 0; - qib_enable_intx(dd->pcidev); -} - /** * We save the msi lo and hi values, so we can restore them after * chip reset (the kernel PCI infrastructure doesn't yet handle that * correctly. */ -static int qib_msi_setup(struct qib_devdata *dd, int pos) +static void qib_msi_setup(struct qib_devdata *dd, int pos) { struct pci_dev *pdev = dd->pcidev; u16 control; - int ret; - ret = pci_enable_msi(pdev); - if (ret) - qib_dev_err(dd, - "pci_enable_msi failed: %d, interrupts may not work\n", - ret); - /* continue even if it fails, we may still be OK... */ - - pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO, - &dd->msi_lo); - pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_HI, - &dd->msi_hi); + pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO, &dd->msi_lo); + pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_HI, &dd->msi_hi); pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &control); + /* now save the data (vector) info */ - pci_read_config_word(pdev, pos + ((control & PCI_MSI_FLAGS_64BIT) - ? 12 : 8), + pci_read_config_word(pdev, + pos + ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8), &dd->msi_data); - return ret; } -int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent, - struct qib_msix_entry *entry) +static int qib_allocate_irqs(struct qib_devdata *dd, u32 maxvec) +{ + unsigned int flags = PCI_IRQ_LEGACY; + + /* Check our capabilities */ + if (dd->pcidev->msix_cap) { + flags |= PCI_IRQ_MSIX; + } else { + if (dd->pcidev->msi_cap) { + flags |= PCI_IRQ_MSI; + /* Get msi_lo and msi_hi */ + qib_msi_setup(dd, dd->pcidev->msi_cap); + } + } + + if (!(flags & (PCI_IRQ_MSIX | PCI_IRQ_MSI))) + qib_dev_err(dd, "No PCI MSI or MSIx capability!\n"); + + return pci_alloc_irq_vectors(dd->pcidev, 1, maxvec, flags); +} + +int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent) { u16 linkstat, speed; - int pos = 0, ret = 1; + int nvec; + int maxvec; + int ret = 0; if (!pci_is_pcie(dd->pcidev)) { qib_dev_err(dd, "Can't find PCI Express capability!\n"); /* set up something... */ dd->lbus_width = 1; dd->lbus_speed = 2500; /* Gen1, 2.5GHz */ + ret = -1; goto bail; } - pos = dd->pcidev->msix_cap; - if (nent && *nent && pos) { - qib_msix_setup(dd, pos, nent, entry); - ret = 0; /* did it, either MSIx or INTx */ - } else { - pos = dd->pcidev->msi_cap; - if (pos) - ret = qib_msi_setup(dd, pos); - else - qib_dev_err(dd, "No PCI MSI or MSIx capability!\n"); + maxvec = (nent && *nent) ? *nent : 1; + nvec = qib_allocate_irqs(dd, maxvec); + if (nvec < 0) { + ret = nvec; + goto bail; + } + + /* + * If nent exists, make sure to record how many vectors were allocated + */ + if (nent) { + *nent = nvec; + + /* + * If we requested (nent) MSIX, but msix_enabled is not set, + * pci_alloc_irq_vectors() enabled INTx. + */ + if (!dd->pcidev->msix_enabled) + qib_dev_err(dd, + "no msix vectors allocated, using INTx\n"); } - if (!pos) - qib_enable_intx(dd->pcidev); pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat); /* @@ -379,7 +352,7 @@ int qib_reinit_intr(struct qib_devdata *dd) ret = 1; bail: if (!ret && (dd->flags & QIB_HAS_INTX)) { - qib_enable_intx(dd->pcidev); + qib_enable_intx(dd); ret = 1; } @@ -397,7 +370,7 @@ bail: void qib_nomsi(struct qib_devdata *dd) { dd->msi_lo = 0; - pci_disable_msi(dd->pcidev); + pci_free_irq_vectors(dd->pcidev); } /* @@ -405,23 +378,21 @@ void qib_nomsi(struct qib_devdata *dd) */ void qib_nomsix(struct qib_devdata *dd) { - pci_disable_msix(dd->pcidev); + pci_free_irq_vectors(dd->pcidev); } /* * Similar to pci_intx(pdev, 1), except that we make sure * msi(x) is off. */ -void qib_enable_intx(struct pci_dev *pdev) +void qib_enable_intx(struct qib_devdata *dd) { u16 cw, new; int pos; + struct pci_dev *pdev = dd->pcidev; - /* first, turn on INTx */ - pci_read_config_word(pdev, PCI_COMMAND, &cw); - new = cw & ~PCI_COMMAND_INTX_DISABLE; - if (new != cw) - pci_write_config_word(pdev, PCI_COMMAND, new); + if (pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_LEGACY) < 0) + qib_dev_err(dd, "Failed to enable INTx\n"); pos = pdev->msi_cap; if (pos) { -- cgit v1.2.1 From fe4e74eeb24286c730672e776ac4c2c3caa19137 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 9 Jun 2017 16:00:12 -0700 Subject: IB/hfi1: Initialize TID lists to avoid crash on cleanup The expected receive lists (tid_xxx_list) are not initialized until late in the receive context initialization. If an error happens before the initialization, a NULL pointer access will occur during cleanup. Initialized the lists sooner rather than later to avoid this Oops: IP: unlock_exp_tids.isra.11+0x26/0xd0 [hfi1] RIP: 0010:unlock_exp_tids.isra.11+0x26/0xd0 [hfi1] Call Trace: hfi1_user_exp_rcv_free+0x79/0xb0 [hfi1] hfi1_file_close+0x87/0x360 [hfi1] __fput+0xe7/0x210 ____fput+0xe/0x10 Reviewed-by: Mike Marciniszyn Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/exp_rcv.c | 4 ---- drivers/infiniband/hw/hfi1/init.c | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c index 08d13ed1b574..0af91675acc6 100644 --- a/drivers/infiniband/hw/hfi1/exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/exp_rcv.c @@ -69,10 +69,6 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) struct tid_group *grp; int i; - hfi1_exp_tid_group_init(&rcd->tid_group_list); - hfi1_exp_tid_group_init(&rcd->tid_used_list); - hfi1_exp_tid_group_init(&rcd->tid_full_list); - tidbase = rcd->expected_base; for (i = 0; i < rcd->expected_count / dd->rcv_entries.group_size; i++) { diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 4a11d4da4c92..a00308ccf016 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -67,6 +67,7 @@ #include "aspm.h" #include "affinity.h" #include "vnic.h" +#include "exp_rcv.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt @@ -221,6 +222,9 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, hfi1_cdbg(PROC, "setting up context %u\n", ctxt); INIT_LIST_HEAD(&rcd->qp_wait_list); + hfi1_exp_tid_group_init(&rcd->tid_group_list); + hfi1_exp_tid_group_init(&rcd->tid_used_list); + hfi1_exp_tid_group_init(&rcd->tid_full_list); rcd->ppd = ppd; rcd->dd = dd; __set_bit(0, rcd->in_use_ctxts); -- cgit v1.2.1 From f683c80ca68e087b55c6f9ab6ca6beb88ebc6d69 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 9 Jun 2017 16:00:19 -0700 Subject: IB/hfi1: Resolve kernel panics by reference counting receive contexts Base receive contexts can be used by sub contexts. Because of this, resources for the context cannot be completely freed until all sub contexts are done using the base context. Introduce a reference count so that the base receive context can be freed only when all sub contexts are done with it. Use the provided function call for setting default send context integrity rather than the manual method. The cleanup path does not set all variables back to NULL after freeing resources. Since the clean up code can get called more than once, (e.g. during context close and on the error path), it is necessary to make sure that all the variables are NULLed. Possible crash are: BUG: unable to handle kernel paging request at 0000000001908900 IP: read_csr+0x24/0x30 [hfi1] RIP: 0010:read_csr+0x24/0x30 [hfi1] Call Trace: sc_disable+0x40/0x110 [hfi1] hfi1_file_close+0x16f/0x360 [hfi1] __fput+0xe7/0x210 ____fput+0xe/0x10 or kernel BUG at mm/slub.c:3877! RIP: 0010:kfree+0x14f/0x170 Call Trace: hfi1_free_ctxtdata+0x19a/0x2b0 [hfi1] ? hfi1_user_exp_rcv_grp_free+0x73/0x80 [hfi1] hfi1_file_close+0x20f/0x360 [hfi1] __fput+0xe7/0x210 ____fput+0xe/0x10 Fixes: Commit 62239fc6e554 ("IB/hfi1: Clean up on context initialization failure") Reviewed-by: Mike Marciniszyn Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 39 +++++++++++++++-------- drivers/infiniband/hw/hfi1/hfi.h | 11 +++---- drivers/infiniband/hw/hfi1/init.c | 58 ++++++++++++++++++++++++++++------ drivers/infiniband/hw/hfi1/vnic_main.c | 11 ++++--- 4 files changed, 85 insertions(+), 34 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 2dd8758f0644..bbf80b1dd9d9 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -774,6 +774,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) *ev = 0; __clear_bit(fdata->subctxt, uctxt->in_use_ctxts); + fdata->uctxt = NULL; + hfi1_rcd_put(uctxt); /* fdata reference */ if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) { mutex_unlock(&hfi1_mutex); goto done; @@ -794,16 +796,15 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) /* Clear the context's J_KEY */ hfi1_clear_ctxt_jkey(dd, uctxt->ctxt); /* - * Reset context integrity checks to default. - * (writes to CSRs probably belong in chip.c) + * If a send context is allocated, reset context integrity + * checks to default and disable the send context. */ - write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE, - hfi1_pkt_default_send_ctxt_mask(dd, uctxt->sc->type)); - sc_disable(uctxt->sc); + if (uctxt->sc) { + set_pio_integrity(uctxt->sc); + sc_disable(uctxt->sc); + } spin_unlock_irqrestore(&dd->uctxt_lock, flags); - dd->rcd[uctxt->ctxt] = NULL; - hfi1_free_ctxt_rcv_groups(uctxt); hfi1_clear_ctxt_pkey(dd, uctxt); @@ -816,8 +817,11 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) hfi1_stats.sps_ctxts--; if (++dd->freectxts == dd->num_user_contexts) aspm_enable_all(dd); + + /* _rcd_put() should be done after releasing mutex */ + dd->rcd[uctxt->ctxt] = NULL; mutex_unlock(&hfi1_mutex); - hfi1_free_ctxtdata(dd, uctxt); + hfi1_rcd_put(uctxt); /* dd reference */ done: mmdrop(fdata->mm); kobject_put(&dd->kobj); @@ -887,16 +891,17 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) ret = wait_event_interruptible(fd->uctxt->wait, !test_bit( HFI1_CTXT_BASE_UNINIT, &fd->uctxt->event_flags)); - if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) { - clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); - return -ENOMEM; - } + if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) + ret = -ENOMEM; + /* The only thing a sub context needs is the user_xxx stuff */ if (!ret) ret = init_user_ctxt(fd); - if (ret) + if (ret) { clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); + hfi1_rcd_put(fd->uctxt); + } } else if (!ret) { ret = setup_base_ctxt(fd); if (fd->uctxt->subctxt_cnt) { @@ -961,6 +966,8 @@ static int find_sub_ctxt(struct hfi1_filedata *fd, fd->uctxt = uctxt; fd->subctxt = subctxt; + + hfi1_rcd_get(uctxt); __set_bit(fd->subctxt, uctxt->in_use_ctxts); return 1; @@ -1069,11 +1076,14 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, aspm_disable_all(dd); fd->uctxt = uctxt; + /* Count the reference for the fd */ + hfi1_rcd_get(uctxt); + return 0; ctxdata_free: dd->rcd[ctxt] = NULL; - hfi1_free_ctxtdata(dd, uctxt); + hfi1_rcd_put(uctxt); return ret; } @@ -1273,6 +1283,7 @@ static int setup_base_ctxt(struct hfi1_filedata *fd) return 0; setup_failed: + /* Call _free_ctxtdata, not _rcd_put(). We still need the context. */ hfi1_free_ctxtdata(dd, uctxt); return ret; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index bca781c3b5ac..1a33a5087734 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -213,11 +213,9 @@ struct hfi1_ctxtdata { /* dynamic receive available interrupt timeout */ u32 rcvavail_timeout; - /* - * number of opens (including slave sub-contexts) on this instance - * (ignoring forks, dup, etc. for now) - */ - int cnt; + /* Reference count the base context usage */ + struct kref kref; + /* Device context index */ unsigned ctxt; /* @@ -1291,7 +1289,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, struct hfi1_devdata *dd, u8 hw_pidx, u8 port); void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); - +int hfi1_rcd_put(struct hfi1_ctxtdata *rcd); +void hfi1_rcd_get(struct hfi1_ctxtdata *rcd); int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread); int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread); int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index a00308ccf016..dfdb4126ca05 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -191,15 +191,45 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) nomem: ret = -ENOMEM; - if (dd->rcd) { - for (i = 0; i < dd->num_rcv_contexts; ++i) - hfi1_free_ctxtdata(dd, dd->rcd[i]); - } + for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) + hfi1_rcd_put(dd->rcd[i]); + + /* All the contexts should be freed, free the array */ kfree(dd->rcd); dd->rcd = NULL; return ret; } +/* + * Helper routines for the receive context reference count (rcd and uctxt) + */ +static void hfi1_rcd_init(struct hfi1_ctxtdata *rcd) +{ + kref_init(&rcd->kref); +} + +static void hfi1_rcd_free(struct kref *kref) +{ + struct hfi1_ctxtdata *rcd = + container_of(kref, struct hfi1_ctxtdata, kref); + + hfi1_free_ctxtdata(rcd->dd, rcd); + kfree(rcd); +} + +int hfi1_rcd_put(struct hfi1_ctxtdata *rcd) +{ + if (rcd) + return kref_put(&rcd->kref, hfi1_rcd_free); + + return 0; +} + +void hfi1_rcd_get(struct hfi1_ctxtdata *rcd) +{ + kref_get(&rcd->kref); +} + /* * Common code for user and kernel context setup. */ @@ -332,6 +362,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, if (!rcd->opstats) goto bail; } + + hfi1_rcd_init(rcd); } return rcd; bail: @@ -931,14 +963,11 @@ static void shutdown_device(struct hfi1_devdata *dd) * @rcd: the ctxtdata structure * * free up any allocated data for a context - * This should not touch anything that would affect a simultaneous - * re-allocation of context data, because it is called after hfi1_mutex - * is released (and can be called from reinit as well). * It should never change any chip state, or global driver state. */ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) { - unsigned e; + u32 e; if (!rcd) return; @@ -957,6 +986,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) /* all the RcvArray entries should have been cleared by now */ kfree(rcd->egrbufs.rcvtids); + rcd->egrbufs.rcvtids = NULL; for (e = 0; e < rcd->egrbufs.alloced; e++) { if (rcd->egrbufs.buffers[e].dma) @@ -966,13 +996,21 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) rcd->egrbufs.buffers[e].dma); } kfree(rcd->egrbufs.buffers); + rcd->egrbufs.alloced = 0; + rcd->egrbufs.buffers = NULL; sc_free(rcd->sc); + rcd->sc = NULL; + vfree(rcd->subctxt_uregbase); vfree(rcd->subctxt_rcvegrbuf); vfree(rcd->subctxt_rcvhdr_base); kfree(rcd->opstats); - kfree(rcd); + + rcd->subctxt_uregbase = NULL; + rcd->subctxt_rcvegrbuf = NULL; + rcd->subctxt_rcvhdr_base = NULL; + rcd->opstats = NULL; } /* @@ -1366,7 +1404,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) tmp[ctxt] = NULL; /* debugging paranoia */ if (rcd) { hfi1_clear_tids(rcd); - hfi1_free_ctxtdata(dd, rcd); + hfi1_rcd_put(rcd); } } kfree(tmp); diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index b601c2929f8f..950c1b4df442 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -156,11 +156,11 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd, return ret; bail: /* - * hfi1_free_ctxtdata() also releases send_context - * structure if uctxt->sc is not null + * hfi1_rcd_put() will call hfi1_free_ctxtdata(), which will + * release send_context structure if uctxt->sc is not null */ dd->rcd[uctxt->ctxt] = NULL; - hfi1_free_ctxtdata(dd, uctxt); + hfi1_rcd_put(uctxt); dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret); return ret; } @@ -208,7 +208,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, hfi1_clear_ctxt_pkey(dd, uctxt); hfi1_stats.sps_ctxts--; - hfi1_free_ctxtdata(dd, uctxt); + hfi1_rcd_put(uctxt); } void hfi1_vnic_setup(struct hfi1_devdata *dd) @@ -751,6 +751,7 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo) rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]); if (rc) break; + hfi1_rcd_get(dd->vnic.ctxt[i]); dd->vnic.ctxt[i]->vnic_q_idx = i; } @@ -762,6 +763,7 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo) */ while (i-- > dd->vnic.num_ctxt) { deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); + hfi1_rcd_put(dd->vnic.ctxt[i]); dd->vnic.ctxt[i] = NULL; } goto alloc_fail; @@ -791,6 +793,7 @@ static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo) if (--dd->vnic.num_vports == 0) { for (i = 0; i < dd->vnic.num_ctxt; i++) { deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); + hfi1_rcd_put(dd->vnic.ctxt[i]); dd->vnic.ctxt[i] = NULL; } hfi1_deinit_vnic_rsm(dd); -- cgit v1.2.1 From bc5214ee29220251e5507882696ded5ca183b169 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Fri, 9 Jun 2017 16:00:26 -0700 Subject: IB/hfi1: Handle missing magic values in config file Driver does not check whether proper configuration file exist in EPROM, and treats empty partition as possible valid configuration, preventing fallback to default firmware. Change EPROM read function to treat missing magic number as read error. Reviewed-by: Jakub Byczkowski Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/eprom.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c index 26da124c88e2..d46b17107901 100644 --- a/drivers/infiniband/hw/hfi1/eprom.c +++ b/drivers/infiniband/hw/hfi1/eprom.c @@ -250,7 +250,6 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data, { void *buffer; void *p; - u32 length; int ret; buffer = kmalloc(P1_SIZE, GFP_KERNEL); @@ -265,13 +264,13 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data, /* scan for image magic that may trail the actual data */ p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE); - if (p) - length = p - buffer; - else - length = P1_SIZE; + if (!p) { + kfree(buffer); + return -ENOENT; + } *data = buffer; - *size = length; + *size = p - buffer; return 0; } -- cgit v1.2.1