Skip to content

Commit 98ef77d

Browse files
chucklever authored and amschuma-ntap committed
xprtrdma: Send Queue size grows after a reconnect
Eli Dorfman reports that after a series of idle disconnects, an RPC/RDMA transport becomes unusable (rdma_create_qp returns -ENOMEM). Problem was tracked down to increasing Send Queue size after each reconnect.

The rdma_create_qp() API does not promise to leave its @qp_init_attr parameter unaltered. In fact, some drivers do modify one or more of its fields. Thus our calls to rdma_create_qp must use a fresh copy of ib_qp_init_attr each time.

This fix is appropriate for kernels dating back to late 2007, though it will have to be adapted, as the connect code has changed over the years.

Reported-by: Eli Dorfman <eli@vastdata.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
1 parent f9e1afe commit 98ef77d

File tree

1 file changed

+14
-12
lines changed

1 file changed

+14
-12
lines changed

net/sunrpc/xprtrdma/verbs.c

Lines changed: 14 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -606,10 +606,10 @@ void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt)
606606
* Unlike a normal reconnection, a fresh PD and a new set
607607
* of MRs and buffers is needed.
608608
*/
609-
static int
610-
rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
611-
struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
609+
static int rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
610+
struct ib_qp_init_attr *qp_init_attr)
612611
{
612+
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
613613
int rc, err;
614614

615615
trace_xprtrdma_reinsert(r_xprt);
@@ -626,7 +626,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
626626
}
627627

628628
rc = -ENETUNREACH;
629-
err = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
629+
err = rdma_create_qp(ia->ri_id, ia->ri_pd, qp_init_attr);
630630
if (err) {
631631
pr_err("rpcrdma: rdma_create_qp returned %d\n", err);
632632
goto out3;
@@ -643,16 +643,16 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
643643
return rc;
644644
}
645645

646-
static int
647-
rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
648-
struct rpcrdma_ia *ia)
646+
static int rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt,
647+
struct ib_qp_init_attr *qp_init_attr)
649648
{
649+
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
650650
struct rdma_cm_id *id, *old;
651651
int err, rc;
652652

653653
trace_xprtrdma_reconnect(r_xprt);
654654

655-
rpcrdma_ep_disconnect(ep, ia);
655+
rpcrdma_ep_disconnect(&r_xprt->rx_ep, ia);
656656

657657
rc = -EHOSTUNREACH;
658658
id = rpcrdma_create_id(r_xprt, ia);
@@ -674,7 +674,7 @@ rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
674674
goto out_destroy;
675675
}
676676

677-
err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
677+
err = rdma_create_qp(id, ia->ri_pd, qp_init_attr);
678678
if (err)
679679
goto out_destroy;
680680

@@ -699,25 +699,27 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
699699
struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
700700
rx_ia);
701701
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
702+
struct ib_qp_init_attr qp_init_attr;
702703
int rc;
703704

704705
retry:
706+
memcpy(&qp_init_attr, &ep->rep_attr, sizeof(qp_init_attr));
705707
switch (ep->rep_connected) {
706708
case 0:
707709
dprintk("RPC: %s: connecting...\n", __func__);
708-
rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
710+
rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &qp_init_attr);
709711
if (rc) {
710712
rc = -ENETUNREACH;
711713
goto out_noupdate;
712714
}
713715
break;
714716
case -ENODEV:
715-
rc = rpcrdma_ep_recreate_xprt(r_xprt, ep, ia);
717+
rc = rpcrdma_ep_recreate_xprt(r_xprt, &qp_init_attr);
716718
if (rc)
717719
goto out_noupdate;
718720
break;
719721
default:
720-
rc = rpcrdma_ep_reconnect(r_xprt, ep, ia);
722+
rc = rpcrdma_ep_reconnect(r_xprt, &qp_init_attr);
721723
if (rc)
722724
goto out;
723725
}

0 commit comments

Comments
 (0)