net/smc: check for healthy link group resp. connections

If a problem for at least one connection of a link group is detected,
the whole link group and all its connections are terminated.
This patch adds a check for healthy link group when trying to reserve
a work request, and checks for healthy connections before starting
a tx worker.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Ursula Braun 2018-01-25 11:15:36 +01:00 committed by David S. Miller
parent 732720fafd
commit 1a0a04c7a8
4 changed files with 29 additions and 12 deletions

View File

@ -65,9 +65,14 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
struct smc_cdc_tx_pend **pend)
{
struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
int rc;
return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
(struct smc_wr_tx_pend_priv **)pend);
rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
(struct smc_wr_tx_pend_priv **)pend);
if (!conn->alert_token_local)
/* abnormal termination */
rc = -EPIPE;
return rc;
}
static inline void smc_cdc_add_pending_send(struct smc_connection *conn,

View File

@ -86,7 +86,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns))
goto errout;
if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && smc->conn.lgr) {
if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) &&
smc->conn.alert_token_local) {
struct smc_connection *conn = &smc->conn;
struct smc_diag_conninfo cinfo = {
.token = conn->alert_token_local,
@ -124,7 +125,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
goto errout;
}
if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr) {
if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr &&
!list_empty(&smc->conn.lgr->list)) {
struct smc_diag_lgrinfo linfo = {
.role = smc->conn.lgr->role,
.lnk[0].ibport = smc->conn.lgr->lnk[0].ibport,

View File

@ -408,8 +408,9 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
goto out_unlock;
}
rc = 0;
schedule_delayed_work(&conn->tx_work,
SMC_TX_WORK_DELAY);
if (conn->alert_token_local) /* connection healthy */
schedule_delayed_work(&conn->tx_work,
SMC_TX_WORK_DELAY);
}
goto out_unlock;
}
@ -440,10 +441,17 @@ static void smc_tx_work(struct work_struct *work)
int rc;
lock_sock(&smc->sk);
if (smc->sk.sk_err ||
!conn->alert_token_local ||
conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
goto out;
rc = smc_tx_sndbuf_nonempty(conn);
if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked &&
!atomic_read(&conn->bytes_to_rcv))
conn->local_rx_ctrl.prod_flags.write_blocked = 0;
out:
release_sock(&smc->sk);
}
@ -464,7 +472,8 @@ void smc_tx_consumer_update(struct smc_connection *conn)
((to_confirm > conn->rmbe_update_limit) &&
((to_confirm > (conn->rmbe_size / 2)) ||
conn->local_rx_ctrl.prod_flags.write_blocked))) {
if (smc_cdc_get_slot_and_msg_send(conn) < 0) {
if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
conn->alert_token_local) { /* connection healthy */
schedule_delayed_work(&conn->tx_work,
SMC_TX_WORK_DELAY);
return;

View File

@ -174,9 +174,9 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
struct smc_wr_tx_pend_priv **wr_pend_priv)
{
struct smc_wr_tx_pend *wr_pend;
u32 idx = link->wr_tx_cnt;
struct ib_send_wr *wr_ib;
u64 wr_id;
u32 idx;
int rc;
*wr_buf = NULL;
@ -186,16 +186,17 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
if (rc)
return rc;
} else {
struct smc_link_group *lgr;
lgr = container_of(link, struct smc_link_group,
lnk[SMC_SINGLE_LINK]);
rc = wait_event_timeout(
link->wr_tx_wait,
list_empty(&lgr->list) || /* lgr terminated */
(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
SMC_WR_TX_WAIT_FREE_SLOT_TIME);
if (!rc) {
/* timeout - terminate connections */
struct smc_link_group *lgr;
lgr = container_of(link, struct smc_link_group,
lnk[SMC_SINGLE_LINK]);
smc_lgr_terminate(lgr);
return -EPIPE;
}