diff options
-rw-r--r-- | etc/iscsid.conf | 2 | ||||
-rw-r--r-- | include/iscsi_if.h | 2 | ||||
-rw-r--r-- | kernel/iscsi_tcp.c | 212 | ||||
-rw-r--r-- | kernel/iscsi_tcp.h | 1 | ||||
-rw-r--r-- | kernel/scsi_transport_iscsi.c | 43 | ||||
-rw-r--r-- | kernel/scsi_transport_iscsi.h | 7 | ||||
-rw-r--r-- | usr/idbm.c | 3 | ||||
-rw-r--r-- | usr/initiator.c | 315 | ||||
-rw-r--r-- | usr/initiator.h | 1 | ||||
-rw-r--r-- | usr/util.c | 2 |
10 files changed, 386 insertions, 202 deletions
diff --git a/etc/iscsid.conf b/etc/iscsid.conf index 14ff770..9406073 100644 --- a/etc/iscsid.conf +++ b/etc/iscsid.conf @@ -6,7 +6,7 @@ node.active_cnx = 1 node.startup = manual #node.session.auth.username = dima #node.session.auth.password = aloha -node.session.timeo.replacement_timeout = 0 +node.session.timeo.replacement_timeout = 120 node.session.err_timeo.abort_timeout = 10 node.session.err_timeo.reset_timeout = 30 node.session.iscsi.InitialR2T = No diff --git a/include/iscsi_if.h b/include/iscsi_if.h index 776f8e4..6819c2e 100644 --- a/include/iscsi_if.h +++ b/include/iscsi_if.h @@ -174,6 +174,7 @@ enum iscsi_param { ISCSI_PARAM_TPGT, ISCSI_PARAM_PERSISTENT_ADDRESS, ISCSI_PARAM_PERSISTENT_PORT, + ISCSI_PARAM_SESS_RECOVERY_TMO, /* pased in through bind conn using transport_fd */ ISCSI_PARAM_CONN_PORT, @@ -201,6 +202,7 @@ enum iscsi_param { #define ISCSI_TPGT (1 << ISCSI_PARAM_TPGT) #define ISCSI_PERSISTENT_ADDRESS (1 << ISCSI_PARAM_PERSISTENT_ADDRESS) #define ISCSI_PERSISTENT_PORT (1 << ISCSI_PARAM_PERSISTENT_PORT) +#define ISCSI_SESS_RECOVERY_TMO (1 << ISCSI_PARAM_SESS_RECOVERY_TMO) #define ISCSI_CONN_PORT (1 << ISCSI_PARAM_CONN_PORT) #define ISCSI_CONN_ADDRESS (1 << ISCSI_PARAM_CONN_ADDRESS) diff --git a/kernel/iscsi_tcp.c b/kernel/iscsi_tcp.c index 9af028c..044a3c6 100644 --- a/kernel/iscsi_tcp.c +++ b/kernel/iscsi_tcp.c @@ -82,6 +82,9 @@ module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO); /* global data */ static kmem_cache_t *taskcache; +#define session_to_cls(_sess) \ + hostdata_session(_sess->host->hostdata) + static inline void iscsi_buf_init_virt(struct iscsi_buf *ibuf, char *vbuf, int size) { @@ -1692,7 +1695,7 @@ iscsi_mtask_xmit(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask) if (mtask->data_count) mtask->xmstate |= XMSTATE_IMM_DATA; if (conn->c_stage != ISCSI_CONN_INITIAL_STAGE && - conn->stop_stage != STOP_CONN_RECOVER && + conn->stop_stage != STOP_CONN_RECOVER && conn->hdrdgst_en) iscsi_hdr_digest(conn, &mtask->headbuf, 
(u8*)mtask->hdrext); @@ -2290,11 +2293,14 @@ iscsi_xmitworker(void *data) mutex_unlock(&conn->xmitmutex); } -#define FAILURE_BAD_HOST 1 -#define FAILURE_SESSION_FAILED 2 -#define FAILURE_SESSION_FREED 3 -#define FAILURE_WINDOW_CLOSED 4 -#define FAILURE_SESSION_TERMINATE 5 +enum { + FAILURE_BAD_HOST = 1, + FAILURE_SESSION_FAILED, + FAILURE_SESSION_FREED, + FAILURE_WINDOW_CLOSED, + FAILURE_SESSION_TERMINATE, + FAILURE_SESSION_RECOVERY_TIMEOUT, +}; static int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) @@ -2314,7 +2320,10 @@ iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) spin_lock(&session->lock); if (session->state != ISCSI_STATE_LOGGED_IN) { - if (session->state == ISCSI_STATE_FAILED) { + if (session->recovery_failed) { + reason = FAILURE_SESSION_RECOVERY_TIMEOUT; + goto fault; + } else if (session->state == ISCSI_STATE_FAILED) { reason = FAILURE_SESSION_FAILED; goto reject; } else if (session->state == ISCSI_STATE_TERMINATE) { @@ -2709,6 +2718,22 @@ iscsi_conn_bind(struct iscsi_cls_session *cls_session, return 0; } +static void +iscsi_session_recovery_timedout(struct iscsi_cls_session *csession) +{ + struct Scsi_Host *shost = iscsi_session_to_shost(csession); + struct iscsi_session *session = iscsi_hostdata(shost->hostdata); + struct iscsi_conn *conn = session->leadconn; + + spin_lock_bh(&session->lock); + if (session->state != ISCSI_STATE_LOGGED_IN) { + session->recovery_failed = 1; + if (conn) + wake_up(&conn->ehwait); + } + spin_unlock_bh(&session->lock); +} + static int iscsi_conn_start(struct iscsi_cls_conn *cls_conn) { @@ -2724,7 +2749,6 @@ iscsi_conn_start(struct iscsi_cls_conn *cls_conn) } sk = conn->sock->sk; - write_lock_bh(&sk->sk_callback_lock); spin_lock_bh(&session->lock); conn->c_stage = ISCSI_CONN_STARTED; @@ -2740,8 +2764,13 @@ iscsi_conn_start(struct iscsi_cls_conn *cls_conn) conn->stop_stage = 0; conn->tmabort_state = TMABORT_INITIAL; session->age++; + session->recovery_failed = 0; + 
spin_unlock_bh(&session->lock); + write_unlock_bh(&sk->sk_callback_lock); + + iscsi_unblock_session(session_to_cls(session)); wake_up(&conn->ehwait); - break; + return 0; case STOP_CONN_TERM: session->conn_cnt++; conn->stop_stage = 0; @@ -2844,76 +2873,112 @@ flush_control_queues(struct iscsi_session *session, struct iscsi_conn *conn) conn->mtask = NULL; } - static void -iscsi_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) +iscsi_suspend_conn_rx(struct iscsi_conn *conn) { - struct iscsi_conn *conn = cls_conn->dd_data; - struct iscsi_session *session = conn->session; struct sock *sk; - unsigned long flags; BUG_ON(!conn->sock); sk = conn->sock->sk; write_lock_bh(&sk->sk_callback_lock); set_bit(SUSPEND_BIT, &conn->suspend_rx); write_unlock_bh(&sk->sk_callback_lock); +} - mutex_lock(&conn->xmitmutex); - - spin_lock_irqsave(session->host->host_lock, flags); - spin_lock(&session->lock); +static void +iscsi_start_session_recovery(struct iscsi_session *session, + struct iscsi_conn *conn, int flag) +{ + spin_lock_bh(&session->lock); + if (conn->stop_stage == STOP_CONN_RECOVER || + conn->stop_stage == STOP_CONN_TERM) { + spin_unlock_bh(&session->lock); + return; + } conn->stop_stage = flag; + spin_unlock_bh(&session->lock); + + iscsi_suspend_conn_rx(conn); + + mutex_lock(&conn->xmitmutex); + spin_lock_bh(&session->lock); conn->c_stage = ISCSI_CONN_STOPPED; set_bit(SUSPEND_BIT, &conn->suspend_tx); - if (flag != STOP_CONN_SUSPEND) - session->conn_cnt--; - + session->conn_cnt--; if (session->conn_cnt == 0 || session->leadconn == conn) session->state = ISCSI_STATE_FAILED; - spin_unlock(&session->lock); - spin_unlock_irqrestore(session->host->host_lock, flags); + spin_unlock_bh(&session->lock); - if (flag == STOP_CONN_TERM || flag == STOP_CONN_RECOVER) { - /* - * Socket must go now. - */ - sock_hold(conn->sock->sk); - iscsi_conn_restore_callbacks(conn); - sock_put(conn->sock->sk); + /* + * Socket must go now. 
+ */ + sock_hold(conn->sock->sk); + iscsi_conn_restore_callbacks(conn); + sock_put(conn->sock->sk); - /* - * flush queues. - */ - spin_lock_bh(&session->lock); - fail_all_commands(session, conn); - flush_control_queues(session, conn); - spin_unlock_bh(&session->lock); + /* + * flush queues. + */ + spin_lock_bh(&session->lock); + fail_all_commands(session, conn); + flush_control_queues(session, conn); + spin_unlock_bh(&session->lock); - /* - * release socket only after we stopped data_xmit() - * activity and flushed all outstandings - */ - sock_release(conn->sock); - conn->sock = NULL; + /* + * release socket only after we stopped data_xmit() + * activity and flushed all outstandings + */ + sock_release(conn->sock); + conn->sock = NULL; - /* - * for connection level recovery we should not calculate - * header digest. conn->hdr_size used for optimization - * in hdr_extract() and will be re-negotiated at - * set_param() time. - */ - if (flag == STOP_CONN_RECOVER) { - conn->hdr_size = sizeof(struct iscsi_hdr); - conn->hdrdgst_en = 0; - conn->datadgst_en = 0; - } + /* + * for connection level recovery we should not calculate + * header digest. conn->hdr_size used for optimization + * in hdr_extract() and will be re-negotiated at + * set_param() time. 
+ */ + if (flag == STOP_CONN_RECOVER) { + conn->hdr_size = sizeof(struct iscsi_hdr); + conn->hdrdgst_en = 0; + conn->datadgst_en = 0; + + if (session->state == ISCSI_STATE_FAILED) + iscsi_block_session(session_to_cls(session)); } mutex_unlock(&conn->xmitmutex); } +static void +iscsi_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) +{ + struct iscsi_conn *conn = cls_conn->dd_data; + struct iscsi_session *session = conn->session; + + switch (flag) { + case STOP_CONN_RECOVER: + case STOP_CONN_TERM: + iscsi_start_session_recovery(session, conn, flag); + return; + case STOP_CONN_SUSPEND: + iscsi_suspend_conn_rx(conn); + + mutex_lock(&conn->xmitmutex); + spin_lock_bh(&session->lock); + + conn->stop_stage = flag; + conn->c_stage = ISCSI_CONN_STOPPED; + set_bit(SUSPEND_BIT, &conn->suspend_tx); + + spin_unlock_bh(&session->lock); + mutex_unlock(&conn->xmitmutex); + break; + default: + printk(KERN_ERR "invalid stop flag %d\n", flag); + } +} + static int iscsi_conn_send_generic(struct iscsi_conn *conn, struct iscsi_hdr *hdr, char *data, uint32_t data_size) @@ -3006,6 +3071,7 @@ iscsi_eh_host_reset(struct scsi_cmnd *sc) struct Scsi_Host *host = sc->device->host; struct iscsi_session *session = iscsi_hostdata(host->hostdata); struct iscsi_conn *conn = session->leadconn; + int fail_session = 0; spin_lock_bh(&session->lock); if (session->state == ISCSI_STATE_TERMINATE) { @@ -3016,18 +3082,31 @@ failed: return FAILED; } - if (sc->SCp.phase == session->age && - session->state != ISCSI_STATE_FAILED) { + if (sc->SCp.phase == session->age) { debug_scsi("failing connection CID %d due to SCSI host reset", conn->id); - iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); + fail_session = 1; } spin_unlock_bh(&session->lock); + /* + * we drop the lock here but the leadconn cannot be destroyed while + * we are in the scsi eh + */ + if (fail_session) { + iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); + /* + * if userspace cannot respond then we must kick this off + * here for it + */ + 
iscsi_start_session_recovery(session, conn, STOP_CONN_RECOVER); + } + debug_scsi("iscsi_eh_host_reset wait for relogin\n"); wait_event_interruptible(conn->ehwait, session->state == ISCSI_STATE_TERMINATE || - session->state == ISCSI_STATE_LOGGED_IN); + session->state == ISCSI_STATE_LOGGED_IN || + session->recovery_failed); if (signal_pending(current)) flush_signals(current); @@ -3110,12 +3189,14 @@ iscsi_exec_abort_task(struct scsi_cmnd *sc, struct iscsi_cmd_task *ctask) * * 1) abort response * 2) abort timeout - * 3) session is terminated or restarted + * 3) session is terminated or restarted or userspace has + * given up on recovery */ wait_event_interruptible(conn->ehwait, sc->SCp.phase != session->age || session->state != ISCSI_STATE_LOGGED_IN || - conn->tmabort_state != TMABORT_INITIAL); + conn->tmabort_state != TMABORT_INITIAL || + session->recovery_failed); if (signal_pending(current)) flush_signals(current); del_timer_sync(&conn->tmabort_timer); @@ -3412,16 +3493,6 @@ iscsi_conn_set_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param, struct iscsi_conn *conn = cls_conn->dd_data; struct iscsi_session *session = conn->session; - spin_lock_bh(&session->lock); - if (conn->c_stage != ISCSI_CONN_INITIAL_STAGE && - conn->stop_stage != STOP_CONN_RECOVER) { - printk(KERN_ERR "iscsi_tcp: can not change parameter [%d]\n", - param); - spin_unlock_bh(&session->lock); - return 0; - } - spin_unlock_bh(&session->lock); - switch(param) { case ISCSI_PARAM_MAX_RECV_DLENGTH: { char *saveptr = conn->data; @@ -3744,6 +3815,7 @@ static struct iscsi_transport iscsi_tcp_transport = { .stop_conn = iscsi_conn_stop, .send_pdu = iscsi_conn_send_pdu, .get_stats = iscsi_conn_get_stats, + .session_recovery_timedout = iscsi_session_recovery_timedout, }; static int __init diff --git a/kernel/iscsi_tcp.h b/kernel/iscsi_tcp.h index 0e83cf7..12ef641 100644 --- a/kernel/iscsi_tcp.h +++ b/kernel/iscsi_tcp.h @@ -229,6 +229,7 @@ struct iscsi_session { * - mgmtpool, * * - r2tpool */ int 
state; /* session state */ + int recovery_failed; struct list_head item; void *auth_client; int conn_cnt; diff --git a/kernel/scsi_transport_iscsi.c b/kernel/scsi_transport_iscsi.c index f794a0a..c30d57d 100644 --- a/kernel/scsi_transport_iscsi.c +++ b/kernel/scsi_transport_iscsi.c @@ -31,7 +31,7 @@ #include "scsi_transport_iscsi.h" #include "iscsi_if.h" -#define ISCSI_SESSION_ATTRS 10 +#define ISCSI_SESSION_ATTRS 11 #define ISCSI_CONN_ATTRS 10 #define ISCSI_HOST_ATTRS 0 @@ -265,6 +265,35 @@ static int iscsi_user_scan(struct Scsi_Host *shost, uint channel, return 0; } +static void session_recovery_timedout(void *data) +{ + struct iscsi_cls_session *session = data; + + dev_printk(KERN_INFO, &session->dev, "session recovery timed out " + "after %d secs\n", session->recovery_tmo); + + if (session->transport->session_recovery_timedout) + session->transport->session_recovery_timedout(session); + + scsi_target_unblock(&session->dev); +} + +void iscsi_unblock_session(struct iscsi_cls_session *session) +{ + if (!cancel_delayed_work(&session->recovery_work)) + flush_scheduled_work(); + scsi_target_unblock(&session->dev); +} +EXPORT_SYMBOL_GPL(iscsi_unblock_session); + +void iscsi_block_session(struct iscsi_cls_session *session) +{ + scsi_target_block(&session->dev); + schedule_delayed_work(&session->recovery_work, + session->recovery_tmo * HZ); +} +EXPORT_SYMBOL_GPL(iscsi_block_session); + /** * iscsi_create_session - create iscsi class session * @shost: scsi host @@ -289,6 +318,8 @@ iscsi_create_session(struct Scsi_Host *shost, if (!session) goto module_put; session->transport = transport; + session->recovery_tmo = 120; + INIT_WORK(&session->recovery_work, session_recovery_timedout, session); if (transport->sessiondata_size) session->dd_data = &session[1]; @@ -344,6 +375,9 @@ int iscsi_destroy_session(struct iscsi_cls_session *session) list_del(&session->host_list); spin_unlock_irqrestore(shost->host_lock, flags); + if (!cancel_delayed_work(&session->recovery_work)) + 
flush_scheduled_work(); + transport_unregister_device(&session->dev); device_unregister(&session->dev); return 0; @@ -946,6 +980,11 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) return -EINVAL; switch (ev->u.set_param.param) { + case ISCSI_PARAM_SESS_RECOVERY_TMO: + iscsi_copy_param(ev, &value, data); + if (value != 0) + session->recovery_tmo = value; + break; case ISCSI_PARAM_TARGET_NAME: /* this should not change between logins */ if (session->targetname) @@ -1255,6 +1294,7 @@ static ISCSI_CLASS_ATTR(priv_sess, field, S_IRUGO, show_priv_session_##field, \ NULL) iscsi_priv_session_attr(targetname, "%s"); iscsi_priv_session_attr(tpgt, "%d"); +iscsi_priv_session_attr(recovery_tmo, "%d"); #define iscsi_priv_conn_attr_show(field, format) \ static ssize_t \ @@ -1445,6 +1485,7 @@ iscsi_register_transport(struct iscsi_transport *tt) SETUP_SESSION_RD_ATTR(data_pdu_in_order, ISCSI_PDU_INORDER_EN); SETUP_SESSION_RD_ATTR(data_seq_in_order, ISCSI_DATASEQ_INORDER_EN); SETUP_SESSION_RD_ATTR(erl, ISCSI_ERL); + SETUP_PRIV_SESSION_RD_ATTR(recovery_tmo); if (tt->param_mask & ISCSI_TARGET_NAME) SETUP_SESSION_RD_ATTR(targetname, ISCSI_TARGET_NAME); diff --git a/kernel/scsi_transport_iscsi.h b/kernel/scsi_transport_iscsi.h index 29676e1..7eab9fd 100644 --- a/kernel/scsi_transport_iscsi.h +++ b/kernel/scsi_transport_iscsi.h @@ -90,6 +90,7 @@ struct iscsi_transport { char *data, uint32_t data_size); void (*get_stats) (struct iscsi_cls_conn *conn, struct iscsi_stats *stats); + void (*session_recovery_timedout) (struct iscsi_cls_session *session); }; /* @@ -137,6 +138,10 @@ struct iscsi_cls_session { char *targetname; int tpgt; + /* recovery fields */ + int recovery_tmo; + struct work_struct recovery_work; + int target_id; int channel; @@ -165,6 +170,8 @@ extern int iscsi_destroy_session(struct iscsi_cls_session *session); extern struct iscsi_cls_conn *iscsi_create_conn(struct iscsi_cls_session *sess, uint32_t cid); extern int iscsi_destroy_conn(struct 
iscsi_cls_conn *conn); +extern void iscsi_unblock_session(struct iscsi_cls_session *session); +extern void iscsi_block_session(struct iscsi_cls_session *session); /* * session functions used by software iscsi @@ -338,7 +338,6 @@ idbm_update_node(node_rec_t *rec, node_rec_t *newrec) /* update rec->session */ __update_rec_int(rec, newrec, session.initial_cmdsn); - __update_rec_int(rec, newrec, session.reopen_max); __update_rec_int(rec, newrec, session.auth.authmethod); __update_rec_str(rec, newrec, session.auth.username, AUTH_STR_MAX_LEN); @@ -545,8 +544,6 @@ idbm_recinfo_node(node_rec_t *r, recinfo_t *ri) IDBM_SHOW, "manual", "automatic", num); __recinfo_int("node.session.initial_cmdsn", ri, r, session.initial_cmdsn, IDBM_SHOW, num); - __recinfo_int("node.session.reopen_max", ri, r, - session.reopen_max, IDBM_SHOW, num); __recinfo_int_o2("node.session.auth.authmethod", ri, r, session.auth.authmethod, IDBM_SHOW, "None", "CHAP", num); __recinfo_str("node.session.auth.username", ri, r, diff --git a/usr/initiator.c b/usr/initiator.c index c291057..fe2d2cf 100644 --- a/usr/initiator.c +++ b/usr/initiator.c @@ -128,7 +128,7 @@ void recvpool_put(iscsi_conn_t *conn, void *handle) * all related LUNs. 
*/ static void -__session_delete_luns(iscsi_session_t *session) +__session_delete_devs(iscsi_session_t *session) { int lu = 0; int hostno = session->hostno; @@ -146,7 +146,7 @@ __session_delete_luns(iscsi_session_t *session) if (!(pid = fork())) { /* child */ log_debug(4, "deleting device using %s", sysfs_file); - write(fd, "1\n", 3); + write(fd, "1", 1); close(fd); exit(0); } @@ -164,6 +164,32 @@ __session_delete_luns(iscsi_session_t *session) } static void +__session_online_devs(iscsi_session_t *session) +{ + int lun = 0; + int hostno = session->hostno; + + do { + int fd; + + sprintf(sysfs_file, "/sys/class/scsi_host/host%d/" + "device/session%d/target%d:0:0/%d:0:0:%d/state", + hostno, session->id, hostno, hostno, lun); + fd = open(sysfs_file, O_WRONLY); + if (fd < 0) + continue; + log_debug(4, "online device using %s", sysfs_file); + if (write(fd, "running\n", 8) == -1 && errno != EINVAL) { + /* we should read the state */ + log_error("Could not online LUN %d err %d\n", + lun, errno); + } + close(fd); + + } while (++lun < 256); /* FIXME: hardcoded */ +} + +static void write_mgmt_login_rsp(queue_task_t *qtask, mgmt_ipc_err_e err) { qtask->u.login.rsp.err = err; @@ -564,8 +590,13 @@ __session_create(node_rec_t *rec, iscsi_provider_t *provider) strncpy(session->target_name, rec->name, TARGET_NAME_MAXLEN); session->rdma_ext = RDMA_EXT_NOT_NEGOTIATED; - /* session's misc parameters */ - session->reopen_cnt = rec->session.reopen_max; + /* session's eh parameters */ + session->replacement_timeout = rec->session.timeo.replacement_timeout; + if (session->replacement_timeout == 0) { + log_error("Cannot set replacement_timeout to zero. 
Setting " + "120 seconds\n"); + session->replacement_timeout = 120; + } /* OUI and uniqifying number */ session->isid[0] = DRIVER_ISID_0; @@ -621,11 +652,39 @@ __conn_noop_out_delete(iscsi_conn_t *conn) } } +static void +__session_conn_queue_flush(iscsi_conn_t *conn) +{ + iscsi_session_t *session = conn->session; + int count = session->queue->count, i; + unsigned char item_buf[sizeof(queue_item_t) + EVENT_PAYLOAD_MAX]; + queue_item_t *item = (queue_item_t *)(void *)item_buf; + + log_debug(3, "flushing per-connection events"); + + for (i = 0; i < count; i++) { + if (queue_consume(session->queue, EVENT_PAYLOAD_MAX, + item) == QUEUE_IS_EMPTY) { + log_error("queue damage detected..."); + break; + } + if (conn != item->context) { + queue_produce(session->queue, item->event_type, + item->context, item->data_size, + queue_item_data(item)); + } + /* do nothing */ + log_debug(7, "item %p(%d) flushed", item, item->event_type); + } +} + static int __session_conn_cleanup(iscsi_conn_t *conn) { iscsi_session_t *session = conn->session; + iscsi_io_disconnect(conn); + __session_conn_queue_flush(conn); __conn_noop_out_delete(conn); if (ipc->destroy_conn(session->transport_handle, session->id, @@ -639,11 +698,29 @@ __session_conn_cleanup(iscsi_conn_t *conn) log_error("can not safely destroy session %d", session->id); return MGMT_IPC_ERR_INTERNAL; } + if (conn->id == 0) __session_destroy(session); return 0; } +static int +session_conn_cleanup(iscsi_conn_t *conn, int do_stop) +{ + iscsi_session_t *session = conn->session; + + if (do_stop) { + if (ipc->stop_conn(session->transport_handle, session->id, + conn->id, STOP_CONN_TERM)) { + log_error("can't stop connection %d:%d (%d)", + session->id, conn->id, errno); + return MGMT_IPC_ERR_INTERNAL; + } + } + + return __session_conn_cleanup(conn); +} + static void __session_mgmt_ipc_login_cleanup(queue_task_t *qtask, mgmt_ipc_err_e err, int conn_cleanup) @@ -652,10 +729,9 @@ __session_mgmt_ipc_login_cleanup(queue_task_t *qtask, mgmt_ipc_err_e 
err, iscsi_session_t *session = conn->session; iscsi_session_r_stage_e r_stage = session->r_stage; - if (conn_cleanup) { - iscsi_io_disconnect(conn); + if (conn_cleanup) __session_conn_cleanup(conn); - } else { + else { session_conn_destroy(session, conn->id); if (conn->id == 0) __session_destroy(session); @@ -665,54 +741,6 @@ __session_mgmt_ipc_login_cleanup(queue_task_t *qtask, mgmt_ipc_err_e err, write_mgmt_login_rsp(qtask, err); } -static void -__session_conn_queue_flush(iscsi_conn_t *conn) -{ - iscsi_session_t *session = conn->session; - int count = session->queue->count, i; - unsigned char item_buf[sizeof(queue_item_t) + EVENT_PAYLOAD_MAX]; - queue_item_t *item = (queue_item_t *)(void *)item_buf; - - log_debug(3, "flushing per-connection events"); - - for (i = 0; i < count; i++) { - if (queue_consume(session->queue, EVENT_PAYLOAD_MAX, - item) == QUEUE_IS_EMPTY) { - log_error("queue damage detected..."); - break; - } - if (conn != item->context) { - queue_produce(session->queue, item->event_type, - item->context, item->data_size, - queue_item_data(item)); - } - /* do nothing */ - log_debug(7, "item %p(%d) flushed", item, item->event_type); - } -} - -static int -__session_free(iscsi_session_t *session) -{ - iscsi_conn_t *conn = &session->conn[0]; - - /* stop if connection is logged in */ - if (conn->state == STATE_LOGGED_IN) { - if (ipc->stop_conn(session->transport_handle, session->id, - conn->id, STOP_CONN_TERM)) { - log_error("can't stop connection %d:%d (%d)", - session->id, conn->id, errno); - return MGMT_IPC_ERR_INTERNAL; - } - log_debug(3, "connection %d:%d is stopped for termination", - session->id, conn->id); - } - - iscsi_io_disconnect(conn); - __session_conn_queue_flush(conn); - - return __session_conn_cleanup(conn); -} static int __send_nopin_rsp(iscsi_conn_t *conn, struct iscsi_nopin *rhdr, char *data) @@ -839,12 +867,16 @@ __session_conn_reopen(iscsi_conn_t *conn, queue_task_t *qtask, int do_stop) session->reopen_qtask.conn = conn; + 
actor_delete(&conn->connect_timer); __conn_noop_out_delete(conn); + conn->send_pdu_in_progress = 0; + conn->state = STATE_XPT_WAIT; + if (do_stop) { /* state: STATE_CLEANUP_WAIT */ if (ipc->stop_conn(session->transport_handle, session->id, - conn->id, STOP_CONN_RECOVER)) { + conn->id, do_stop)) { log_error("can't stop connection %d:%d (%d)", session->id, conn->id, errno); return -1; @@ -852,7 +884,6 @@ __session_conn_reopen(iscsi_conn_t *conn, queue_task_t *qtask, int do_stop) log_debug(3, "connection %d:%d is stopped for recovery", session->id, conn->id); iscsi_io_disconnect(conn); - __session_conn_queue_flush(conn); } rc = iscsi_io_tcp_connect(conn, 1); @@ -869,9 +900,6 @@ __session_conn_reopen(iscsi_conn_t *conn, queue_task_t *qtask, int do_stop) return MGMT_IPC_ERR_TCP_FAILURE; } - conn->send_pdu_in_progress = 0; - conn->state = STATE_XPT_WAIT; - queue_produce(session->queue, EV_CONN_POLL, qtask, 0, NULL); actor_schedule(&session->mainloop); actor_timer(&conn->connect_timer, conn->login_timeout*1000, @@ -886,10 +914,7 @@ session_conn_reopen(iscsi_conn_t *conn, int do_stop) iscsi_session_t *session = conn->session; int rc; - if (--session->reopen_cnt < 0) { - __session_conn_cleanup(conn); - return MGMT_IPC_ERR_LOGIN_FAILURE; - } + session->reopen_cnt++; /* * If we were temporarily redirected, we need to fall back to @@ -901,8 +926,10 @@ session_conn_reopen(iscsi_conn_t *conn, int do_stop) rc = __session_conn_reopen(conn, &session->reopen_qtask, do_stop); if (rc) { - log_error("reopen failed\n"); - __session_conn_cleanup(conn); + log_debug(4, "Requeue reopen attempt\n"); + queue_produce(session->queue, EV_CONN_TIMER, + &session->reopen_qtask, 0, NULL); + actor_schedule(&session->mainloop); } return rc; @@ -917,11 +944,13 @@ iscsi_login_redirect(iscsi_conn_t *conn) log_debug(3, "login redirect ...\n"); __session_conn_queue_flush(conn); - session->r_stage = R_STAGE_SESSION_REDIRECT; - if (__session_conn_reopen(conn, c->qtask, 1)) { + if (session->r_stage == 
R_STAGE_NO_CHANGE) + session->r_stage = R_STAGE_SESSION_REDIRECT; + + if (__session_conn_reopen(conn, c->qtask, STOP_CONN_RECOVER)) { log_error("redirct __session_conn_reopen failed\n"); - __session_free(session); + __session_conn_cleanup(conn); return 1; } @@ -955,7 +984,7 @@ setup_full_feature_phase(iscsi_conn_t *conn) int param; int len; void *value; - int conn_only; } conntbl[ISCSI_PARAM_PERSISTENT_PORT + 1] = { + int conn_only; } conntbl[ISCSI_PARAM_SESS_RECOVERY_TMO + 1] = { { .param = ISCSI_PARAM_MAX_RECV_DLENGTH, @@ -1047,7 +1076,13 @@ setup_full_feature_phase(iscsi_conn_t *conn) .value = &session->nrec.conn[conn->id].port, .len = sizeof(session->nrec.conn[conn->id].port), .conn_only = 1, + }, { + .param = ISCSI_PARAM_SESS_RECOVERY_TMO, + .value = &session->replacement_timeout, + .len = sizeof(uint32_t), + .conn_only = 0, } + /* * FIXME: set these timeouts via set_param() API * @@ -1072,7 +1107,7 @@ setup_full_feature_phase(iscsi_conn_t *conn) } /* Entered full-feature phase! */ - for (i = 0; i < ISCSI_PARAM_PERSISTENT_PORT + 1; i++) { + for (i = 0; i < ISCSI_PARAM_SESS_RECOVERY_TMO + 1; i++) { if (conn->id != 0 && !conntbl[i].conn_only) continue; if (!(session->param_mask & (1 << conntbl[i].param))) @@ -1115,15 +1150,17 @@ setup_full_feature_phase(iscsi_conn_t *conn) log_warning("connection%d:%d is operational now", session->id, conn->id); - } else + } else { + __session_online_devs(session); log_warning("connection%d:%d is operational after recovery " "(%d attempts)", session->id, conn->id, - session->nrec.session.reopen_max - session->reopen_cnt); + session->reopen_cnt); + } /* * reset ERL=0 reopen counter */ - session->reopen_cnt = session->nrec.session.reopen_max; + session->reopen_cnt = 0; session->r_stage = R_STAGE_NO_CHANGE; /* noop_out */ @@ -1350,49 +1387,69 @@ __session_conn_timer(queue_item_t *item) iscsi_conn_t *conn = qtask->conn; iscsi_session_t *session = conn->session; - if (conn->state == STATE_XPT_WAIT) { - if (session->r_stage == 
R_STAGE_NO_CHANGE) { + switch (conn->state) { + case STATE_XPT_WAIT: + switch (session->r_stage) { + case R_STAGE_NO_CHANGE: + case R_STAGE_SESSION_REDIRECT: log_debug(6, "conn_timer popped at XPT_WAIT: login"); /* timeout during initial connect. * clean connection. write ipc rsp */ __session_mgmt_ipc_login_cleanup(qtask, MGMT_IPC_ERR_TCP_TIMEOUT, 0); - } else if (session->r_stage == R_STAGE_SESSION_REOPEN) { + break; + case R_STAGE_SESSION_REOPEN: log_debug(6, "conn_timer popped at XPT_WAIT: reopen"); - /* timeout during reopen connect. - * try again or cleanup connection. */ + /* timeout during reopen connect. try again */ session_conn_reopen(conn, 0); + break; + case R_STAGE_SESSION_CLEANUP: + session_conn_cleanup(conn, 0); + break; + default: + break; } - } else if (conn->state == STATE_IN_LOGIN) { + + break; + case STATE_IN_LOGIN: iscsi_io_disconnect(conn); - if (session->r_stage == R_STAGE_NO_CHANGE || - session->r_stage == R_STAGE_SESSION_REDIRECT) { - log_debug(6, "conn_timer popped at IN_LOGIN"); - /* send pdu timeout. clean connection. write rsp */ - if (ipc->destroy_conn(session->transport_handle, - session->id, conn->id)) { - log_error("can not safely destroy " - "connection %d:%d", - session->id, conn->id); - } - if (ipc->destroy_session(session->transport_handle, - session->id)) { - log_error("can not safely destroy session %d", - session->id); - } - __session_mgmt_ipc_login_cleanup(qtask, - MGMT_IPC_ERR_PDU_TIMEOUT, 0); - } else if (session->r_stage == R_STAGE_SESSION_REOPEN) - session_conn_reopen(conn, 1); + + switch (session->r_stage) { + case R_STAGE_NO_CHANGE: + case R_STAGE_SESSION_REDIRECT: + log_debug(6, "conn_timer popped at IN_LOGIN: cleanup"); + /* + * send pdu timeout. during initial connect clean + * connection. 
write rsp + */ + write_mgmt_login_rsp(qtask, MGMT_IPC_ERR_PDU_TIMEOUT); + __session_conn_cleanup(conn); + break; + case R_STAGE_SESSION_REOPEN: + log_debug(6, "conn_timer popped at IN_LOGIN: reopen"); + session_conn_reopen(conn, STOP_CONN_RECOVER); + break; + case R_STAGE_SESSION_CLEANUP: + session_conn_cleanup(conn, 1); + break; + default: + break; + } + + default: + log_debug(8, "ignoring timeout in conn state %d\n", + conn->state); + break; } } static void __conn_error_handle(iscsi_session_t *session, iscsi_conn_t *conn) { - if (conn->state == STATE_LOGGED_IN) { - int i; + int i; + switch (conn->state) { + case STATE_LOGGED_IN: /* mark failed connection */ conn->state = STATE_CLEANUP_WAIT; @@ -1407,41 +1464,42 @@ __conn_error_handle(iscsi_session_t *session, iscsi_conn_t *conn) /* FIXME: re-assign leading connection * for ERL>0 */ } - } else { - /* mark all connections as failed */ - for (i=0; i<ISCSI_CONN_MAX; i++) { - if (session->conn[i].state == STATE_LOGGED_IN) { - session->conn[i].state = - STATE_CLEANUP_WAIT; - } - } - if (session->reopen_cnt - 1 > 0) - session->r_stage = R_STAGE_SESSION_REOPEN; - else - session->r_stage = R_STAGE_SESSION_CLEANUP; + + break; + } + + /* mark all connections as failed */ + for (i=0; i<ISCSI_CONN_MAX; i++) { + if (session->conn[i].state == STATE_LOGGED_IN) + session->conn[i].state = STATE_CLEANUP_WAIT; } - } else if (conn->state == STATE_IN_LOGIN) { + session->r_stage = R_STAGE_SESSION_REOPEN; + break; + case STATE_IN_LOGIN: if (session->r_stage == R_STAGE_SESSION_REOPEN) { conn->send_pdu_timer_remove(conn); - session_conn_reopen(conn, 1); - return; - } else { - log_debug(1, "ignoring conn error in login. " - "let it timeout"); + session_conn_reopen(conn, STOP_CONN_RECOVER); return; } - } else if (conn->state == STATE_XPT_WAIT) { + + log_debug(1, "ignoring conn error in login. " + "let it timeout"); + return; + case STATE_XPT_WAIT: log_debug(1, "ignoring conn error in XPT_WAIT. 
" "let connection fail on its own"); return; - } else if (conn->state == STATE_CLEANUP_WAIT) { + case STATE_CLEANUP_WAIT: log_debug(1, "ignoring conn error in CLEANUP_WAIT. " "let connection stop"); return; + default: + log_debug(8, "invalid state %d\n", conn->state); + return; } if (session->r_stage == R_STAGE_SESSION_REOPEN) { - session_conn_reopen(conn, 1); + session_conn_reopen(conn, STOP_CONN_RECOVER); return; } else { if (ipc->stop_conn(session->transport_handle, session->id, @@ -1459,7 +1517,6 @@ __conn_error_handle(iscsi_session_t *session, iscsi_conn_t *conn) __session_conn_cleanup(conn); } - static void __session_conn_error(queue_item_t *item) { @@ -1650,6 +1707,7 @@ session_logout_task(iscsi_session_t *session, queue_task_t *qtask) { iscsi_conn_t *conn; int rc = MGMT_IPC_OK; + int stop = 0; /* FIXME: logout all active connections */ conn = &session->conn[0]; @@ -1660,12 +1718,17 @@ session_logout_task(iscsi_session_t *session, queue_task_t *qtask) /* FIXME: implement Logout Request */ - __session_delete_luns(session); + __session_delete_devs(session); - if((rc=__session_free(session))) + if (conn->state == STATE_LOGGED_IN || + conn->state == STATE_IN_LOGIN) + stop = 1; + + rc = session_conn_cleanup(conn, stop); + if (rc) goto done; - qtask->u.login.rsp.err = MGMT_IPC_OK; + qtask->u.login.rsp.err = rc; qtask->u.login.rsp.command = MGMT_IPC_SESSION_LOGOUT; write(qtask->u.login.mgmt_ipc_fd, &qtask->u.login.rsp, sizeof(qtask->u.login.rsp)); diff --git a/usr/initiator.h b/usr/initiator.h index 804b57e..7163ce9 100644 --- a/usr/initiator.h +++ b/usr/initiator.h @@ -296,6 +296,7 @@ typedef struct iscsi_session { int reopen_cnt; queue_task_t reopen_qtask; iscsi_session_r_stage_e r_stage; + uint32_t replacement_timeout; /* session's processing */ actor_t mainloop; @@ -141,7 +141,7 @@ void idbm_node_setup_defaults(node_rec_t *rec) rec->session.auth.password_in_length = 0; rec->session.err_timeo.abort_timeout = 10; rec->session.err_timeo.reset_timeout = 30; - 
rec->session.timeo.replacement_timeout = 0; + rec->session.timeo.replacement_timeout = 120; rec->session.iscsi.InitialR2T = 0; rec->session.iscsi.ImmediateData = 1; rec->session.iscsi.FirstBurstLength = 256 * 1024; |