diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index cef5d49b59cd24..abf5e1ef89e99a 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -54,12 +54,15 @@
 #include "hpsa.h"
 
 /* HPSA_DRIVER_VERSION must be 3 byte values (0-255) separated by '.' */
-#define HPSA_DRIVER_VERSION "3.4.4-1"
+#define HPSA_DRIVER_VERSION "3.4.9-0"
 #define DRIVER_NAME "HP HPSA Driver (v " HPSA_DRIVER_VERSION ")"
 #define HPSA "hpsa"
 
-/* How long to wait (in milliseconds) for board to go into simple mode */
-#define MAX_CONFIG_WAIT 30000
+/* How long to wait for CISS doorbell communication */
+#define CLEAR_EVENT_WAIT_INTERVAL 20	/* ms for each msleep() call */
+#define MODE_CHANGE_WAIT_INTERVAL 10	/* ms for each msleep() call */
+#define MAX_CLEAR_EVENT_WAIT 30000	/* times 20 ms = 600 s */
+#define MAX_MODE_CHANGE_WAIT 100	/* times 10 ms = 1 s */
 #define MAX_IOCTL_CONFIG_WAIT 1000
 
 /*define how many times we will try a command because of bus resets */
@@ -103,7 +106,6 @@ static const struct pci_device_id hpsa_pci_device_id[] = {
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSH,     0x103C, 0x1922},
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSH,     0x103C, 0x1923},
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSH,     0x103C, 0x1924},
-	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSH,     0x103C, 0x1925},
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSH,     0x103C, 0x1926},
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSH,     0x103C, 0x1928},
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSH,     0x103C, 0x1929},
@@ -149,6 +151,7 @@ static struct board_type products[] = {
 	{0x3249103C, "Smart Array P812", &SA5_access},
 	{0x324A103C, "Smart Array P712m", &SA5_access},
 	{0x324B103C, "Smart Array P711m", &SA5_access},
+	{0x3233103C, "HP StorageWorks 1210m", &SA5_access}, /* alias of 333f */
 	{0x3350103C, "Smart Array P222", &SA5_access},
 	{0x3351103C, "Smart Array P420", &SA5_access},
 	{0x3352103C, "Smart Array P421", &SA5_access},
@@ -194,17 +197,16 @@ static int number_of_controllers;
 static irqreturn_t do_hpsa_intr_intx(int irq, void *dev_id);
 static irqreturn_t do_hpsa_intr_msi(int irq, void *dev_id);
 static int hpsa_ioctl(struct scsi_device *dev, int cmd, void *arg);
-static void lock_and_start_io(struct ctlr_info *h);
-static void start_io(struct ctlr_info *h, unsigned long *flags);
 
 #ifdef CONFIG_COMPAT
 static int hpsa_compat_ioctl(struct scsi_device *dev, int cmd, void *arg);
 #endif
 
 static void cmd_free(struct ctlr_info *h, struct CommandList *c);
-static void cmd_special_free(struct ctlr_info *h, struct CommandList *c);
 static struct CommandList *cmd_alloc(struct ctlr_info *h);
-static struct CommandList *cmd_special_alloc(struct ctlr_info *h);
+static void cmd_tagged_free(struct ctlr_info *h, struct CommandList *c);
+static struct CommandList *cmd_tagged_alloc(struct ctlr_info *h,
+					    struct scsi_cmnd *scmd);
 static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
 	void *buff, size_t size, u16 page_code, unsigned char *scsi3addr,
 	int cmd_type);
@@ -216,10 +218,12 @@ static int hpsa_scan_finished(struct Scsi_Host *sh,
 	unsigned long elapsed_time);
 static int hpsa_change_queue_depth(struct scsi_device *sdev,
 	int qdepth, int reason);
+static int hpsa_change_queue_type(struct scsi_device *sdev, int type);
 
 static int hpsa_eh_device_reset_handler(struct scsi_cmnd *scsicmd);
 static int hpsa_eh_abort_handler(struct scsi_cmnd *scsicmd);
 static int hpsa_slave_alloc(struct scsi_device *sdev);
+static int hpsa_slave_configure(struct scsi_device *sdev);
 static void hpsa_slave_destroy(struct scsi_device *sdev);
 
 static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno);
@@ -230,7 +234,8 @@ static void check_ioctl_unit_attention(struct ctlr_info *h,
 /* performant mode helper functions */
 static void calc_bucket_map(int *bucket, int num_buckets,
 	int nsgs, int min_blocks, int *bucket_map);
-static void hpsa_put_ctlr_into_performant_mode(struct ctlr_info *h);
+static void hpsa_free_performant_mode(struct ctlr_info *h);
+static int hpsa_put_ctlr_into_performant_mode(struct ctlr_info *h);
 static inline u32 next_command(struct ctlr_info *h, u8 q);
 static int hpsa_find_cfg_addrs(struct pci_dev *pdev, void __iomem *vaddr,
 			       u32 *cfg_base_addr, u64 *cfg_base_addr_index,
@@ -241,14 +246,17 @@ static int hpsa_lookup_board_id(struct pci_dev *pdev, u32 *board_id);
 static int hpsa_wait_for_board_state(struct pci_dev *pdev, void __iomem *vaddr,
 				     int wait_for_ready);
 static inline void finish_cmd(struct CommandList *c);
-static void hpsa_wait_for_mode_change_ack(struct ctlr_info *h);
+static int hpsa_wait_for_mode_change_ack(struct ctlr_info *h);
 #define BOARD_NOT_READY 0
 #define BOARD_READY 1
 static void hpsa_drain_accel_commands(struct ctlr_info *h);
 static void hpsa_flush_cache(struct ctlr_info *h);
 static int hpsa_scsi_ioaccel_queue_command(struct ctlr_info *h,
 	struct CommandList *c, u32 ioaccel_handle, u8 *cdb, int cdb_len,
-	u8 *scsi3addr);
+	u8 *scsi3addr, struct hpsa_scsi_dev_t *phys_disk);
+static void hpsa_command_resubmit_worker(struct work_struct *work);
+static void print_cfg_table(struct device *dev, struct CfgTable *tb);
+static void detect_controller_lockup(struct ctlr_info *h);
 
 static inline struct ctlr_info *sdev_to_hba(struct scsi_device *sdev)
 {
@@ -262,40 +270,84 @@ static inline struct ctlr_info *shost_to_hba(struct Scsi_Host *sh)
 	return (struct ctlr_info *) *priv;
 }
 
+/* Extract sense key, ASC and ASCQ from fixed (0x70/0x71) or descriptor
+ * (0x72/0x73) format sense data.  -1 means the field is not available.
+ */
+static void decode_sense_data(const u8 *sense_data, int sense_data_len,
+			int *sense_key, int *asc, int *ascq)
+{
+	*sense_key = -1;
+	*asc = -1;
+	*ascq = -1;
+	if (sense_data_len < 1)
+		return;
+	/* bit 7 of byte 0 is VALID/reserved, not part of the response code */
+	switch (sense_data[0] & 0x7f) {
+	case 0x70: /* fixed format, current error */
+	case 0x71: /* fixed format, deferred error */
+		*sense_key = (sense_data_len > 2) ? sense_data[2] & 0x0f : -1;
+		*asc = (sense_data_len > 12) ? sense_data[12] : -1;
+		*ascq = (sense_data_len > 13) ? sense_data[13] : -1;
+		break;
+	case 0x72: /* descriptor format, current error */
+	case 0x73: /* descriptor format, deferred error */
+		*sense_key = (sense_data_len > 1) ? sense_data[1] & 0x0f : -1;
+		*asc = (sense_data_len > 2) ? sense_data[2] : -1;
+		*ascq = (sense_data_len > 3) ? sense_data[3] : -1;
+		break;
+	default: /* unknown response code: all fields stay -1 */
+		break;
+	}
+}
+
 static int check_for_unit_attention(struct ctlr_info *h,
 	struct CommandList *c)
 {
-	if (c->err_info->SenseInfo[2] != UNIT_ATTENTION)
+	int sense_key, asc, ascq;
+	int sense_len;
+
+	if (c->err_info->SenseLen > sizeof(c->err_info->SenseInfo))
+		sense_len = sizeof(c->err_info->SenseInfo);
+	else
+		sense_len = c->err_info->SenseLen;
+
+	decode_sense_data(c->err_info->SenseInfo, sense_len,
+				&sense_key, &asc, &ascq);
+	if (sense_key != UNIT_ATTENTION || asc == -1)
 		return 0;
 
-	switch (c->err_info->SenseInfo[12]) {
+	switch (asc) {
 	case STATE_CHANGED:
-		dev_warn(&h->pdev->dev, HPSA "%d: a state change "
-			"detected, command retried\n", h->ctlr);
+		dev_warn(&h->pdev->dev,
+			"%s: a state change detected, command retried\n",
+			h->devname);
 		break;
 	case LUN_FAILED:
-		dev_warn(&h->pdev->dev, HPSA "%d: LUN failure "
-			"detected, action required\n", h->ctlr);
+		dev_warn(&h->pdev->dev,
+			"%s: LUN failure detected\n", h->devname);
 		break;
 	case REPORT_LUNS_CHANGED:
-		dev_warn(&h->pdev->dev, HPSA "%d: report LUN data "
-			"changed, action required\n", h->ctlr);
+		dev_warn(&h->pdev->dev,
+			"%s: report LUN data changed\n", h->devname);
 	/*
 	 * Note: this REPORT_LUNS_CHANGED condition only occurs on the external
 	 * target (array) devices.
 	 */
 		break;
 	case POWER_OR_RESET:
-		dev_warn(&h->pdev->dev, HPSA "%d: a power on "
-			"or device reset detected\n", h->ctlr);
+		dev_warn(&h->pdev->dev,
+			"%s: a power on or device reset detected\n",
+			h->devname);
 		break;
 	case UNIT_ATTENTION_CLEARED:
-		dev_warn(&h->pdev->dev, HPSA "%d: unit attention "
-		    "cleared by another initiator\n", h->ctlr);
+		dev_warn(&h->pdev->dev,
+			"%s: unit attention cleared by another initiator\n",
+			h->devname);
 		break;
 	default:
-		dev_warn(&h->pdev->dev, HPSA "%d: unknown "
-			"unit attention detected\n", h->ctlr);
+		dev_warn(&h->pdev->dev,
+			"%s: unknown unit attention detected\n",
+			h->devname);
 		break;
 	}
 	return 1;
@@ -311,6 +363,95 @@ static int check_for_busy(struct ctlr_info *h, struct CommandList *c)
 	return 1;
 }
 
+/* sysfs store: h->lockup_detector_enabled = (parsed integer != 0) */
+static ssize_t host_store_lockup_detector(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct ctlr_info *h = shost_to_hba(class_to_shost(dev));
+	char text[10];
+	int n, val;
+
+	if (!(capable(CAP_SYS_ADMIN) && capable(CAP_SYS_RAWIO)))
+		return -EACCES;
+	n = sizeof(text) - 1;
+	if (count < sizeof(text) - 1)
+		n = count;
+	strncpy(text, buf, n);
+	text[n] = '\0';
+	if (sscanf(text, "%d", &val) != 1)
+		return -EINVAL;
+	h->lockup_detector_enabled = val ? 1 : 0;
+	return count;
+}
+
+/* sysfs show: print h->lockup_detector_enabled as a decimal integer. */
+static ssize_t host_show_lockup_detector(struct device *dev,
+	     struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct ctlr_info *h = shost_to_hba(shost);
+
+	return snprintf(buf, 20, "%d\n", h->lockup_detector_enabled);
+}
+
+/* sysfs store: parse "<abort_test> <abort_timeout>".  abort_timeout (secs)
+ * is later scaled by HZ into a work delay, so negative values are rejected.
+ */
+static ssize_t host_store_abort_test(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct ctlr_info *h = shost_to_hba(class_to_shost(dev));
+	int len, value, timeout;
+	char tmpbuf[10];
+
+	if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
+		return -EACCES;
+	len = count > sizeof(tmpbuf) - 1 ? sizeof(tmpbuf) - 1 : count;
+	strncpy(tmpbuf, buf, len);
+	tmpbuf[len] = '\0';
+	if (sscanf(tmpbuf, "%d %d", &value, &timeout) != 2 || timeout < 0)
+		return -EINVAL;
+	h->abort_test = value;
+	h->abort_timeout = timeout;
+	return count;
+}
+
+/* sysfs show: print h->abort_test and h->abort_timeout as two integers. */
+static ssize_t host_show_abort_test(struct device *dev,
+	     struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct ctlr_info *h = shost_to_hba(shost);
+
+	return snprintf(buf, 20, "%d %d\n", h->abort_test, h->abort_timeout);
+}
+
+static u32 lockup_detected(struct ctlr_info *h);
+/* sysfs show: print lockup_detected(h), then the per-cpu h->lockup_detected
+ * value of every online CPU, each followed by a comma.
+ */
+static ssize_t host_show_lockup_detected(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct ctlr_info *h = shost_to_hba(class_to_shost(dev));
+	int cpu, ld, c;
+
+	/* sysfs supplies a PAGE_SIZE buffer; scnprintf() never overruns it */
+	ld = lockup_detected(h);
+	c = scnprintf(buf, PAGE_SIZE, "ld=%d: ", ld);
+	/* unlike a counted cpumask_next() walk, this cannot step past the
+	 * last CPU if one goes offline while we iterate
+	 */
+	for_each_online_cpu(cpu) {
+		ld = *per_cpu_ptr(h->lockup_detected, cpu);
+		c += scnprintf(buf + c, PAGE_SIZE - c, "%d,", ld);
+	}
+	c += scnprintf(buf + c, PAGE_SIZE - c, "\n");
+	return c;
+}
+
 static ssize_t host_store_hp_ssd_smart_path_status(struct device *dev,
 					 struct device_attribute *attr,
 					 const char *buf, size_t count)
@@ -371,6 +512,85 @@ static ssize_t host_store_rescan(struct device *dev,
 	return count;
 }
 
+/* sysfs store: set HostWrite.CoalIntDelay, ring the CFGTBL_ChangeReq doorbell
+ * and wait for the ack.  No range check: the device polices the value.
+ */
+static ssize_t host_store_hpsa_intcoal_delay(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct ctlr_info *h = shost_to_hba(class_to_shost(dev));
+	unsigned int setting;
+	char text[8];
+	int n = sizeof(text) - 1;
+
+	if (count < sizeof(text) - 1)
+		n = count;
+	strncpy(text, buf, n);
+	text[n] = '\0';
+	if (sscanf(text, "%u", &setting) != 1)
+		return -EINVAL;
+	print_cfg_table(&h->pdev->dev, h->cfgtable);
+	writel(setting, &h->cfgtable->HostWrite.CoalIntDelay);
+	print_cfg_table(&h->pdev->dev, h->cfgtable);
+	writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
+	(void) hpsa_wait_for_mode_change_ack(h);
+	print_cfg_table(&h->pdev->dev, h->cfgtable);
+	return count;
+}
+
+/* sysfs store: set HostWrite.CoalIntCount, ring the CFGTBL_ChangeReq doorbell
+ * and wait for the ack.  No range check: the device polices the value.
+ */
+static ssize_t host_store_hpsa_intcoal_count(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct ctlr_info *h = shost_to_hba(class_to_shost(dev));
+	unsigned int setting;
+	char text[8];
+	int n = sizeof(text) - 1;
+
+	if (count < sizeof(text) - 1)
+		n = count;
+	strncpy(text, buf, n);
+	text[n] = '\0';
+	if (sscanf(text, "%u", &setting) != 1)
+		return -EINVAL;
+	print_cfg_table(&h->pdev->dev, h->cfgtable);
+	writel(setting, &h->cfgtable->HostWrite.CoalIntCount);
+	print_cfg_table(&h->pdev->dev, h->cfgtable);
+	writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
+	(void) hpsa_wait_for_mode_change_ack(h);
+	print_cfg_table(&h->pdev->dev, h->cfgtable);
+	return count;
+}
+
+/* sysfs show: read HostWrite.CoalIntDelay from the config table and
+ * print it as an unsigned decimal.
+ */
+static ssize_t host_show_hpsa_intcoal_delay(struct device *dev,
+	     struct device_attribute *attr, char *buf)
+{
+	struct ctlr_info *h = shost_to_hba(class_to_shost(dev));
+	unsigned int setting = readl(&h->cfgtable->HostWrite.CoalIntDelay);
+
+	return snprintf(buf, 20, "%u\n", setting);
+}
+
+/* sysfs show: read HostWrite.CoalIntCount from the config table and
+ * print it as an unsigned decimal.
+ */
+static ssize_t host_show_hpsa_intcoal_count(struct device *dev,
+	     struct device_attribute *attr, char *buf)
+{
+	struct ctlr_info *h = shost_to_hba(class_to_shost(dev));
+	unsigned int setting = readl(&h->cfgtable->HostWrite.CoalIntCount);
+
+	return snprintf(buf, 20, "%u\n", setting);
+}
+
+
 static ssize_t host_show_firmware_revision(struct device *dev,
 	     struct device_attribute *attr, char *buf)
 {
@@ -392,7 +612,8 @@ static ssize_t host_show_commands_outstanding(struct device *dev,
 	struct Scsi_Host *shost = class_to_shost(dev);
 	struct ctlr_info *h = shost_to_hba(shost);
 
-	return snprintf(buf, 20, "%d\n", h->commands_outstanding);
+	return snprintf(buf, 20, "%d\n",
+			atomic_read(&h->commands_outstanding));
 }
 
 static ssize_t host_show_transport_mode(struct device *dev,
@@ -463,24 +684,31 @@ static u32 soft_unresettable_controller[] = {
 	0x409D0E11, /* Smart Array 6400 EM */
 };
 
-static int ctlr_is_hard_resettable(u32 board_id)
+static u32 needs_abort_tags_swizzled[] = {
+	0x324a103C, /* Smart Array P712m */
+	0x324b103C, /* SmartArray P711m */
+};
+
+static int board_id_in_array(u32 a[], int nelems, u32 board_id)
 {
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(unresettable_controller); i++)
-		if (unresettable_controller[i] == board_id)
-			return 0;
-	return 1;
+	for (i = 0; i < nelems; i++)
+		if (a[i] == board_id)
+			return 1;
+	return 0;
 }
 
-static int ctlr_is_soft_resettable(u32 board_id)
+static int ctlr_is_hard_resettable(u32 board_id)
 {
-	int i;
+	return !board_id_in_array(unresettable_controller,
+			ARRAY_SIZE(unresettable_controller), board_id);
+}
 
-	for (i = 0; i < ARRAY_SIZE(soft_unresettable_controller); i++)
-		if (soft_unresettable_controller[i] == board_id)
-			return 0;
-	return 1;
+static int ctlr_is_soft_resettable(u32 board_id)
+{
+	return !board_id_in_array(soft_unresettable_controller,
+			ARRAY_SIZE(soft_unresettable_controller), board_id);
 }
 
 static int ctlr_is_resettable(u32 board_id)
@@ -489,6 +717,12 @@ static int ctlr_is_resettable(u32 board_id)
 		ctlr_is_soft_resettable(board_id);
 }
 
+static int ctlr_needs_abort_tags_swizzled(u32 board_id)
+{
+	return board_id_in_array(needs_abort_tags_swizzled,
+			ARRAY_SIZE(needs_abort_tags_swizzled), board_id);
+}
+
 static ssize_t host_show_resettable(struct device *dev,
 	struct device_attribute *attr, char *buf)
 {
@@ -504,8 +738,8 @@ static inline int is_logical_dev_addr_mode(unsigned char scsi3addr[])
 	return (scsi3addr[3] & 0xC0) == 0x40;
 }
 
-static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG",
-	"1(ADM)", "UNKNOWN"
+static const char * const raid_label[] = { "0", "4", "1(+0)", "5", "5+1", "6",
+	"1(+0)ADM", "UNKNOWN"
 };
 #define HPSA_RAID_0	0
 #define HPSA_RAID_4	1
@@ -628,6 +862,8 @@ static DEVICE_ATTR(raid_level, S_IRUGO, raid_level_show, NULL);
 static DEVICE_ATTR(lunid, S_IRUGO, lunid_show, NULL);
 static DEVICE_ATTR(unique_id, S_IRUGO, unique_id_show, NULL);
 static DEVICE_ATTR(rescan, S_IWUSR, NULL, host_store_rescan);
+static DEVICE_ATTR(hpsa_intcoal_delay, S_IWUSR|S_IRUGO, host_show_hpsa_intcoal_delay, host_store_hpsa_intcoal_delay);
+static DEVICE_ATTR(hpsa_intcoal_count, S_IWUSR|S_IRUGO, host_show_hpsa_intcoal_count, host_store_hpsa_intcoal_count);
 static DEVICE_ATTR(hp_ssd_smart_path_enabled, S_IRUGO,
 			host_show_hp_ssd_smart_path_enabled, NULL);
 static DEVICE_ATTR(hp_ssd_smart_path_status, S_IWUSR|S_IRUGO|S_IROTH,
@@ -643,6 +879,12 @@ static DEVICE_ATTR(transport_mode, S_IRUGO,
 	host_show_transport_mode, NULL);
 static DEVICE_ATTR(resettable, S_IRUGO,
 	host_show_resettable, NULL);
+static DEVICE_ATTR(lockup_detector, S_IWUSR|S_IRUGO,
+	host_show_lockup_detector, host_store_lockup_detector);
+static DEVICE_ATTR(lockup_detected, S_IRUGO,
+	host_show_lockup_detected, NULL);
+static DEVICE_ATTR(abort_test, S_IWUSR|S_IRUGO,
+	host_show_abort_test, host_store_abort_test);
 
 static struct device_attribute *hpsa_sdev_attrs[] = {
 	&dev_attr_raid_level,
@@ -657,12 +899,20 @@ static struct device_attribute *hpsa_shost_attrs[] = {
 	&dev_attr_firmware_revision,
 	&dev_attr_commands_outstanding,
 	&dev_attr_transport_mode,
+	&dev_attr_hpsa_intcoal_delay,
+	&dev_attr_hpsa_intcoal_count,
 	&dev_attr_resettable,
 	&dev_attr_hp_ssd_smart_path_status,
 	&dev_attr_raid_offload_debug,
+	&dev_attr_lockup_detector,
+	&dev_attr_lockup_detected,
+	&dev_attr_abort_test,
 	NULL,
 };
 
+#define HPSA_NRESERVED_CMDS	(HPSA_CMDS_RESERVED_FOR_ABORTS + \
+		HPSA_CMDS_RESERVED_FOR_DRIVER + HPSA_MAX_CONCURRENT_PASSTHRUS)
+
 static struct scsi_host_template hpsa_driver_template = {
 	.module			= THIS_MODULE,
 	.name			= HPSA,
@@ -671,12 +921,14 @@ static struct scsi_host_template hpsa_driver_template = {
 	.scan_start		= hpsa_scan_start,
 	.scan_finished		= hpsa_scan_finished,
 	.change_queue_depth	= hpsa_change_queue_depth,
+	.change_queue_type	= hpsa_change_queue_type,
 	.this_id		= -1,
 	.use_clustering		= ENABLE_CLUSTERING,
 	.eh_abort_handler	= hpsa_eh_abort_handler,
 	.eh_device_reset_handler = hpsa_eh_device_reset_handler,
 	.ioctl			= hpsa_ioctl,
 	.slave_alloc		= hpsa_slave_alloc,
+	.slave_configure	= hpsa_slave_configure,
 	.slave_destroy		= hpsa_slave_destroy,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl		= hpsa_compat_ioctl,
@@ -687,18 +939,10 @@ static struct scsi_host_template hpsa_driver_template = {
 	.no_write_same = 1,
 };
 
-
-/* Enqueuing and dequeuing functions for cmdlists. */
-static inline void addQ(struct list_head *list, struct CommandList *c)
-{
-	list_add_tail(&c->list, list);
-}
-
 static inline u32 next_command(struct ctlr_info *h, u8 q)
 {
 	u32 a;
 	struct reply_queue_buffer *rq = &h->reply_queue[q];
-	unsigned long flags;
 
 	if (h->transMethod & CFGTBL_Trans_io_accel1)
 		return h->access.command_completed(h, q);
@@ -709,9 +953,7 @@ static inline u32 next_command(struct ctlr_info *h, u8 q)
 	if ((rq->head[rq->current_entry] & 1) == rq->wraparound) {
 		a = rq->head[rq->current_entry];
 		rq->current_entry++;
-		spin_lock_irqsave(&h->lock, flags);
-		h->commands_outstanding--;
-		spin_unlock_irqrestore(&h->lock, flags);
+		atomic_dec(&h->commands_outstanding);
 	} else {
 		a = FIFO_EMPTY;
 	}
@@ -753,25 +995,35 @@ static inline u32 next_command(struct ctlr_info *h, u8 q)
  * set bit 0 for pull model, bits 3-1 for block fetch
  * register number
  */
-static void set_performant_mode(struct ctlr_info *h, struct CommandList *c)
+#define DEFAULT_REPLY_QUEUE (-1)
+static void set_performant_mode(struct ctlr_info *h, struct CommandList *c,
+					int reply_queue)
 {
 	if (likely(h->transMethod & CFGTBL_Trans_Performant)) {
 		c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1);
-		if (likely(h->msix_vector > 0))
+		if (unlikely(!h->msix_vector))
+			return;
+		if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
 			c->Header.ReplyQueue =
 				raw_smp_processor_id() % h->nreply_queues;
+		else
+			c->Header.ReplyQueue = reply_queue % h->nreply_queues;
 	}
 }
 
 static void set_ioaccel1_performant_mode(struct ctlr_info *h,
-						struct CommandList *c)
+						struct CommandList *c,
+						int reply_queue)
 {
 	struct io_accel1_cmd *cp = &h->ioaccel_cmd_pool[c->cmdindex];
 
 	/* Tell the controller to post the reply to the queue for this
 	 * processor.  This seems to give the best I/O throughput.
 	 */
-	cp->ReplyQueue = smp_processor_id() % h->nreply_queues;
+	if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
+		cp->ReplyQueue = smp_processor_id() % h->nreply_queues;
+	else
+		cp->ReplyQueue = reply_queue % h->nreply_queues;
 	/* Set the bits in the address sent down to include:
 	 *  - performant mode bit (bit 0)
 	 *  - pull count (bits 1-3)
@@ -781,15 +1033,41 @@ static void set_ioaccel1_performant_mode(struct ctlr_info *h,
 					IOACCEL1_BUSADDR_CMDTYPE;
 }
 
+/* Prepare an IOACCEL2_TMF command for submission: choose its reply queue
+ * and OR the first ioaccel2 block fetch table entry into c->busaddr.
+ */
+static void set_ioaccel2_tmf_performant_mode(struct ctlr_info *h,
+						struct CommandList *c,
+						int reply_queue)
+{
+	struct hpsa_tmf_struct *tmf = (struct hpsa_tmf_struct *)
+		&h->ioaccel2_cmd_pool[c->cmdindex];
+	int q;
+
+	/* By default have the reply posted to the queue for this processor
+	 * (this seems to give the best I/O throughput).
+	 */
+	q = reply_queue;
+	if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
+		q = smp_processor_id();
+	tmf->reply_queue = q % h->nreply_queues;
+	/* ioaccel2: no performant bit or command type, only the pull count */
+	c->busaddr |= h->ioaccel2_blockFetchTable[0];
+}
+
 static void set_ioaccel2_performant_mode(struct ctlr_info *h,
-						struct CommandList *c)
+						struct CommandList *c,
+						int reply_queue)
 {
 	struct io_accel2_cmd *cp = &h->ioaccel2_cmd_pool[c->cmdindex];
 
 	/* Tell the controller to post the reply to the queue for this
 	 * processor.  This seems to give the best I/O throughput.
 	 */
-	cp->reply_queue = smp_processor_id() % h->nreply_queues;
+	if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
+		cp->reply_queue = smp_processor_id() % h->nreply_queues;
+	else
+		cp->reply_queue = reply_queue % h->nreply_queues;
 	/* Set the bits in the address sent down to include:
 	 *  - performant mode bit not used in ioaccel mode 2
 	 *  - pull count (bits 0-3)
@@ -827,34 +1105,61 @@ static void dial_up_lockup_detection_on_fw_flash_complete(struct ctlr_info *h,
 		h->heartbeat_sample_interval = HEARTBEAT_SAMPLE_INTERVAL;
 }
 
-static void enqueue_cmd_and_start_io(struct ctlr_info *h,
-	struct CommandList *c)
+static void __enqueue_cmd_and_start_io(struct ctlr_info *h,
+	struct CommandList *c, int reply_queue);
+
+static void hpsa_abort_torture_worker(struct work_struct *work)
 {
-	unsigned long flags;
+	struct CommandList *c = container_of(to_delayed_work(work),
+					struct CommandList, abort_torture_work);
+	dev_warn(&c->h->pdev->dev, "Submitting delayed command\n");
+	__enqueue_cmd_and_start_io(c->h, c, DEFAULT_REPLY_QUEUE);
+}
 
+static void __enqueue_cmd_and_start_io(struct ctlr_info *h,
+	struct CommandList *c, int reply_queue)
+{
+	/* abort test hook: hold back one I/O command for abort_timeout secs */
+	if (h->abort_test > 0 && (c->cmd_type == CMD_SCSI ||
+	    c->cmd_type == CMD_IOACCEL1 || c->cmd_type == CMD_IOACCEL2)) {
+		h->abort_test = 0;
+		dev_warn(&h->pdev->dev, "delaying command for %d secs.\n",
+					h->abort_timeout);
+		INIT_DELAYED_WORK(&c->abort_torture_work,
+					hpsa_abort_torture_worker);
+		schedule_delayed_work(&c->abort_torture_work,
+					h->abort_timeout * HZ);
+		atomic_inc(&h->cmds_sent);
+		return;
+	}
+	dial_down_lockup_detection_during_fw_flash(h, c);
+	atomic_inc(&h->commands_outstanding);
+	atomic_inc(&h->cmds_sent);
 	switch (c->cmd_type) {
 	case CMD_IOACCEL1:
-		set_ioaccel1_performant_mode(h, c);
+		set_ioaccel1_performant_mode(h, c, reply_queue);
+		writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET);
 		break;
 	case CMD_IOACCEL2:
-		set_ioaccel2_performant_mode(h, c);
+		set_ioaccel2_performant_mode(h, c, reply_queue);
+		writel(c->busaddr, h->vaddr + IOACCEL2_INBOUND_POSTQ_32);
+		break;
+	case IOACCEL2_TMF:
+		set_ioaccel2_tmf_performant_mode(h, c, reply_queue);
+		writel(c->busaddr, h->vaddr + IOACCEL2_INBOUND_POSTQ_32);
 		break;
 	default:
-		set_performant_mode(h, c);
+		set_performant_mode(h, c, reply_queue);
+		h->access.submit_command(h, c);
 	}
-	dial_down_lockup_detection_during_fw_flash(h, c);
-	spin_lock_irqsave(&h->lock, flags);
-	addQ(&h->reqQ, c);
-	h->Qdepth++;
-	start_io(h, &flags);
-	spin_unlock_irqrestore(&h->lock, flags);
 }
 
-static inline void removeQ(struct CommandList *c)
+static void enqueue_cmd_and_start_io(struct ctlr_info *h, struct CommandList *c)
 {
-	if (WARN_ON(list_empty(&c->list)))
-		return;
-	list_del_init(&c->list);
+	if (unlikely(c->abort_pending))	/* call finish_cmd(), do not submit */
+		finish_cmd(c);
+	else
+		__enqueue_cmd_and_start_io(h, c, DEFAULT_REPLY_QUEUE);
 }
 
 static inline int is_hba_lunid(unsigned char scsi3addr[])
@@ -962,17 +1267,23 @@ static int hpsa_scsi_add_entry(struct ctlr_info *h, int hostno,
 
 	h->dev[n] = device;
 	h->ndevices++;
+	device->offload_to_be_enabled = device->offload_enabled;
+	device->offload_enabled = 0;
 	added[*nadded] = device;
 	(*nadded)++;
 
-	/* initially, (before registering with scsi layer) we don't
-	 * know our hostno and we don't want to print anything first
-	 * time anyway (the scsi layer's inquiries will show that info)
-	 */
-	/* if (hostno != -1) */
-		dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d added.\n",
-			scsi_device_type(device->devtype), hostno,
-			device->bus, device->target, device->lun);
+	/* raid_label[] entries carry no "RAID-" prefix; the format adds it */
+	dev_info(&h->pdev->dev,
+		"%6s scsi %d:%d:%d:%d: %s %.8s %.16s RAID-%s SSDSmartPathCap%c En%c Exp=%d\n",
+		device->expose_state & HPSA_SCSI_ADD ? "added" : "masked",
+		hostno, device->bus, device->target, device->lun,
+		scsi_device_type(device->devtype),
+		device->vendor, device->model,
+		device->raid_level > RAID_UNKNOWN ?
+			"?" : raid_label[device->raid_level],
+		device->offload_config ? '+' : '-',
+		device->offload_to_be_enabled ? '+' : '-',
+		device->expose_state);
 	return 0;
 }
 
@@ -986,16 +1297,47 @@ static void hpsa_scsi_update_entry(struct ctlr_info *h, int hostno,
 	/* Raid level changed. */
 	h->dev[entry]->raid_level = new_entry->raid_level;
 
-	/* Raid offload parameters changed. */
+	/* Raid offload parameters changed.  Careful about the ordering. */
+	if (new_entry->offload_config && new_entry->offload_enabled) {
+		/* if drive is newly offload_enabled, we want to copy the
+		 * raid map data first.  If previously offload_enabled and
+		 * offload_config were set, raid map data had better be
+		 * the same as it was before.  if raid map data is changed
+		 * then it had better be the case that
+		 * h->dev[entry]->offload_enabled is currently 0.
+		 */
+		h->dev[entry]->raid_map = new_entry->raid_map;
+		h->dev[entry]->ioaccel_handle = new_entry->ioaccel_handle;
+	}
+	if (new_entry->hba_ioaccel_enabled) {
+		h->dev[entry]->ioaccel_handle = new_entry->ioaccel_handle;
+		wmb(); /* set ioaccel_handle *before* hba_ioaccel_enabled */
+	}
+	h->dev[entry]->hba_ioaccel_enabled = new_entry->hba_ioaccel_enabled;
 	h->dev[entry]->offload_config = new_entry->offload_config;
-	h->dev[entry]->offload_enabled = new_entry->offload_enabled;
-	h->dev[entry]->ioaccel_handle = new_entry->ioaccel_handle;
 	h->dev[entry]->offload_to_mirror = new_entry->offload_to_mirror;
-	h->dev[entry]->raid_map = new_entry->raid_map;
 
-	dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d updated.\n",
-		scsi_device_type(new_entry->devtype), hostno, new_entry->bus,
-		new_entry->target, new_entry->lun);
+	/* We can turn off ioaccel offload now, but need to delay turning
+	 * it on until we can update h->dev[entry]->phys_disk[], but we
+	 * can't do that until all the devices are updated.
+	 */
+	h->dev[entry]->offload_to_be_enabled = new_entry->offload_enabled;
+	if (!new_entry->offload_enabled)
+		h->dev[entry]->offload_enabled = 0;
+	h->dev[entry]->queue_depth = new_entry->queue_depth;
+
+	/* raid_label[] entries carry no "RAID-" prefix; the format adds it */
+	dev_info(&h->pdev->dev,
+		"updated scsi %d:%d:%d:%d: %s %.8s %.16s RAID-%s SSDSmartPathCap%c En%c Exp=%d\n",
+		hostno, h->dev[entry]->bus,
+		h->dev[entry]->target, h->dev[entry]->lun,
+		scsi_device_type(h->dev[entry]->devtype),
+		h->dev[entry]->vendor, h->dev[entry]->model,
+		h->dev[entry]->raid_level > RAID_UNKNOWN ?
+			"?" : raid_label[h->dev[entry]->raid_level],
+		h->dev[entry]->offload_config ? '+' : '-',
+		h->dev[entry]->offload_to_be_enabled ? '+' : '-',
+		h->dev[entry]->expose_state);
 }
 
 /* Replace an entry from h->dev[] array. */
@@ -1018,12 +1360,22 @@ static void hpsa_scsi_replace_entry(struct ctlr_info *h, int hostno,
 		new_entry->lun = h->dev[entry]->lun;
 	}
 
+	new_entry->offload_to_be_enabled = new_entry->offload_enabled;
+	new_entry->offload_enabled = 0;
 	h->dev[entry] = new_entry;
 	added[*nadded] = new_entry;
 	(*nadded)++;
-	dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d changed.\n",
-		scsi_device_type(new_entry->devtype), hostno, new_entry->bus,
-			new_entry->target, new_entry->lun);
+	/* raid_label[] entries carry no "RAID-" prefix; the format adds it */
+	dev_info(&h->pdev->dev,
+		"replaced scsi %d:%d:%d:%d: %s %.8s %.16s RAID-%s SSDSmartPathCap%c En%c Exp=%d\n",
+		hostno, new_entry->bus, new_entry->target, new_entry->lun,
+		scsi_device_type(new_entry->devtype),
+		new_entry->vendor, new_entry->model,
+		new_entry->raid_level > RAID_UNKNOWN ?
+			"?" : raid_label[new_entry->raid_level],
+		new_entry->offload_config ? '+' : '-',
+		new_entry->offload_to_be_enabled ? '+' : '-',
+		new_entry->expose_state);
 }
 
 /* Remove an entry from h->dev[] array. */
@@ -1043,9 +1395,17 @@ static void hpsa_scsi_remove_entry(struct ctlr_info *h, int hostno, int entry,
 	for (i = entry; i < h->ndevices-1; i++)
 		h->dev[i] = h->dev[i+1];
 	h->ndevices--;
-	dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d removed.\n",
-		scsi_device_type(sd->devtype), hostno, sd->bus, sd->target,
-		sd->lun);
+	/* raid_label[] entries carry no "RAID-" prefix; the format adds it */
+	dev_info(&h->pdev->dev,
+		"removed scsi %d:%d:%d:%d: %s %.8s %.16s RAID-%s SSDSmartPathCap%c En%c Exp=%d\n",
+		hostno, sd->bus, sd->target, sd->lun,
+		scsi_device_type(sd->devtype),
+		sd->vendor, sd->model,
+		sd->raid_level > RAID_UNKNOWN ?
+			"?" : raid_label[sd->raid_level],
+		sd->offload_config ? '+' : '-',
+		sd->offload_enabled ? '+' : '-',
+		sd->expose_state);
 }
 
 #define SCSI3ADDR_EQ(a, b) ( \
@@ -1117,6 +1477,8 @@ static inline int device_updated(struct hpsa_scsi_dev_t *dev1,
 		return 1;
 	if (dev1->offload_enabled != dev2->offload_enabled)
 		return 1;
+	if (dev1->queue_depth != dev2->queue_depth)
+		return 1;
 	return 0;
 }
 
@@ -1262,6 +1624,91 @@ static void hpsa_show_volume_status(struct ctlr_info *h,
 	}
 }
 
+/* Fill logical_drive->phys_disk[] from its RAID map by matching ioaccel
+ * handles against the disks in dev[], and set the drive's queue_depth.
+ */
+static void hpsa_figure_phys_disk_ptrs(struct ctlr_info *h,
+				struct hpsa_scsi_dev_t *dev[], int ndevices,
+				struct hpsa_scsi_dev_t *logical_drive)
+{
+	struct raid_map_data *map = &logical_drive->raid_map;
+	struct raid_map_disk_data *dd = &map->data[0];
+	int i, j;
+	int nraid_map_entries = map->row_cnt * map->layout_map_count *
+		(map->data_disks_per_row + map->metadata_disks_per_row);
+	int nphys_disk = map->layout_map_count *
+		(map->data_disks_per_row + map->metadata_disks_per_row);
+	int qdepth;
+
+	if (nraid_map_entries > RAID_MAP_MAX_ENTRIES)
+		nraid_map_entries = RAID_MAP_MAX_ENTRIES;
+
+	qdepth = 0;	/* running sum of member queue depths, capped below */
+	for (i = 0; i < nraid_map_entries; i++) {
+		logical_drive->phys_disk[i] = NULL;
+		if (!logical_drive->offload_config)
+			continue;	/* slot stays NULL, nothing to look up */
+		for (j = 0; j < ndevices; j++) {
+			if (dev[j]->devtype != TYPE_DISK)
+				continue;
+			if (is_logical_dev_addr_mode(dev[j]->scsi3addr))
+				continue;
+			if (dev[j]->ioaccel_handle != dd[i].ioaccel_handle)
+				continue;
+
+			logical_drive->phys_disk[i] = dev[j];
+			if (i < nphys_disk)	/* only the first nphys_disk entries count */
+				qdepth = min(h->nr_cmds, qdepth +
+				    logical_drive->phys_disk[i]->queue_depth);
+			break;
+		}
+
+		/*
+		 * No match: the RAID map names a disk that is not present,
+		 * e.g. a drive was removed and the logical drive is degraded.
+		 * offload_enabled should already be 0; clear it just in case.
+		 * NOTE(review): the queue_depth stored here is overwritten
+		 * after the loop, because nraid_map_entries is non-zero here.
+		 */
+		if (!logical_drive->phys_disk[i]) {
+			logical_drive->offload_enabled = 0;
+			logical_drive->offload_to_be_enabled = 0;
+			logical_drive->queue_depth = h->nr_cmds;
+		}
+	}
+	if (nraid_map_entries)
+		/*
+		 * This is correct for reads, too high for full stripe writes,
+		 * and way too high for partial stripe writes.
+		 */
+		logical_drive->queue_depth = qdepth;
+	else
+		logical_drive->queue_depth = h->nr_cmds;
+}
+
+static void hpsa_update_log_drive_phys_drive_ptrs(struct ctlr_info *h,
+				struct hpsa_scsi_dev_t *dev[], int ndevices)
+{
+	int idx;
+
+	/* Refresh phys_disk[] for each logical disk in dev[].  A drive with
+	 * offload currently enabled is skipped: its RAID map and
+	 * phys_disk[] assignment *better* not be changing, so there is
+	 * nothing to update.
+	 */
+	for (idx = 0; idx < ndevices; idx++) {
+		struct hpsa_scsi_dev_t *ld = dev[idx];
+
+		if (ld->devtype != TYPE_DISK ||
+		    !is_logical_dev_addr_mode(ld->scsi3addr))
+			continue;
+		if (ld->offload_enabled)
+			continue;
+
+		hpsa_figure_phys_disk_ptrs(h, dev, ndevices, ld);
+	}
+}
+
 static void adjust_hpsa_scsi_table(struct ctlr_info *h, int hostno,
 	struct hpsa_scsi_dev_t *sd[], int nsds)
 {
@@ -1334,9 +1781,18 @@ static void adjust_hpsa_scsi_table(struct ctlr_info *h, int hostno,
 		 */
 		if (sd[i]->volume_offline) {
 			hpsa_show_volume_status(h, sd[i]);
-			dev_info(&h->pdev->dev, "c%db%dt%dl%d: temporarily offline\n",
-				h->scsi_host->host_no,
-				sd[i]->bus, sd[i]->target, sd[i]->lun);
+			dev_info(&h->pdev->dev,
+				"offline scsi %d:%d:%d:%d: %s %.8s %.16s RAID-%s SSDSmartPathCap%c En%c Exp=%d\n",
+				hostno, sd[i]->bus, sd[i]->target, sd[i]->lun,
+				scsi_device_type(sd[i]->devtype),
+				sd[i]->vendor,
+				sd[i]->model,
+				sd[i]->raid_level > RAID_UNKNOWN ?
+					"?" : /* "RAID-" is in the format */
+					raid_label[sd[i]->raid_level],
+				sd[i]->offload_config ? '+' : '-',
+				sd[i]->offload_enabled ? '+' : '-',
+				sd[i]->expose_state);
 			continue;
 		}
 
@@ -1356,6 +1812,14 @@ static void adjust_hpsa_scsi_table(struct ctlr_info *h, int hostno,
 			/* but if it does happen, we just ignore that device */
 		}
 	}
+	hpsa_update_log_drive_phys_drive_ptrs(h, h->dev, h->ndevices);
+
+	/* Now that h->dev[]->phys_disk[] is coherent, we can enable
+	 * any logical drives that need it enabled.
+	 */
+	for (i = 0; i < h->ndevices; i++)
+		h->dev[i]->offload_enabled = h->dev[i]->offload_to_be_enabled;
+
 	spin_unlock_irqrestore(&h->devlock, flags);
 
 	/* Monitor devices which are in one of several NOT READY states to be
@@ -1379,20 +1843,23 @@ static void adjust_hpsa_scsi_table(struct ctlr_info *h, int hostno,
 	sh = h->scsi_host;
 	/* Notify scsi mid layer of any removed devices */
 	for (i = 0; i < nremoved; i++) {
-		struct scsi_device *sdev =
-			scsi_device_lookup(sh, removed[i]->bus,
-				removed[i]->target, removed[i]->lun);
-		if (sdev != NULL) {
-			scsi_remove_device(sdev);
-			scsi_device_put(sdev);
-		} else {
-			/* We don't expect to get here.
-			 * future cmds to this device will get selection
-			 * timeout as if the device was gone.
-			 */
-			dev_warn(&h->pdev->dev, "didn't find c%db%dt%dl%d "
-				" for removal.", hostno, removed[i]->bus,
-				removed[i]->target, removed[i]->lun);
+		if (removed[i]->expose_state & HPSA_SCSI_ADD) {
+			struct scsi_device *sdev =
+				scsi_device_lookup(sh, removed[i]->bus,
+					removed[i]->target, removed[i]->lun);
+			if (sdev != NULL) {
+				scsi_remove_device(sdev);
+				scsi_device_put(sdev);
+			} else {
+				/* We don't expect to get here.
+				 * future cmds to this device will get selection
+				 * timeout as if the device was gone.
+				 */
+				dev_warn(&h->pdev->dev,
+					"didn't find scsi %d:%d:%d:%d for removal.\n",
+					hostno, removed[i]->bus,
+					removed[i]->target, removed[i]->lun);
+			}
 		}
 		kfree(removed[i]);
 		removed[i] = NULL;
@@ -1400,16 +1867,20 @@ static void adjust_hpsa_scsi_table(struct ctlr_info *h, int hostno,
 
 	/* Notify scsi mid layer of any added devices */
 	for (i = 0; i < nadded; i++) {
+		if (!(added[i]->expose_state & HPSA_SCSI_ADD))
+			continue;
 		if (scsi_add_device(sh, added[i]->bus,
 			added[i]->target, added[i]->lun) == 0)
 			continue;
-		dev_warn(&h->pdev->dev, "scsi_add_device c%db%dt%dl%d failed, "
-			"device not added.\n", hostno, added[i]->bus,
+		dev_warn(&h->pdev->dev,
+			"scsi %d:%d:%d:%d addition failed, device not added.\n",
+			hostno, added[i]->bus,
 			added[i]->target, added[i]->lun);
 		/* now we have to remove it from h->dev,
 		 * since it didn't get added to scsi mid layer
 		 */
 		fixup_botched_add(h, added[i]);
+		added[i] = NULL;
 	}
 
 free_and_out:
@@ -1446,19 +1917,95 @@ static int hpsa_slave_alloc(struct scsi_device *sdev)
 	spin_lock_irqsave(&h->devlock, flags);
 	sd = lookup_hpsa_scsi_dev(h, sdev_channel(sdev),
 		sdev_id(sdev), sdev->lun);
-	if (sd != NULL)
+	if (sd && (sd->expose_state & HPSA_SCSI_ADD)) {
+		int queue_depth = sd->queue_depth;
+
+		if (queue_depth == 0)
+			queue_depth = sdev->host->can_queue;
+
 		sdev->hostdata = sd;
+
+		if (shost_use_blk_mq(sdev->host)) {
+			scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev),
+						queue_depth);
+		} else {
+			/* We depend on tags for cmd allocation. */
+			BUG_ON(!sdev->tagged_supported);
+			scsi_set_tag_type(sdev, MSG_SIMPLE_TAG);
+			scsi_activate_tcq(sdev, queue_depth);
+		}
+		atomic_set(&sd->ioaccel_cmds_out, 0);
+	} else {
+		sdev->hostdata = NULL;
+	}
 	spin_unlock_irqrestore(&h->devlock, flags);
 	return 0;
 }
 
-static void hpsa_slave_destroy(struct scsi_device *sdev)
+/* configure scsi device based on internal per-device structure */
+static int hpsa_slave_configure(struct scsi_device *sdev)
 {
-	/* nothing to do. */
-}
+	struct hpsa_scsi_dev_t *sd;
+	unsigned long flags;
+	struct ctlr_info *h;
 
-static void hpsa_free_sg_chain_blocks(struct ctlr_info *h)
-{
+	h = sdev_to_hba(sdev);
+	spin_lock_irqsave(&h->devlock, flags);
+	sd = sdev->hostdata;
+	if (sd && !(sd->expose_state & HPSA_ULD_ATTACH))
+		sdev->no_uld_attach = 1;
+
+	spin_unlock_irqrestore(&h->devlock, flags);
+	return 0;
+}
+
+static void hpsa_slave_destroy(struct scsi_device *sdev)
+{
+	/* nothing to do. */
+}
+
+static void hpsa_free_ioaccel2_sg_chain_blocks(struct ctlr_info *h)
+{
+	int idx = 0;
+
+	if (!h->ioaccel2_cmd_sg_list)
+		return;		/* table absent: nothing to release */
+	while (idx < h->nr_cmds) {	/* drop each per-command block */
+		kfree(h->ioaccel2_cmd_sg_list[idx]);
+		h->ioaccel2_cmd_sg_list[idx++] = NULL;
+	}
+	kfree(h->ioaccel2_cmd_sg_list);
+	h->ioaccel2_cmd_sg_list = NULL;
+}
+
+static int hpsa_allocate_ioaccel2_sg_chain_blocks(struct ctlr_info *h)
+{
+	int i;
+
+	if (h->chainsize <= 0)
+		return 0;	/* chainsize <= 0: allocate nothing */
+
+	/* kcalloc()/kmalloc_array() fail cleanly if count * size overflows */
+	h->ioaccel2_cmd_sg_list = kcalloc(h->nr_cmds,
+			sizeof(*h->ioaccel2_cmd_sg_list), GFP_KERNEL);
+	if (!h->ioaccel2_cmd_sg_list)
+		return -ENOMEM;
+	for (i = 0; i < h->nr_cmds; i++) {
+		h->ioaccel2_cmd_sg_list[i] = kmalloc_array(h->maxsgentries,
+				sizeof(*h->ioaccel2_cmd_sg_list[i]),
+				GFP_KERNEL);
+		if (!h->ioaccel2_cmd_sg_list[i])
+			goto clean;	/* free whatever was allocated so far */
+	}
+	return 0;
+
+clean:
+	hpsa_free_ioaccel2_sg_chain_blocks(h);
+	return -ENOMEM;
+}
+
+static void hpsa_free_sg_chain_blocks(struct ctlr_info *h)
+{
 	int i;
 
 	if (!h->cmd_sg_list)
@@ -1471,7 +2018,7 @@ static void hpsa_free_sg_chain_blocks(struct ctlr_info *h)
 	h->cmd_sg_list = NULL;
 }
 
-static int hpsa_allocate_sg_chain_blocks(struct ctlr_info *h)
+static int hpsa_alloc_sg_chain_blocks(struct ctlr_info *h)
 {
 	int i;
 
@@ -1480,13 +2027,17 @@ static int hpsa_allocate_sg_chain_blocks(struct ctlr_info *h)
 
 	h->cmd_sg_list = kzalloc(sizeof(*h->cmd_sg_list) * h->nr_cmds,
 				GFP_KERNEL);
-	if (!h->cmd_sg_list)
+	if (!h->cmd_sg_list) {
+		dev_err(&h->pdev->dev, "Failed to allocate SG list\n");
 		return -ENOMEM;
+	}
 	for (i = 0; i < h->nr_cmds; i++) {
 		h->cmd_sg_list[i] = kmalloc(sizeof(*h->cmd_sg_list[i]) *
 						h->chainsize, GFP_KERNEL);
-		if (!h->cmd_sg_list[i])
+		if (!h->cmd_sg_list[i]) {
+			dev_err(&h->pdev->dev, "Failed to allocate cmd SG\n");
 			goto clean;
+		}
 	}
 	return 0;
 
@@ -1495,27 +2046,60 @@ static int hpsa_allocate_sg_chain_blocks(struct ctlr_info *h)
 	return -ENOMEM;
 }
 
+static int hpsa_map_ioaccel2_sg_chain_block(struct ctlr_info *h,
+	struct io_accel2_cmd *cp, struct CommandList *c)
+{
+	struct ioaccel2_sg_element *chain_block;
+	u64 temp64;
+	u32 chain_size;
+
+	chain_block = h->ioaccel2_cmd_sg_list[c->cmdindex];
+	chain_size = le32_to_cpu(cp->data_len);
+	temp64 = pci_map_single(h->pdev, chain_block, chain_size,
+				PCI_DMA_TODEVICE);
+	if (dma_mapping_error(&h->pdev->dev, temp64)) {
+		/* prevent subsequent unmapping */
+		cp->sg->address = 0;
+		return -1;
+	}
+	cp->sg->address = (u64) cpu_to_le64(temp64);
+	return 0;
+}
+
+static void hpsa_unmap_ioaccel2_sg_chain_block(struct ctlr_info *h,
+	struct io_accel2_cmd *cp)
+{
+	/* cp->sg is the element whose address field holds the mapping */
+	struct ioaccel2_sg_element *chain_elem = cp->sg;
+	u64 bus_addr = le64_to_cpu(chain_elem->address);
+	u32 map_len = le32_to_cpu(cp->data_len);
+
+	/* Undo the streaming mapping: same device, length taken from
+	 * cp->data_len, direction PCI_DMA_TODEVICE. */
+	pci_unmap_single(h->pdev, bus_addr, map_len, PCI_DMA_TODEVICE);
+}
+
 static int hpsa_map_sg_chain_block(struct ctlr_info *h,
 	struct CommandList *c)
 {
 	struct SGDescriptor *chain_sg, *chain_block;
 	u64 temp64;
+	u32 chain_len;	/* CPU-endian size in bytes of the chained SG block */
 
 	chain_sg = &c->SG[h->max_cmd_sg_entries - 1];
 	chain_block = h->cmd_sg_list[c->cmdindex];
-	chain_sg->Ext = HPSA_SG_CHAIN;
-	chain_sg->Len = sizeof(*chain_sg) *
-		(c->Header.SGTotal - h->max_cmd_sg_entries);
-	temp64 = pci_map_single(h->pdev, chain_block, chain_sg->Len,
+	chain_sg->Ext = cpu_to_le32(HPSA_SG_CHAIN);
+	chain_len = sizeof(*chain_sg) *
+		(le16_to_cpu(c->Header.SGTotal) - h->max_cmd_sg_entries);
+	chain_sg->Len = cpu_to_le32(chain_len);
+	temp64 = pci_map_single(h->pdev, chain_block, chain_len,
 				PCI_DMA_TODEVICE);
 	if (dma_mapping_error(&h->pdev->dev, temp64)) {
 		/* prevent subsequent unmapping */
-		chain_sg->Addr.lower = 0;
-		chain_sg->Addr.upper = 0;
+		chain_sg->Addr = 0;
 		return -1;
 	}
-	chain_sg->Addr.lower = (u32) (temp64 & 0x0FFFFFFFFULL);
-	chain_sg->Addr.upper = (u32) ((temp64 >> 32) & 0x0FFFFFFFFULL);
+	chain_sg->Addr = cpu_to_le64(temp64);
 	return 0;
 }
 
@@ -1523,15 +2107,13 @@ static void hpsa_unmap_sg_chain_block(struct ctlr_info *h,
 	struct CommandList *c)
 {
 	struct SGDescriptor *chain_sg;
-	union u64bit temp64;
 
-	if (c->Header.SGTotal <= h->max_cmd_sg_entries)
+	if (le16_to_cpu(c->Header.SGTotal) <= h->max_cmd_sg_entries)
 		return;
 
 	chain_sg = &c->SG[h->max_cmd_sg_entries - 1];
-	temp64.val32.lower = chain_sg->Addr.lower;
-	temp64.val32.upper = chain_sg->Addr.upper;
-	pci_unmap_single(h->pdev, temp64.val, chain_sg->Len, PCI_DMA_TODEVICE);
+	pci_unmap_single(h->pdev, le64_to_cpu(chain_sg->Addr),
+			le32_to_cpu(chain_sg->Len), PCI_DMA_TODEVICE);
 }
 
 
@@ -1546,6 +2128,7 @@ static int handle_ioaccel_mode2_error(struct ctlr_info *h,
 {
 	int data_len;
 	int retry = 0;
+	u32 ioaccel2_resid = 0;
 
 	switch (c2->error_data.serv_response) {
 	case IOACCEL2_SERV_RESPONSE_COMPLETE:
@@ -1553,9 +2136,6 @@ static int handle_ioaccel_mode2_error(struct ctlr_info *h,
 		case IOACCEL2_STATUS_SR_TASK_COMP_GOOD:
 			break;
 		case IOACCEL2_STATUS_SR_TASK_COMP_CHK_COND:
-			dev_warn(&h->pdev->dev,
-				"%s: task complete with check condition.\n",
-				"HP SSD Smart Path");
 			cmd->result |= SAM_STAT_CHECK_CONDITION;
 			if (c2->error_data.data_present !=
 					IOACCEL2_SENSE_DATA_PRESENT) {
@@ -1575,58 +2155,58 @@ static int handle_ioaccel_mode2_error(struct ctlr_info *h,
 			retry = 1;
 			break;
 		case IOACCEL2_STATUS_SR_TASK_COMP_BUSY:
-			dev_warn(&h->pdev->dev,
-				"%s: task complete with BUSY status.\n",
-				"HP SSD Smart Path");
 			retry = 1;
 			break;
 		case IOACCEL2_STATUS_SR_TASK_COMP_RES_CON:
-			dev_warn(&h->pdev->dev,
-				"%s: task complete with reservation conflict.\n",
-				"HP SSD Smart Path");
 			retry = 1;
 			break;
 		case IOACCEL2_STATUS_SR_TASK_COMP_SET_FULL:
-			/* Make scsi midlayer do unlimited retries */
-			cmd->result = DID_IMM_RETRY << 16;
+			retry = 1;
 			break;
 		case IOACCEL2_STATUS_SR_TASK_COMP_ABORTED:
-			dev_warn(&h->pdev->dev,
-				"%s: task complete with aborted status.\n",
-				"HP SSD Smart Path");
 			retry = 1;
 			break;
 		default:
-			dev_warn(&h->pdev->dev,
-				"%s: task complete with unrecognized status: 0x%02x\n",
-				"HP SSD Smart Path", c2->error_data.status);
 			retry = 1;
 			break;
 		}
 		break;
 	case IOACCEL2_SERV_RESPONSE_FAILURE:
-		/* don't expect to get here. */
-		dev_warn(&h->pdev->dev,
-			"unexpected delivery or target failure, status = 0x%02x\n",
-			c2->error_data.status);
-		retry = 1;
+		switch (c2->error_data.status) {
+		case IOACCEL2_STATUS_SR_IO_ERROR:
+		case IOACCEL2_STATUS_SR_IO_ABORTED:
+		case IOACCEL2_STATUS_SR_OVERRUN:
+			retry = 1;
+			break;
+		case IOACCEL2_STATUS_SR_UNDERRUN:
+			cmd->result = (DID_OK << 16);		/* host byte */
+			cmd->result |= (COMMAND_COMPLETE << 8);	/* msg byte */
+			ioaccel2_resid = c2->error_data.resid_cnt[3] << 24;
+			ioaccel2_resid |= c2->error_data.resid_cnt[2] << 16;
+			ioaccel2_resid |= c2->error_data.resid_cnt[1] << 8;
+			ioaccel2_resid |= c2->error_data.resid_cnt[0];
+			scsi_set_resid(cmd, ioaccel2_resid);
+			break;
+		case IOACCEL2_STATUS_SR_NO_PATH_TO_DEVICE:
+		case IOACCEL2_STATUS_SR_INVALID_DEVICE:
+		case IOACCEL2_STATUS_SR_IOACCEL_DISABLED:
+			/* We will get an event from ctlr to trigger rescan */
+			retry = 1;
+			break;
+		default:
+			retry = 1;
+		}
 		break;
 	case IOACCEL2_SERV_RESPONSE_TMF_COMPLETE:
 		break;
 	case IOACCEL2_SERV_RESPONSE_TMF_SUCCESS:
 		break;
 	case IOACCEL2_SERV_RESPONSE_TMF_REJECTED:
-		dev_warn(&h->pdev->dev, "task management function rejected.\n");
 		retry = 1;
 		break;
 	case IOACCEL2_SERV_RESPONSE_TMF_WRONG_LUN:
-		dev_warn(&h->pdev->dev, "task management function invalid LUN\n");
 		break;
 	default:
-		dev_warn(&h->pdev->dev,
-			"%s: Unrecognized server response: 0x%02x\n",
-			"HP SSD Smart Path",
-			c2->error_data.serv_response);
 		retry = 1;
 		break;
 	}
@@ -1634,20 +2214,65 @@ static int handle_ioaccel_mode2_error(struct ctlr_info *h,
 	return retry;	/* retry on raid path? */
 }
 
+static void hpsa_cmd_free_and_done(struct ctlr_info *h,
+		struct CommandList *c, struct scsi_cmnd *cmd)
+{
+	/* Prevent the following race in the abort handler:
+	 *
+	 * 1. LLD is requested to abort a SCSI command
+	 * 2. The SCSI command completes
+	 * 3. The struct CommandList associated with step 2 is made available
+	 * 4. New I/O request to LLD to another LUN re-uses struct CommandList
+	 * 5. Abort handler follows scsi_cmnd->host_scribble,
+	 *    finds the struct CommandList and tries to abort it
+	 * Now we have aborted the wrong command.
+	 *
+	 * Clear c->scsi_cmd here so that if this command gets re-used, the
+	 * abort handler will know it's a different scsi_cmnd.
+	 */
+	c->scsi_cmd = NULL;
+	cmd_tagged_free(h, c);	/* give the CommandList back first */
+	cmd->scsi_done(cmd);	/* then invoke the command's done callback */
+}
+
+static void hpsa_retry_cmd(struct ctlr_info *h, struct CommandList *c)
+{
+	INIT_WORK(&c->work, hpsa_command_resubmit_worker);
+	queue_work_on(raw_smp_processor_id(), h->resubmit_wq, &c->work);
+}
+
+static void hpsa_set_scsi_cmd_aborted(struct scsi_cmnd *cmd)
+{
+	cmd->result = DID_ABORT << 16;
+}
+
+static void hpsa_cmd_abort_and_free(struct ctlr_info *h, struct CommandList *c,
+				    struct scsi_cmnd *cmd)
+{
+	hpsa_set_scsi_cmd_aborted(cmd);
+	dev_warn(&h->pdev->dev, "CDB %16phN was aborted with status 0x%x\n",
+			 c->Request.CDB, c->err_info->ScsiStatus);
+	c->scsi_cmd = NULL;
+	cmd_tagged_free(h, c);
+	wake_up_all(&h->abort_sync_wait_queue);
+}
+
 static void process_ioaccel2_completion(struct ctlr_info *h,
 		struct CommandList *c, struct scsi_cmnd *cmd,
 		struct hpsa_scsi_dev_t *dev)
 {
 	struct io_accel2_cmd *c2 = &h->ioaccel2_cmd_pool[c->cmdindex];
-	int raid_retry = 0;
+
+	atomic_dec(&c->phys_disk->ioaccel_cmds_out);
 
 	/* check for good status */
 	if (likely(c2->error_data.serv_response == 0 &&
-			c2->error_data.status == 0)) {
-		cmd_free(h, c);
-		cmd->scsi_done(cmd);
-		return;
-	}
+			c2->error_data.status == 0))
+		return hpsa_cmd_free_and_done(h, c, cmd);
+
+	/* don't requeue a command which is being aborted */
+	if (unlikely(c->abort_pending))
+		return hpsa_cmd_abort_and_free(h, c, cmd);
 
 	/* Any RAID offload error results in retry which will use
 	 * the normal I/O path so the controller can handle whatever's
@@ -1656,26 +2281,44 @@ static void process_ioaccel2_completion(struct ctlr_info *h,
 	if (is_logical_dev_addr_mode(dev->scsi3addr) &&
 		c2->error_data.serv_response ==
 			IOACCEL2_SERV_RESPONSE_FAILURE) {
-		dev->offload_enabled = 0;
-		h->drv_req_rescan = 1;	/* schedule controller for a rescan */
-		cmd->result = DID_SOFT_ERROR << 16;
-		cmd_free(h, c);
-		cmd->scsi_done(cmd);
-		return;
+		if (c2->error_data.status ==
+			IOACCEL2_STATUS_SR_IOACCEL_DISABLED)
+			dev->offload_enabled = 0;
+
+		return hpsa_retry_cmd(h, c);
 	}
-	raid_retry = handle_ioaccel_mode2_error(h, c, cmd, c2);
-	/* If error found, disable Smart Path, schedule a rescan,
-	 * and force a retry on the standard path.
-	 */
-	if (raid_retry) {
-		dev_warn(&h->pdev->dev, "%s: Retrying on standard path.\n",
-			"HP SSD Smart Path");
-		dev->offload_enabled = 0; /* Disable Smart Path */
-		h->drv_req_rescan = 1;	  /* schedule controller rescan */
-		cmd->result = DID_SOFT_ERROR << 16;
+
+	if (handle_ioaccel_mode2_error(h, c, cmd, c2))
+		return hpsa_retry_cmd(h, c);
+
+	return hpsa_cmd_free_and_done(h, c, cmd);
+}
+
+/* Returns 0 on success, < 0 otherwise. */
+static int hpsa_evaluate_tmf_status(struct ctlr_info *h,
+					struct CommandList *cp)
+{
+	u8 tmf_status = cp->err_info->ScsiStatus;
+
+	switch (tmf_status) {
+	case CISS_TMF_COMPLETE:
+		/* CISS_TMF_COMPLETE never happens, instead,
+		 * ei->CommandStatus == 0 for this case.
+		 */
+	case CISS_TMF_SUCCESS:
+		return 0;
+	case CISS_TMF_INVALID_FRAME:
+	case CISS_TMF_NOT_SUPPORTED:
+	case CISS_TMF_FAILED:
+	case CISS_TMF_WRONG_LUN:
+	case CISS_TMF_OVERLAPPED_TAG:
+		break;
+	default:
+		dev_warn(&h->pdev->dev, "Unknown TMF status: %02x\n",
+				tmf_status);
+		break;
 	}
-	cmd_free(h, c);
-	cmd->scsi_done(cmd);
+	return -tmf_status;
 }
 
 static void complete_scsi_command(struct CommandList *cp)
@@ -1684,56 +2327,60 @@ static void complete_scsi_command(struct CommandList *cp)
 	struct ctlr_info *h;
 	struct ErrorInfo *ei;
 	struct hpsa_scsi_dev_t *dev;
+	struct io_accel2_cmd *c2;
 
-	unsigned char sense_key;
-	unsigned char asc;      /* additional sense code */
-	unsigned char ascq;     /* additional sense code qualifier */
+	int sense_key;
+	int asc;      /* additional sense code */
+	int ascq;     /* additional sense code qualifier */
 	unsigned long sense_data_size;
 
 	ei = cp->err_info;
-	cmd = (struct scsi_cmnd *) cp->scsi_cmd;
+	cmd = cp->scsi_cmd;
 	h = cp->h;
 	dev = cmd->device->hostdata;
+	c2 = &h->ioaccel2_cmd_pool[cp->cmdindex];
 
 	scsi_dma_unmap(cmd); /* undo the DMA mappings */
 	if ((cp->cmd_type == CMD_SCSI) &&
-		(cp->Header.SGTotal > h->max_cmd_sg_entries))
+		(le16_to_cpu(cp->Header.SGTotal) > h->max_cmd_sg_entries))
 		hpsa_unmap_sg_chain_block(h, cp);
 
+	if ((cp->cmd_type == CMD_IOACCEL2) &&
+		(c2->sg[0].chain_indicator == IOACCEL2_CHAIN))
+		hpsa_unmap_ioaccel2_sg_chain_block(h, c2);
+
 	cmd->result = (DID_OK << 16); 		/* host byte */
 	cmd->result |= (COMMAND_COMPLETE << 8);	/* msg byte */
 
+	/* We check for lockup status here as it may be set for
+	 * CMD_SCSI, CMD_IOACCEL1 and CMD_IOACCEL2 commands by
+	 * fail_all_oustanding_cmds()
+	 */
+	if (unlikely(ei->CommandStatus == CMD_CTLR_LOCKUP)) {
+		/* DID_NO_CONNECT will prevent a retry */
+		cmd->result = DID_NO_CONNECT << 16;
+		return hpsa_cmd_free_and_done(h, cp, cmd);
+	}
+
 	if (cp->cmd_type == CMD_IOACCEL2)
 		return process_ioaccel2_completion(h, cp, cmd, dev);
 
-	cmd->result |= ei->ScsiStatus;
-
 	scsi_set_resid(cmd, ei->ResidualCnt);
 	if (ei->CommandStatus == 0) {
-		cmd_free(h, cp);
-		cmd->scsi_done(cmd);
-		return;
+		if (cp->cmd_type == CMD_IOACCEL1)
+			atomic_dec(&cp->phys_disk->ioaccel_cmds_out);
+		return hpsa_cmd_free_and_done(h, cp, cmd);
 	}
 
-	/* copy the sense data */
-	if (SCSI_SENSE_BUFFERSIZE < sizeof(ei->SenseInfo))
-		sense_data_size = SCSI_SENSE_BUFFERSIZE;
-	else
-		sense_data_size = sizeof(ei->SenseInfo);
-	if (ei->SenseLen < sense_data_size)
-		sense_data_size = ei->SenseLen;
-
-	memcpy(cmd->sense_buffer, ei->SenseInfo, sense_data_size);
-
 	/* For I/O accelerator commands, copy over some fields to the normal
 	 * CISS header used below for error handling.
 	 */
 	if (cp->cmd_type == CMD_IOACCEL1) {
 		struct io_accel1_cmd *c = &h->ioaccel_cmd_pool[cp->cmdindex];
+		atomic_dec(&cp->phys_disk->ioaccel_cmds_out);
 		cp->Header.SGList = cp->Header.SGTotal = scsi_sg_count(cmd);
 		cp->Request.CDBLen = c->io_flags & IOACCEL1_IOFLAGS_CDBLEN_MASK;
-		cp->Header.Tag.lower = c->Tag.lower;
-		cp->Header.Tag.upper = c->Tag.upper;
+		cp->Header.tag = c->tag;
 		memcpy(cp->Header.LUN.LunAddrBytes, c->CISS_LUN, 8);
 		memcpy(cp->Request.CDB, c->CDB, cp->Request.CDBLen);
 
@@ -1744,91 +2391,37 @@ static void complete_scsi_command(struct CommandList *cp)
 		if (is_logical_dev_addr_mode(dev->scsi3addr)) {
 			if (ei->CommandStatus == CMD_IOACCEL_DISABLED)
 				dev->offload_enabled = 0;
-			cmd->result = DID_SOFT_ERROR << 16;
-			cmd_free(h, cp);
-			cmd->scsi_done(cmd);
-			return;
+			if (!cp->abort_pending)
+				return hpsa_retry_cmd(h, cp);
 		}
 	}
 
+	if (cp->abort_pending)
+		ei->CommandStatus = CMD_ABORTED;
+
 	/* an error has occurred */
 	switch (ei->CommandStatus) {
 
 	case CMD_TARGET_STATUS:
-		if (ei->ScsiStatus) {
-			/* Get sense key */
-			sense_key = 0xf & ei->SenseInfo[2];
-			/* Get additional sense code */
-			asc = ei->SenseInfo[12];
-			/* Get addition sense code qualifier */
-			ascq = ei->SenseInfo[13];
-		}
-
+		cmd->result |= ei->ScsiStatus;
+		/* copy the sense data */
+		if (SCSI_SENSE_BUFFERSIZE < sizeof(ei->SenseInfo))
+			sense_data_size = SCSI_SENSE_BUFFERSIZE;
+		else
+			sense_data_size = sizeof(ei->SenseInfo);
+		if (ei->SenseLen < sense_data_size)
+			sense_data_size = ei->SenseLen;
+		memcpy(cmd->sense_buffer, ei->SenseInfo, sense_data_size);
+		if (ei->ScsiStatus)
+			decode_sense_data(ei->SenseInfo, sense_data_size,
+				&sense_key, &asc, &ascq);
 		if (ei->ScsiStatus == SAM_STAT_CHECK_CONDITION) {
-			if (check_for_unit_attention(h, cp))
-				break;
-			if (sense_key == ILLEGAL_REQUEST) {
-				/*
-				 * SCSI REPORT_LUNS is commonly unsupported on
-				 * Smart Array.  Suppress noisy complaint.
-				 */
-				if (cp->Request.CDB[0] == REPORT_LUNS)
-					break;
-
-				/* If ASC/ASCQ indicate Logical Unit
-				 * Not Supported condition,
-				 */
-				if ((asc == 0x25) && (ascq == 0x0)) {
-					dev_warn(&h->pdev->dev, "cp %p "
-						"has check condition\n", cp);
-					break;
-				}
-			}
-
-			if (sense_key == NOT_READY) {
-				/* If Sense is Not Ready, Logical Unit
-				 * Not ready, Manual Intervention
-				 * required
-				 */
-				if ((asc == 0x04) && (ascq == 0x03)) {
-					dev_warn(&h->pdev->dev, "cp %p "
-						"has check condition: unit "
-						"not ready, manual "
-						"intervention required\n", cp);
-					break;
-				}
-			}
 			if (sense_key == ABORTED_COMMAND) {
-				/* Aborted command is retryable */
-				dev_warn(&h->pdev->dev, "cp %p "
-					"has check condition: aborted command: "
-					"ASC: 0x%x, ASCQ: 0x%x\n",
-					cp, asc, ascq);
 				cmd->result |= DID_SOFT_ERROR << 16;
 				break;
 			}
-			/* Must be some other type of check condition */
-			dev_dbg(&h->pdev->dev, "cp %p has check condition: "
-					"unknown type: "
-					"Sense: 0x%x, ASC: 0x%x, ASCQ: 0x%x, "
-					"Returning result: 0x%x, "
-					"cmd=[%02x %02x %02x %02x %02x "
-					"%02x %02x %02x %02x %02x %02x "
-					"%02x %02x %02x %02x %02x]\n",
-					cp, sense_key, asc, ascq,
-					cmd->result,
-					cmd->cmnd[0], cmd->cmnd[1],
-					cmd->cmnd[2], cmd->cmnd[3],
-					cmd->cmnd[4], cmd->cmnd[5],
-					cmd->cmnd[6], cmd->cmnd[7],
-					cmd->cmnd[8], cmd->cmnd[9],
-					cmd->cmnd[10], cmd->cmnd[11],
-					cmd->cmnd[12], cmd->cmnd[13],
-					cmd->cmnd[14], cmd->cmnd[15]);
 			break;
 		}
-
-
 		/* Problem was not a check condition
 		 * Pass it up to the upper layers...
 		 */
@@ -1862,9 +2455,8 @@ static void complete_scsi_command(struct CommandList *cp)
 	case CMD_DATA_UNDERRUN: /* let mid layer handle it. */
 		break;
 	case CMD_DATA_OVERRUN:
-		dev_warn(&h->pdev->dev, "cp %p has"
-			" completed with data overrun "
-			"reported\n", cp);
+		dev_warn(&h->pdev->dev,
+			"CDB %16phN data overrun\n", cp->Request.CDB);
 		break;
 	case CMD_INVALID: {
 		/* print_bytes(cp, sizeof(*cp), 1, 0);
@@ -1880,39 +2472,45 @@ static void complete_scsi_command(struct CommandList *cp)
 		break;
 	case CMD_PROTOCOL_ERR:
 		cmd->result = DID_ERROR << 16;
-		dev_warn(&h->pdev->dev, "cp %p has "
-			"protocol error\n", cp);
+		dev_warn(&h->pdev->dev, "CDB %16phN : protocol error\n",
+				cp->Request.CDB);
 		break;
 	case CMD_HARDWARE_ERR:
 		cmd->result = DID_ERROR << 16;
-		dev_warn(&h->pdev->dev, "cp %p had  hardware error\n", cp);
+		dev_warn(&h->pdev->dev, "CDB %16phN : hardware error\n",
+			cp->Request.CDB);
 		break;
 	case CMD_CONNECTION_LOST:
 		cmd->result = DID_ERROR << 16;
-		dev_warn(&h->pdev->dev, "cp %p had connection lost\n", cp);
+		dev_warn(&h->pdev->dev, "CDB %16phN : connection lost\n",
+			cp->Request.CDB);
 		break;
 	case CMD_ABORTED:
-		cmd->result = DID_ABORT << 16;
-		dev_warn(&h->pdev->dev, "cp %p was aborted with status 0x%x\n",
-				cp, ei->ScsiStatus);
-		break;
+		/* Return now to avoid calling scsi_done(). */
+		return hpsa_cmd_abort_and_free(h, cp, cmd);
 	case CMD_ABORT_FAILED:
 		cmd->result = DID_ERROR << 16;
-		dev_warn(&h->pdev->dev, "cp %p reports abort failed\n", cp);
+		dev_warn(&h->pdev->dev, "CDB %16phN : abort failed\n",
+			cp->Request.CDB);
 		break;
 	case CMD_UNSOLICITED_ABORT:
 		cmd->result = DID_SOFT_ERROR << 16; /* retry the command */
-		dev_warn(&h->pdev->dev, "cp %p aborted due to an unsolicited "
-			"abort\n", cp);
+		dev_warn(&h->pdev->dev, "CDB %16phN : unsolicited abort\n",
+			cp->Request.CDB);
 		break;
 	case CMD_TIMEOUT:
 		cmd->result = DID_TIME_OUT << 16;
-		dev_warn(&h->pdev->dev, "cp %p timedout\n", cp);
+		dev_warn(&h->pdev->dev, "CDB %16phN timedout\n",
+			cp->Request.CDB);
 		break;
 	case CMD_UNABORTABLE:
 		cmd->result = DID_ERROR << 16;
 		dev_warn(&h->pdev->dev, "Command unabortable\n");
 		break;
+	case CMD_TMF_STATUS:
+		if (hpsa_evaluate_tmf_status(h, cp)) /* TMF failed? */
+			cmd->result = DID_ERROR << 16;
+		break;
 	case CMD_IOACCEL_DISABLED:
 		/* This only handles the direct pass-through case since RAID
 		 * offload is handled above.  Just attempt a retry.
@@ -1926,22 +2524,19 @@ static void complete_scsi_command(struct CommandList *cp)
 		dev_warn(&h->pdev->dev, "cp %p returned unknown status %x\n",
 				cp, ei->CommandStatus);
 	}
-	cmd_free(h, cp);
-	cmd->scsi_done(cmd);
+
+	return hpsa_cmd_free_and_done(h, cp, cmd);
 }
 
 static void hpsa_pci_unmap(struct pci_dev *pdev,
 	struct CommandList *c, int sg_used, int data_direction)
 {
 	int i;
-	union u64bit addr64;
 
-	for (i = 0; i < sg_used; i++) {
-		addr64.val32.lower = c->SG[i].Addr.lower;
-		addr64.val32.upper = c->SG[i].Addr.upper;
-		pci_unmap_single(pdev, (dma_addr_t) addr64.val, c->SG[i].Len,
-			data_direction);
-	}
+	for (i = 0; i < sg_used; i++)
+		pci_unmap_single(pdev, (dma_addr_t) le64_to_cpu(c->SG[i].Addr),
+				le32_to_cpu(c->SG[i].Len),
+				data_direction);
 }
 
 static int hpsa_map_one(struct pci_dev *pdev,
@@ -1954,7 +2549,7 @@ static int hpsa_map_one(struct pci_dev *pdev,
 
 	if (buflen == 0 || data_direction == PCI_DMA_NONE) {
 		cp->Header.SGList = 0;
-		cp->Header.SGTotal = 0;
+		cp->Header.SGTotal = cpu_to_le16(0);
 		return 0;
 	}
 
@@ -1962,28 +2557,47 @@ static int hpsa_map_one(struct pci_dev *pdev,
 	if (dma_mapping_error(&pdev->dev, addr64)) {
 		/* Prevent subsequent unmap of something never mapped */
 		cp->Header.SGList = 0;
-		cp->Header.SGTotal = 0;
+		cp->Header.SGTotal = cpu_to_le16(0);
 		return -1;
 	}
-	cp->SG[0].Addr.lower =
-	  (u32) (addr64 & (u64) 0x00000000FFFFFFFF);
-	cp->SG[0].Addr.upper =
-	  (u32) ((addr64 >> 32) & (u64) 0x00000000FFFFFFFF);
-	cp->SG[0].Len = buflen;
-	cp->SG[0].Ext = HPSA_SG_LAST; /* we are not chaining */
+	cp->SG[0].Addr = cpu_to_le64(addr64);
+	cp->SG[0].Len = cpu_to_le32(buflen);
+	cp->SG[0].Ext = cpu_to_le32(HPSA_SG_LAST); /* we are not chaining */
 	cp->Header.SGList = (u8) 1;   /* no. SGs contig in this cmd */
-	cp->Header.SGTotal = (u16) 1; /* total sgs in this cmd list */
+	cp->Header.SGTotal = (u16) cpu_to_le16(1); /* total sgs in cmd list */
 	return 0;
 }
 
-static inline void hpsa_scsi_do_simple_cmd_core(struct ctlr_info *h,
-	struct CommandList *c)
+#define NO_TIMEOUT ((unsigned long) -1) /* wait without a time limit */
+#define DEFAULT_TIMEOUT (30000) /* milliseconds */
+static int hpsa_scsi_do_simple_cmd_core(struct ctlr_info *h,
+	struct CommandList *c, int reply_queue, unsigned long timeout_msecs)
 {
 	DECLARE_COMPLETION_ONSTACK(wait);
 
 	c->waiting = &wait;
-	enqueue_cmd_and_start_io(h, c);
-	wait_for_completion(&wait);
+	__enqueue_cmd_and_start_io(h, c, reply_queue);
+	if (timeout_msecs == NO_TIMEOUT) {
+		/* TODO: get rid of this no-timeout thing */
+		wait_for_completion_io(&wait);
+		return 0;
+	}
+	if (!wait_for_completion_io_timeout(&wait,
+					msecs_to_jiffies(timeout_msecs))) {
+		dev_warn(&h->pdev->dev, "Command timed out.\n");
+		return -ETIMEDOUT; /* NOTE(review): c->waiting now stale? */
+	}
+	return 0;	/* completed within the allowed time */
+}
+
+static int hpsa_scsi_do_simple_cmd(struct ctlr_info *h, struct CommandList *c,
+				   int reply_queue, unsigned long timeout_msecs)
+{
+	if (unlikely(lockup_detected(h))) {
+		c->err_info->CommandStatus = CMD_CTLR_LOCKUP;
+		return 0;
+	}
+	return hpsa_scsi_do_simple_cmd_core(h, c, reply_queue, timeout_msecs);
 }
 
 static u32 lockup_detected(struct ctlr_info *h)
@@ -1998,25 +2612,19 @@ static u32 lockup_detected(struct ctlr_info *h)
 	return rc;
 }
 
-static void hpsa_scsi_do_simple_cmd_core_if_no_lockup(struct ctlr_info *h,
-	struct CommandList *c)
-{
-	/* If controller lockup detected, fake a hardware error. */
-	if (unlikely(lockup_detected(h)))
-		c->err_info->CommandStatus = CMD_HARDWARE_ERR;
-	else
-		hpsa_scsi_do_simple_cmd_core(h, c);
-}
-
 #define MAX_DRIVER_CMD_RETRIES 25
-static void hpsa_scsi_do_simple_cmd_with_retry(struct ctlr_info *h,
-	struct CommandList *c, int data_direction)
+static int hpsa_scsi_do_simple_cmd_with_retry(struct ctlr_info *h,
+	struct CommandList *c, int data_direction, unsigned long timeout_msecs)
 {
 	int backoff_time = 10, retry_count = 0;
+	int rc;
 
 	do {
 		memset(c->err_info, 0, sizeof(*c->err_info));
-		hpsa_scsi_do_simple_cmd_core(h, c);
+		rc = hpsa_scsi_do_simple_cmd(h, c, DEFAULT_REPLY_QUEUE,
+						  timeout_msecs);
+		if (rc)
+			break;
 		retry_count++;
 		if (retry_count > 3) {
 			msleep(backoff_time);
@@ -2027,6 +2635,9 @@ static void hpsa_scsi_do_simple_cmd_with_retry(struct ctlr_info *h,
 			check_for_busy(h, c)) &&
 			retry_count <= MAX_DRIVER_CMD_RETRIES);
 	hpsa_pci_unmap(h->pdev, c, 1, data_direction);
+	if (retry_count > MAX_DRIVER_CMD_RETRIES)
+		rc = -1; /* FIXME do something better? */
+	return rc;
 }
 
 static void hpsa_print_cmd(struct ctlr_info *h, char *txt,
@@ -2050,14 +2661,20 @@ static void hpsa_scsi_interpret_error(struct ctlr_info *h,
 {
 	const struct ErrorInfo *ei = cp->err_info;
 	struct device *d = &cp->h->pdev->dev;
-	const u8 *sd = ei->SenseInfo;
+	int sense_key, asc, ascq, sense_len;
 
 	switch (ei->CommandStatus) {
 	case CMD_TARGET_STATUS:
+		if (ei->SenseLen > sizeof(ei->SenseInfo))
+			sense_len = sizeof(ei->SenseInfo);
+		else
+			sense_len = ei->SenseLen;
+		decode_sense_data(ei->SenseInfo, sense_len,
+					&sense_key, &asc, &ascq);
 		hpsa_print_cmd(h, "SCSI status", cp);
 		if (ei->ScsiStatus == SAM_STAT_CHECK_CONDITION)
 			dev_warn(d, "SCSI Status = 02, Sense key = %02x, ASC = %02x, ASCQ = %02x\n",
-				sd[2] & 0x0f, sd[12], sd[13]);
+				sense_key, asc, ascq);
 		else
 			dev_warn(d, "SCSI Status = %02x\n", ei->ScsiStatus);
 		if (ei->ScsiStatus == 0)
@@ -2103,6 +2720,9 @@ static void hpsa_scsi_interpret_error(struct ctlr_info *h,
 	case CMD_UNABORTABLE:
 		hpsa_print_cmd(h, "unabortable", cp);
 		break;
+	case CMD_CTLR_LOCKUP:
+		hpsa_print_cmd(h, "controller lockup detected", cp);
+		break;
 	default:
 		hpsa_print_cmd(h, "unknown status", cp);
 		dev_warn(d, "Unknown command status %x\n",
@@ -2118,26 +2738,24 @@ static int hpsa_scsi_do_inquiry(struct ctlr_info *h, unsigned char *scsi3addr,
 	struct CommandList *c;
 	struct ErrorInfo *ei;
 
-	c = cmd_special_alloc(h);
-
-	if (c == NULL) {			/* trouble... */
-		dev_warn(&h->pdev->dev, "cmd_special_alloc returned NULL!\n");
-		return -ENOMEM;
-	}
+	c = cmd_alloc(h);
 
 	if (fill_cmd(c, HPSA_INQUIRY, h, buf, bufsize,
 			page, scsi3addr, TYPE_CMD)) {
 		rc = -1;
 		goto out;
 	}
-	hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE);
+	rc = hpsa_scsi_do_simple_cmd_with_retry(h, c,
+					PCI_DMA_FROMDEVICE, NO_TIMEOUT);
+	if (rc)
+		goto out;
 	ei = c->err_info;
 	if (ei->CommandStatus != 0 && ei->CommandStatus != CMD_DATA_UNDERRUN) {
 		hpsa_scsi_interpret_error(h, c);
 		rc = -1;
 	}
 out:
-	cmd_special_free(h, c);
+	cmd_free(h, c);
 	return rc;
 }
 
@@ -2149,48 +2767,45 @@ static int hpsa_bmic_ctrl_mode_sense(struct ctlr_info *h,
 	struct CommandList *c;
 	struct ErrorInfo *ei;
 
-	c = cmd_special_alloc(h);
-
-	if (c == NULL) {			/* trouble... */
-		dev_warn(&h->pdev->dev, "cmd_special_alloc returned NULL!\n");
-		return -ENOMEM;
-	}
+	c = cmd_alloc(h);
 
 	if (fill_cmd(c, BMIC_SENSE_CONTROLLER_PARAMETERS, h, buf, bufsize,
 			page, scsi3addr, TYPE_CMD)) {
 		rc = -1;
 		goto out;
 	}
-	hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE);
+	rc = hpsa_scsi_do_simple_cmd_with_retry(h, c,
+			PCI_DMA_FROMDEVICE, NO_TIMEOUT);
+	if (rc)
+		goto out;
 	ei = c->err_info;
 	if (ei->CommandStatus != 0 && ei->CommandStatus != CMD_DATA_UNDERRUN) {
 		hpsa_scsi_interpret_error(h, c);
 		rc = -1;
 	}
 out:
-	cmd_special_free(h, c);
+	cmd_free(h, c);
 	return rc;
-	}
+}
 
 static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr,
-	u8 reset_type)
+	u8 reset_type, int reply_queue)
 {
 	int rc = IO_OK;
 	struct CommandList *c;
 	struct ErrorInfo *ei;
 
-	c = cmd_special_alloc(h);
-
-	if (c == NULL) {			/* trouble... */
-		dev_warn(&h->pdev->dev, "cmd_special_alloc returned NULL!\n");
-		return -ENOMEM;
-	}
+	c = cmd_alloc(h);
 
 	/* fill_cmd can't fail here, no data buffer to map. */
 	(void) fill_cmd(c, HPSA_DEVICE_RESET_MSG, h, NULL, 0, 0,
 			scsi3addr, TYPE_MSG);
 	c->Request.CDB[1] = reset_type; /* fill_cmd defaults to LUN reset */
-	hpsa_scsi_do_simple_cmd_core(h, c);
+	rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, NO_TIMEOUT);
+	if (rc) {
+		dev_warn(&h->pdev->dev, "Failed to send reset command\n");
+		goto out;
+	}
 	/* no unmap needed here because no data xfer. */
 
 	ei = c->err_info;
@@ -2198,7 +2813,8 @@ static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr,
 		hpsa_scsi_interpret_error(h, c);
 		rc = -1;
 	}
-	cmd_special_free(h, c);
+out:
+	cmd_free(h, c);
 	return rc;
 }
 
@@ -2308,26 +2924,26 @@ static int hpsa_get_raid_map(struct ctlr_info *h,
 	struct CommandList *c;
 	struct ErrorInfo *ei;
 
-	c = cmd_special_alloc(h);
-	if (c == NULL) {
-		dev_warn(&h->pdev->dev, "cmd_special_alloc returned NULL!\n");
-		return -ENOMEM;
-	}
+	c = cmd_alloc(h);
+
 	if (fill_cmd(c, HPSA_GET_RAID_MAP, h, &this_device->raid_map,
 			sizeof(this_device->raid_map), 0,
 			scsi3addr, TYPE_CMD)) {
-		dev_warn(&h->pdev->dev, "Out of memory in hpsa_get_raid_map()\n");
-		cmd_special_free(h, c);
-		return -ENOMEM;
+		dev_warn(&h->pdev->dev, "hpsa_get_raid_map fill_cmd failed\n");
+		cmd_free(h, c);
+		return -1;
 	}
-	hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE);
+	rc = hpsa_scsi_do_simple_cmd_with_retry(h, c,
+					PCI_DMA_FROMDEVICE, NO_TIMEOUT);
+	if (rc)
+		goto out;
 	ei = c->err_info;
 	if (ei->CommandStatus != 0 && ei->CommandStatus != CMD_DATA_UNDERRUN) {
 		hpsa_scsi_interpret_error(h, c);
-		cmd_special_free(h, c);
-		return -1;
+		rc = -1;
+		goto out;
 	}
-	cmd_special_free(h, c);
+	cmd_free(h, c);
 
 	/* @todo in the future, dynamically allocate RAID map memory */
 	if (le32_to_cpu(this_device->raid_map.structure_size) >
@@ -2337,6 +2953,38 @@ static int hpsa_get_raid_map(struct ctlr_info *h,
 	}
 	hpsa_debug_map_buff(h, rc, &this_device->raid_map);
 	return rc;
+out:
+	cmd_free(h, c);
+	return rc;
+}
+
+static int hpsa_bmic_id_physical_device(struct ctlr_info *h,
+		unsigned char scsi3addr[], u16 bmic_device_index,
+		struct bmic_identify_physical_device *buf, size_t bufsize)
+{
+	int rc = IO_OK;
+	struct CommandList *c;
+	struct ErrorInfo *ei;
+
+	c = cmd_alloc(h);
+	rc = fill_cmd(c, BMIC_IDENTIFY_PHYSICAL_DEVICE, h, buf, bufsize,
+		0, RAID_CTLR_LUNID, TYPE_CMD);	/* note: scsi3addr is unused */
+	if (rc)
+		goto out;
+	c->Request.CDB[2] = bmic_device_index & 0xff;	/* index, low byte */
+	c->Request.CDB[9] = (bmic_device_index >> 8) & 0xff;	/* high byte */
+	rc = hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE,
+						NO_TIMEOUT);
+	if (rc)	/* same early-out as the other _with_retry callers */
+		goto out;
+	ei = c->err_info;
+	if (ei->CommandStatus != 0 && ei->CommandStatus != CMD_DATA_UNDERRUN) {
+		hpsa_scsi_interpret_error(h, c);
+		rc = -1;
+	}
+out:
+	cmd_free(h, c);
+	return rc;	/* 0 (IO_OK) on success, non-zero on any failure */
 }
 
 static int hpsa_vpd_page_supported(struct ctlr_info *h,
@@ -2391,6 +3039,7 @@ static void hpsa_get_ioaccel_status(struct ctlr_info *h,
 
 	this_device->offload_config = 0;
 	this_device->offload_enabled = 0;
+	this_device->offload_to_be_enabled = 0;
 
 	buf = kzalloc(64, GFP_KERNEL);
 	if (!buf)
@@ -2414,6 +3063,7 @@ static void hpsa_get_ioaccel_status(struct ctlr_info *h,
 		if (hpsa_get_raid_map(h, scsi3addr, this_device))
 			this_device->offload_enabled = 0;
 	}
+	this_device->offload_to_be_enabled = this_device->offload_enabled;
 out:
 	kfree(buf);
 	return;
@@ -2439,7 +3089,7 @@ static int hpsa_get_device_id(struct ctlr_info *h, unsigned char *scsi3addr,
 }
 
 static int hpsa_scsi_do_report_luns(struct ctlr_info *h, int logical,
-		struct ReportLUNdata *buf, int bufsize,
+		void *buf, int bufsize,
 		int extended_response)
 {
 	int rc = IO_OK;
@@ -2447,11 +3097,8 @@ static int hpsa_scsi_do_report_luns(struct ctlr_info *h, int logical,
 	unsigned char scsi3addr[8];
 	struct ErrorInfo *ei;
 
-	c = cmd_special_alloc(h);
-	if (c == NULL) {			/* trouble... */
-		dev_err(&h->pdev->dev, "cmd_special_alloc returned NULL!\n");
-		return -1;
-	}
+	c = cmd_alloc(h);
+
 	/* address the controller */
 	memset(scsi3addr, 0, sizeof(scsi3addr));
 	if (fill_cmd(c, logical ? HPSA_REPORT_LOG : HPSA_REPORT_PHYS, h,
@@ -2461,31 +3108,35 @@ static int hpsa_scsi_do_report_luns(struct ctlr_info *h, int logical,
 	}
 	if (extended_response)
 		c->Request.CDB[1] = extended_response;
-	hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE);
+	rc = hpsa_scsi_do_simple_cmd_with_retry(h, c,
+					PCI_DMA_FROMDEVICE, NO_TIMEOUT);
+	if (rc)
+		goto out;
 	ei = c->err_info;
 	if (ei->CommandStatus != 0 &&
 	    ei->CommandStatus != CMD_DATA_UNDERRUN) {
 		hpsa_scsi_interpret_error(h, c);
 		rc = -1;
 	} else {
-		if (buf->extended_response_flag != extended_response) {
+		struct ReportLUNdata *rld = buf;
+		if (rld->extended_response_flag != extended_response) {
 			dev_err(&h->pdev->dev,
 				"report luns requested format %u, got %u\n",
 				extended_response,
-				buf->extended_response_flag);
+				rld->extended_response_flag);
 			rc = -1;
 		}
 	}
 out:
-	cmd_special_free(h, c);
+	cmd_free(h, c);
 	return rc;
 }
 
 static inline int hpsa_scsi_do_report_phys_luns(struct ctlr_info *h,
-		struct ReportLUNdata *buf,
-		int bufsize, int extended_response)
+		struct ReportExtendedLUNdata *buf, int bufsize)
 {
-	return hpsa_scsi_do_report_luns(h, 0, buf, bufsize, extended_response);
+	return hpsa_scsi_do_report_luns(h, 0, buf, bufsize, /* 0: physical */
+						HPSA_REPORT_PHYS_EXTENDED);
 }
 
 static inline int hpsa_scsi_do_report_log_luns(struct ctlr_info *h,
@@ -2551,8 +3202,9 @@ static int hpsa_volume_offline(struct ctlr_info *h,
 					unsigned char scsi3addr[])
 {
 	struct CommandList *c;
-	unsigned char *sense, sense_key, asc, ascq;
-	int ldstat = 0;
+	unsigned char *sense;
+	int sense_key, asc, ascq, sense_len;
+	int rc, ldstat = 0;
 	u16 cmd_status;
 	u8 scsi_status;
 #define ASC_LUN_NOT_READY 0x04
@@ -2560,16 +3212,21 @@ static int hpsa_volume_offline(struct ctlr_info *h,
 #define ASCQ_LUN_NOT_READY_INITIALIZING_CMD_REQ 0x02
 
 	c = cmd_alloc(h);
-	if (!c)
-		return 0;
+
 	(void) fill_cmd(c, TEST_UNIT_READY, h, NULL, 0, 0, scsi3addr, TYPE_CMD);
-	hpsa_scsi_do_simple_cmd_core(h, c);
-	sense = c->err_info->SenseInfo;
-	sense_key = sense[2];
-	asc = sense[12];
-	ascq = sense[13];
-	cmd_status = c->err_info->CommandStatus;
-	scsi_status = c->err_info->ScsiStatus;
+	rc = hpsa_scsi_do_simple_cmd(h, c, DEFAULT_REPLY_QUEUE, NO_TIMEOUT);
+	if (rc) {
+		cmd_free(h, c);
+		return 0;
+	}
+	sense = c->err_info->SenseInfo;
+	if (c->err_info->SenseLen > sizeof(c->err_info->SenseInfo))
+		sense_len = sizeof(c->err_info->SenseInfo);
+	else
+		sense_len = c->err_info->SenseLen;
+	decode_sense_data(sense, sense_len, &sense_key, &asc, &ascq);
+	cmd_status = c->err_info->CommandStatus;
+	scsi_status = c->err_info->ScsiStatus;
 	cmd_free(h, c);
 	/* Is the volume 'not ready'? */
 	if (cmd_status != CMD_TARGET_STATUS ||
@@ -2607,6 +3264,51 @@ static int hpsa_volume_offline(struct ctlr_info *h,
 	return 0;
 }
 
+/* Find out if a logical device supports aborts by simply trying one.
+ * Smart Array may claim not to support aborts on logical drives, but
+ * if an MSA2000 is connected, the drives on that will be presented
+ * by the Smart Array as logical drives, and aborts may be sent to
+ * those devices successfully.  So the simplest way to find out is
+ * to simply try an abort and see how the device responds.
+ * Returns 1/0, or the result of hpsa_evaluate_tmf_status() for a TMF reply. */
+static int hpsa_device_supports_aborts(struct ctlr_info *h,
+					unsigned char *scsi3addr)
+{
+	struct CommandList *c;
+	struct ErrorInfo *ei;
+	int rc = 0;
+
+	u64 tag = (u64) -1; /* bogus tag */
+
+	/* Assume that physical devices support aborts */
+	if (!is_logical_dev_addr_mode(scsi3addr))
+		return 1;
+
+	c = cmd_alloc(h);
+
+	(void) fill_cmd(c, HPSA_ABORT_MSG, h, &tag, 0, 0, scsi3addr, TYPE_MSG);
+	(void) hpsa_scsi_do_simple_cmd(h, c, 0, NO_TIMEOUT);
+	/* no unmap needed here because no data xfer. */
+	ei = c->err_info;	/* the verdict comes from CommandStatus only */
+	switch (ei->CommandStatus) {
+	case CMD_INVALID:	/* treated as "aborts not supported" */
+		rc = 0;
+		break;
+	case CMD_UNABORTABLE:
+	case CMD_ABORT_FAILED:	/* treated as "aborts supported" */
+		rc = 1;
+		break;
+	case CMD_TMF_STATUS:
+		rc = hpsa_evaluate_tmf_status(h, c); /* caller clamps < 0 to 0 */
+		break;
+	default:	/* anything else (e.g. CMD_CTLR_LOCKUP): not supported */
+		rc = 0;
+		break;
+	}
+	cmd_free(h, c);
+	return rc;
+}
+
 static int hpsa_update_device_info(struct ctlr_info *h,
 	unsigned char scsi3addr[], struct hpsa_scsi_dev_t *this_device,
 	unsigned char *is_OBDR_device)
@@ -2659,7 +3361,10 @@ static int hpsa_update_device_info(struct ctlr_info *h,
 		this_device->raid_level = RAID_UNKNOWN;
 		this_device->offload_config = 0;
 		this_device->offload_enabled = 0;
+		this_device->offload_to_be_enabled = 0;
+		this_device->hba_ioaccel_enabled = 0;
 		this_device->volume_offline = 0;
+		this_device->queue_depth = h->nr_cmds;
 	}
 
 	if (is_OBDR_device) {
@@ -2671,7 +3376,6 @@ static int hpsa_update_device_info(struct ctlr_info *h,
 					strncmp(obdr_sig, OBDR_TAPE_SIG,
 						OBDR_SIG_LEN) == 0);
 	}
-
 	kfree(inq_buff);
 	return 0;
 
@@ -2680,6 +3384,31 @@ static int hpsa_update_device_info(struct ctlr_info *h,
 	return 1;
 }
 
+static void hpsa_update_device_supports_aborts(struct ctlr_info *h,
+			struct hpsa_scsi_dev_t *dev, u8 *scsi3addr)
+{
+	unsigned long flags;
+	int rc, entry;
+	/*
+	 * Reuse the supports_aborts flag of an already-known device (looked
+	 * up in h->dev[] under devlock); only a device that is not found
+	 * there is probed by actually sending an abort, after unlocking.
+	 */
+	spin_lock_irqsave(&h->devlock, flags);
+	rc = hpsa_scsi_find_entry(dev, h->dev, h->ndevices, &entry);
+	if ((rc == DEVICE_SAME || rc == DEVICE_UPDATED) &&
+		entry >= 0 && entry < h->ndevices) {
+		dev->supports_aborts = h->dev[entry]->supports_aborts;
+		spin_unlock_irqrestore(&h->devlock, flags);
+	} else {
+		spin_unlock_irqrestore(&h->devlock, flags); /* probe waits */
+		dev->supports_aborts =
+				hpsa_device_supports_aborts(h, scsi3addr);
+		if (dev->supports_aborts < 0)	/* negative result means "no" */
+			dev->supports_aborts = 0;
+	}
+}
+
 static unsigned char *ext_target_model[] = {
 	"MSA2012",
 	"MSA2024",
@@ -2785,6 +3514,7 @@ static int add_ext_target_dev(struct ctlr_info *h,
 	(*n_ext_target_devs)++;
 	hpsa_set_bus_target_lun(this_device,
 				tmpdevice->bus, tmpdevice->target, 0);
+	hpsa_update_device_supports_aborts(h, this_device, scsi3addr);
 	set_bit(tmpdevice->target, lunzerobits);
 	return 1;
 }
@@ -2800,90 +3530,23 @@ static int add_ext_target_dev(struct ctlr_info *h,
 static int hpsa_get_pdisk_of_ioaccel2(struct ctlr_info *h,
 	struct CommandList *ioaccel2_cmd_to_abort, unsigned char *scsi3addr)
 {
-	struct ReportExtendedLUNdata *physicals = NULL;
-	int responsesize = 24;	/* size of physical extended response */
-	int extended = 2;	/* flag forces reporting 'other dev info'. */
-	int reportsize = sizeof(*physicals) + HPSA_MAX_PHYS_LUN * responsesize;
-	u32 nphysicals = 0;	/* number of reported physical devs */
-	int found = 0;		/* found match (1) or not (0) */
-	u32 find;		/* handle we need to match */
+	struct io_accel2_cmd *c2 =	/* ioaccel2 request being aborted */
+			&h->ioaccel2_cmd_pool[ioaccel2_cmd_to_abort->cmdindex];
+	unsigned long flags;
 	int i;
-	struct scsi_cmnd *scmd;	/* scsi command within request being aborted */
-	struct hpsa_scsi_dev_t *d; /* device of request being aborted */
-	struct io_accel2_cmd *c2a; /* ioaccel2 command to abort */
-	u32 it_nexus;		/* 4 byte device handle for the ioaccel2 cmd */
-	u32 scsi_nexus;		/* 4 byte device handle for the ioaccel2 cmd */
-
-	if (ioaccel2_cmd_to_abort->cmd_type != CMD_IOACCEL2)
-		return 0; /* no match */
-
-	/* point to the ioaccel2 device handle */
-	c2a = &h->ioaccel2_cmd_pool[ioaccel2_cmd_to_abort->cmdindex];
-	if (c2a == NULL)
-		return 0; /* no match */
-
-	scmd = (struct scsi_cmnd *) ioaccel2_cmd_to_abort->scsi_cmd;
-	if (scmd == NULL)
-		return 0; /* no match */
-
-	d = scmd->device->hostdata;
-	if (d == NULL)
-		return 0; /* no match */
-
-	it_nexus = cpu_to_le32((u32) d->ioaccel_handle);
-	scsi_nexus = cpu_to_le32((u32) c2a->scsi_nexus);
-	find = c2a->scsi_nexus;
-
-	if (h->raid_offload_debug > 0)
-		dev_info(&h->pdev->dev,
-			"%s: scsi_nexus:0x%08x device id: 0x%02x%02x%02x%02x %02x%02x%02x%02x %02x%02x%02x%02x %02x%02x%02x%02x\n",
-			__func__, scsi_nexus,
-			d->device_id[0], d->device_id[1], d->device_id[2],
-			d->device_id[3], d->device_id[4], d->device_id[5],
-			d->device_id[6], d->device_id[7], d->device_id[8],
-			d->device_id[9], d->device_id[10], d->device_id[11],
-			d->device_id[12], d->device_id[13], d->device_id[14],
-			d->device_id[15]);
-
-	/* Get the list of physical devices */
-	physicals = kzalloc(reportsize, GFP_KERNEL);
-	if (physicals == NULL)
-		return 0;
-	if (hpsa_scsi_do_report_phys_luns(h, (struct ReportLUNdata *) physicals,
-		reportsize, extended)) {
-		dev_err(&h->pdev->dev,
-			"Can't lookup %s device handle: report physical LUNs failed.\n",
-			"HP SSD Smart Path");
-		kfree(physicals);
-		return 0;
-	}
-	nphysicals = be32_to_cpu(*((__be32 *)physicals->LUNListLength)) /
-							responsesize;
-
-	/* find ioaccel2 handle in list of physicals: */
-	for (i = 0; i < nphysicals; i++) {
-		struct ext_report_lun_entry *entry = &physicals->LUN[i];
-
-		/* handle is in bytes 28-31 of each lun */
-		if (entry->ioaccel_handle != find)
-			continue; /* didn't match */
-		found = 1;
-		memcpy(scsi3addr, entry->lunid, 8);
-		if (h->raid_offload_debug > 0)
-			dev_info(&h->pdev->dev,
-				"%s: Searched h=0x%08x, Found h=0x%08x, scsiaddr 0x%8phN\n",
-				__func__, find,
-				entry->ioaccel_handle, scsi3addr);
-		break; /* found it */
-	}
-
-	kfree(physicals);
-	if (found)
-		return 1;
-	else
-		return 0;
 
+	spin_lock_irqsave(&h->devlock, flags);	/* h->dev[] walked under lock */
+	for (i = 0; i < h->ndevices; i++)
+		if (h->dev[i]->ioaccel_handle == c2->scsi_nexus) {
+			memcpy(scsi3addr, h->dev[i]->scsi3addr,
+				sizeof(h->dev[i]->scsi3addr));
+			spin_unlock_irqrestore(&h->devlock, flags);
+			return 1;	/* match: scsi3addr has been filled in */
+		}
+	spin_unlock_irqrestore(&h->devlock, flags);
+	return 0;	/* no known device carries that ioaccel handle */
 }
+
 /*
  * Do CISS_REPORT_PHYS and CISS_REPORT_LOG.  Data is returned in physdev,
  * logdev.  The number of luns in physdev and logdev are returned in
@@ -2891,34 +3554,21 @@ static int hpsa_get_pdisk_of_ioaccel2(struct ctlr_info *h,
  * Returns 0 on success, -1 otherwise.
  */
 static int hpsa_gather_lun_info(struct ctlr_info *h,
-	int reportlunsize,
-	struct ReportLUNdata *physdev, u32 *nphysicals, int *physical_mode,
+	struct ReportExtendedLUNdata *physdev, u32 *nphysicals,
 	struct ReportLUNdata *logdev, u32 *nlogicals)
 {
-	int physical_entry_size = 8;
-
-	*physical_mode = 0;
-
-	/* For I/O accelerator mode we need to read physical device handles */
-	if (h->transMethod & CFGTBL_Trans_io_accel1 ||
-		h->transMethod & CFGTBL_Trans_io_accel2) {
-		*physical_mode = HPSA_REPORT_PHYS_EXTENDED;
-		physical_entry_size = 24;
-	}
-	if (hpsa_scsi_do_report_phys_luns(h, physdev, reportlunsize,
-							*physical_mode)) {
+	if (hpsa_scsi_do_report_phys_luns(h, physdev, sizeof(*physdev))) {
 		dev_err(&h->pdev->dev, "report physical LUNs failed.\n");
 		return -1;
 	}
-	*nphysicals = be32_to_cpu(*((__be32 *)physdev->LUNListLength)) /
-							physical_entry_size;
+	*nphysicals = be32_to_cpu(*((__be32 *)physdev->LUNListLength)) / 24;
 	if (*nphysicals > HPSA_MAX_PHYS_LUN) {
 		dev_warn(&h->pdev->dev, "maximum physical LUNs (%d) exceeded."
 			"  %d LUNs ignored.\n", HPSA_MAX_PHYS_LUN,
 			*nphysicals - HPSA_MAX_PHYS_LUN);
 		*nphysicals = HPSA_MAX_PHYS_LUN;
 	}
-	if (hpsa_scsi_do_report_log_luns(h, logdev, reportlunsize)) {
+	if (hpsa_scsi_do_report_log_luns(h, logdev, sizeof(*logdev))) {
 		dev_err(&h->pdev->dev, "report logical LUNs failed.\n");
 		return -1;
 	}
@@ -2991,6 +3641,34 @@ static int hpsa_hba_mode_enabled(struct ctlr_info *h)
 	return hba_mode_enabled;
 }
 
+/* Record a physical drive's ioaccel handle and queue depth in *dev. */
+static void hpsa_get_ioaccel_drive_info(struct ctlr_info *h,
+		struct hpsa_scsi_dev_t *dev,
+		u8 *lunaddrbytes,
+		struct bmic_identify_physical_device *id_phys)
+{
+	int rc;
+	struct ext_report_lun_entry *rle =
+		(struct ext_report_lun_entry *) lunaddrbytes;
+
+	dev->ioaccel_handle = rle->ioaccel_handle;
+	if (PHYS_IOACCEL(lunaddrbytes) && dev->ioaccel_handle)
+		dev->hba_ioaccel_enabled = 1;
+	memset(id_phys, 0, sizeof(*id_phys));	/* caller's scratch buffer */
+	rc = hpsa_bmic_id_physical_device(h, lunaddrbytes,
+			GET_BMIC_DRIVE_NUMBER(lunaddrbytes), id_phys,
+			sizeof(*id_phys));
+	if (!rc)
+		/* Reserve space for FW operations (assumes limit > 2 - TODO confirm) */
+#define DRIVE_CMDS_RESERVED_FOR_FW 2
+		dev->queue_depth =
+			le16_to_cpu(id_phys->current_queue_depth_limit) -
+				DRIVE_CMDS_RESERVED_FOR_FW;
+	else
+		dev->queue_depth = 7; /* conservative fallback: identify failed */
+	atomic_set(&dev->ioaccel_cmds_out, 0);	/* nothing outstanding yet */
+}
+
 static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
 {
 	/* the idea here is we could get notified
@@ -3005,24 +3683,25 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
 	 */
 	struct ReportExtendedLUNdata *physdev_list = NULL;
 	struct ReportLUNdata *logdev_list = NULL;
+	struct bmic_identify_physical_device *id_phys = NULL;
 	u32 nphysicals = 0;
 	u32 nlogicals = 0;
-	int physical_mode = 0;
 	u32 ndev_allocated = 0;
 	struct hpsa_scsi_dev_t **currentsd, *this_device, *tmpdevice;
 	int ncurrent = 0;
-	int reportlunsize = sizeof(*physdev_list) + HPSA_MAX_PHYS_LUN * 24;
 	int i, n_ext_target_devs, ndevs_to_allocate;
 	int raid_ctlr_position;
 	int rescan_hba_mode;
 	DECLARE_BITMAP(lunzerobits, MAX_EXT_TARGETS);
 
 	currentsd = kzalloc(sizeof(*currentsd) * HPSA_MAX_DEVICES, GFP_KERNEL);
-	physdev_list = kzalloc(reportlunsize, GFP_KERNEL);
-	logdev_list = kzalloc(reportlunsize, GFP_KERNEL);
+	physdev_list = kzalloc(sizeof(*physdev_list), GFP_KERNEL);
+	logdev_list = kzalloc(sizeof(*logdev_list), GFP_KERNEL);
 	tmpdevice = kzalloc(sizeof(*tmpdevice), GFP_KERNEL);
+	id_phys = kzalloc(sizeof(*id_phys), GFP_KERNEL);
 
-	if (!currentsd || !physdev_list || !logdev_list || !tmpdevice) {
+	if (!currentsd || !physdev_list || !logdev_list ||
+		!tmpdevice || !id_phys) {
 		dev_err(&h->pdev->dev, "out of memory\n");
 		goto out;
 	}
@@ -3039,9 +3718,8 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
 
 	h->hba_mode_enabled = rescan_hba_mode;
 
-	if (hpsa_gather_lun_info(h, reportlunsize,
-			(struct ReportLUNdata *) physdev_list, &nphysicals,
-			&physical_mode, logdev_list, &nlogicals))
+	if (hpsa_gather_lun_info(h, physdev_list, &nphysicals,
+			logdev_list, &nlogicals))
 		goto out;
 
 	/* We might see up to the maximum number of logical and physical disks
@@ -3081,16 +3759,19 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
 		/* Figure out where the LUN ID info is coming from */
 		lunaddrbytes = figure_lunaddrbytes(h, raid_ctlr_position,
 			i, nphysicals, nlogicals, physdev_list, logdev_list);
-		/* skip masked physical devices. */
-		if (lunaddrbytes[3] & 0xC0 &&
-			i < nphysicals + (raid_ctlr_position == 0))
-			continue;
+
+		/* skip masked non-disk devices */
+		if (MASKED_DEVICE(lunaddrbytes))
+			if (i < nphysicals + (raid_ctlr_position == 0) &&
+				NON_DISK_PHYS_DEV(lunaddrbytes))
+				continue;
 
 		/* Get device type, vendor, model, device id */
 		if (hpsa_update_device_info(h, lunaddrbytes, tmpdevice,
 							&is_OBDR))
 			continue; /* skip it if we can't talk to it. */
 		figure_bus_target_lun(h, lunaddrbytes, tmpdevice);
+		hpsa_update_device_supports_aborts(h, tmpdevice, lunaddrbytes);
 		this_device = currentsd[ncurrent];
 
 		/*
@@ -3109,6 +3790,18 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
 
 		*this_device = *tmpdevice;
 
+		/* do not expose masked devices */
+		if (MASKED_DEVICE(lunaddrbytes) &&
+			i < nphysicals + (raid_ctlr_position == 0)) {
+			if (h->hba_mode_enabled)
+				dev_warn(&h->pdev->dev,
+					"Masked physical device detected\n");
+			this_device->expose_state = HPSA_DO_NOT_EXPOSE;
+		} else {
+			this_device->expose_state =
+					HPSA_SG_ATTACH | HPSA_ULD_ATTACH;
+		}
+
 		switch (this_device->devtype) {
 		case TYPE_ROM:
 			/* We don't *really* support actual CD-ROM devices,
@@ -3122,33 +3815,30 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
 				ncurrent++;
 			break;
 		case TYPE_DISK:
-			if (h->hba_mode_enabled) {
-				/* never use raid mapper in HBA mode */
-				this_device->offload_enabled = 0;
-				ncurrent++;
-				break;
-			} else if (h->acciopath_status) {
-				if (i >= nphysicals) {
-					ncurrent++;
-					break;
-				}
-			} else {
-				if (i < nphysicals)
-					break;
+			if (i >= nphysicals) {
 				ncurrent++;
 				break;
 			}
-			if (physical_mode == HPSA_REPORT_PHYS_EXTENDED) {
-				memcpy(&this_device->ioaccel_handle,
-					&lunaddrbytes[20],
-					sizeof(this_device->ioaccel_handle));
-				ncurrent++;
-			}
+
+			if (h->hba_mode_enabled)
+				/* never use raid mapper in HBA mode */
+				this_device->offload_enabled = 0;
+			else if (!(h->transMethod & CFGTBL_Trans_io_accel1 ||
+				h->transMethod & CFGTBL_Trans_io_accel2))
+				break;
+
+			hpsa_get_ioaccel_drive_info(h, this_device,
+						lunaddrbytes, id_phys);
+			ncurrent++;
 			break;
 		case TYPE_TAPE:
 		case TYPE_MEDIUM_CHANGER:
 			ncurrent++;
 			break;
+		case TYPE_ENCLOSURE:
+			if (h->hba_mode_enabled)
+				ncurrent++;
+			break;
 		case TYPE_RAID:
 			/* Only present the Smartarray HBA as a RAID controller.
 			 * If it's a RAID controller other than the HBA itself
@@ -3173,6 +3863,18 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
 	kfree(currentsd);
 	kfree(physdev_list);
 	kfree(logdev_list);
+	kfree(id_phys);
+}
+
+static void hpsa_set_sg_descriptor(struct SGDescriptor *desc,
+				   struct scatterlist *sg)
+{
+	/* Fill one SG descriptor from a DMA-mapped scatterlist entry. */
+	desc->Addr = cpu_to_le64((u64) sg_dma_address(sg));
+	desc->Len = cpu_to_le32(sg_dma_len(sg));
+
+	/* Never flagged as last here; hpsa_scatter_gather() does that. */
+	desc->Ext = 0;
 }
 
 /* hpsa_scatter_gather takes a struct scsi_cmnd, (cmd), and does the pci
@@ -3183,10 +3885,8 @@ static int hpsa_scatter_gather(struct ctlr_info *h,
 		struct CommandList *cp,
 		struct scsi_cmnd *cmd)
 {
-	unsigned int len;
 	struct scatterlist *sg;
-	u64 addr64;
-	int use_sg, i, sg_index, chained;
+	int use_sg, i, sg_limit, chained, last_sg;
 	struct SGDescriptor *curr_sg;
 
 	BUG_ON(scsi_sg_count(cmd) > h->maxsgentries);
@@ -3198,25 +3898,36 @@ static int hpsa_scatter_gather(struct ctlr_info *h,
 	if (!use_sg)
 		goto sglist_finished;
 
+	/* If the number of entries is greater than the max for a single list,
+	 * then we have a chained list; we will set up all but one entry in the
+	 * first list (the last entry is saved for link information);
+	 * otherwise, we don't have a chained list and we'll set up each of
+	 * the entries in the one list. */
 	curr_sg = cp->SG;
-	chained = 0;
-	sg_index = 0;
-	scsi_for_each_sg(cmd, sg, use_sg, i) {
-		if (i == h->max_cmd_sg_entries - 1 &&
-			use_sg > h->max_cmd_sg_entries) {
-			chained = 1;
-			curr_sg = h->cmd_sg_list[cp->cmdindex];
-			sg_index = 0;
-		}
-		addr64 = (u64) sg_dma_address(sg);
-		len  = sg_dma_len(sg);
-		curr_sg->Addr.lower = (u32) (addr64 & 0x0FFFFFFFFULL);
-		curr_sg->Addr.upper = (u32) ((addr64 >> 32) & 0x0FFFFFFFFULL);
-		curr_sg->Len = len;
-		curr_sg->Ext = (i < scsi_sg_count(cmd) - 1) ? 0 : HPSA_SG_LAST;
+	chained = use_sg > h->max_cmd_sg_entries;
+	sg_limit = chained ? h->max_cmd_sg_entries - 1 : use_sg;
+	last_sg = scsi_sg_count(cmd) - 1;
+	scsi_for_each_sg(cmd, sg, sg_limit, i) {
+		hpsa_set_sg_descriptor(curr_sg, sg);
 		curr_sg++;
 	}
 
+	if (chained) {
+		/* Continue with the chained list.  Set curr_sg to the chained
+		 * list.  Modify the limit to the total count less the entries
+		 * we've already set up.  Resume the scan at the list entry
+		 * where the previous loop left off. */
+		curr_sg = h->cmd_sg_list[cp->cmdindex];
+		sg_limit = use_sg - sg_limit;
+		for_each_sg(sg, sg, sg_limit, i) {
+			hpsa_set_sg_descriptor(curr_sg, sg);
+			curr_sg++;
+		}
+	}
+
+	/* Back the pointer up to the last entry and mark it as "last". */
+	(curr_sg - 1)->Ext = cpu_to_le32(HPSA_SG_LAST);
+
 	if (use_sg + chained > h->maxSG)
 		h->maxSG = use_sg + chained;
 
@@ -3287,7 +3998,7 @@ static int fixup_ioaccel_cdb(u8 *cdb, int *cdb_len)
 
 static int hpsa_scsi_ioaccel1_queue_command(struct ctlr_info *h,
 	struct CommandList *c, u32 ioaccel_handle, u8 *cdb, int cdb_len,
-	u8 *scsi3addr)
+	u8 *scsi3addr, struct hpsa_scsi_dev_t *phys_disk)
 {
 	struct scsi_cmnd *cmd = c->scsi_cmd;
 	struct io_accel1_cmd *cp = &h->ioaccel_cmd_pool[c->cmdindex];
@@ -3295,18 +4006,22 @@ static int hpsa_scsi_ioaccel1_queue_command(struct ctlr_info *h,
 	unsigned int total_len = 0;
 	struct scatterlist *sg;
 	u64 addr64;
-	int use_sg, i;
+	int use_sg, i, last_sg;
 	struct SGDescriptor *curr_sg;
 	u32 control = IOACCEL1_CONTROL_SIMPLEQUEUE;
 
 	/* TODO: implement chaining support */
-	if (scsi_sg_count(cmd) > h->ioaccel_maxsg)
+	if (scsi_sg_count(cmd) > h->ioaccel_maxsg) {
+		atomic_dec(&phys_disk->ioaccel_cmds_out);
 		return IO_ACCEL_INELIGIBLE;
+	}
 
 	BUG_ON(cmd->cmd_len > IOACCEL1_IOFLAGS_CDBLEN_MAX);
 
-	if (fixup_ioaccel_cdb(cdb, &cdb_len))
+	if (fixup_ioaccel_cdb(cdb, &cdb_len)) {
+		atomic_dec(&phys_disk->ioaccel_cmds_out);
 		return IO_ACCEL_INELIGIBLE;
+	}
 
 	c->cmd_type = CMD_IOACCEL1;
 
@@ -3315,25 +4030,30 @@ static int hpsa_scsi_ioaccel1_queue_command(struct ctlr_info *h,
 				(c->cmdindex * sizeof(*cp));
 	BUG_ON(c->busaddr & 0x0000007F);
 
+	/* Try to honor the device's queue depth */
+	if (atomic_inc_return(&phys_disk->ioaccel_cmds_out) >
+					phys_disk->queue_depth) {
+		atomic_dec(&phys_disk->ioaccel_cmds_out);
+		return IO_ACCEL_INELIGIBLE;
+	}
+
 	use_sg = scsi_dma_map(cmd);
-	if (use_sg < 0)
+	if (use_sg < 0) {
+		atomic_dec(&phys_disk->ioaccel_cmds_out);
 		return use_sg;
+	}
 
 	if (use_sg) {
+		last_sg = scsi_sg_count(cmd) - 1;
 		curr_sg = cp->SG;
 		scsi_for_each_sg(cmd, sg, use_sg, i) {
 			addr64 = (u64) sg_dma_address(sg);
 			len  = sg_dma_len(sg);
 			total_len += len;
-			curr_sg->Addr.lower = (u32) (addr64 & 0x0FFFFFFFFULL);
-			curr_sg->Addr.upper =
-				(u32) ((addr64 >> 32) & 0x0FFFFFFFFULL);
-			curr_sg->Len = len;
-
-			if (i == (scsi_sg_count(cmd) - 1))
-				curr_sg->Ext = HPSA_SG_LAST;
-			else
-				curr_sg->Ext = 0;  /* we are not chaining */
+			curr_sg->Addr = cpu_to_le64(addr64);
+			curr_sg->Len = cpu_to_le32(len);
+			curr_sg->Ext =
+				cpu_to_le32((i == last_sg) * HPSA_SG_LAST);
 			curr_sg++;
 		}
 
@@ -3381,28 +4101,24 @@ static int hpsa_scsi_ioaccel_direct_map(struct ctlr_info *h,
 	struct scsi_cmnd *cmd = c->scsi_cmd;
 	struct hpsa_scsi_dev_t *dev = cmd->device->hostdata;
 
+	c->phys_disk = dev;
+
 	return hpsa_scsi_ioaccel_queue_command(h, c, dev->ioaccel_handle,
-		cmd->cmnd, cmd->cmd_len, dev->scsi3addr);
+		cmd->cmnd, cmd->cmd_len, dev->scsi3addr, dev);
 }
 
 /*
  * Set encryption parameters for the ioaccel2 request
  */
 static void set_encrypt_ioaccel2(struct ctlr_info *h,
-	struct CommandList *c, struct io_accel2_cmd *cp)
+	struct CommandList *c, struct io_accel2_cmd *cp, u16 dekindex)
 {
 	struct scsi_cmnd *cmd = c->scsi_cmd;
 	struct hpsa_scsi_dev_t *dev = cmd->device->hostdata;
 	struct raid_map_data *map = &dev->raid_map;
 	u64 first_block;
 
-	BUG_ON(!(dev->offload_config && dev->offload_enabled));
-
-	/* Are we doing encryption on this device */
-	if (!(map->flags & RAID_MAP_FLAG_ENCRYPT_ON))
-		return;
-	/* Set the data encryption key index. */
-	cp->dekindex = map->dekindex;
+	cp->dekindex = dekindex;
 
 	/* Set the encryption enable flag, encoded into direction field. */
 	cp->direction |= IOACCEL2_DIRECTION_ENCRYPT_MASK;
@@ -3509,7 +4225,7 @@ static void set_encrypt_ioaccel2(struct ctlr_info *h,
 
 static int hpsa_scsi_ioaccel2_queue_command(struct ctlr_info *h,
 	struct CommandList *c, u32 ioaccel_handle, u8 *cdb, int cdb_len,
-	u8 *scsi3addr)
+	u8 *scsi3addr, struct hpsa_scsi_dev_t *phys_disk)
 {
 	struct scsi_cmnd *cmd = c->scsi_cmd;
 	struct io_accel2_cmd *cp = &h->ioaccel2_cmd_pool[c->cmdindex];
@@ -3520,11 +4236,13 @@ static int hpsa_scsi_ioaccel2_queue_command(struct ctlr_info *h,
 	u32 len;
 	u32 total_len = 0;
 
-	if (scsi_sg_count(cmd) > h->ioaccel_maxsg)
-		return IO_ACCEL_INELIGIBLE;
+	BUG_ON(scsi_sg_count(cmd) > h->maxsgentries);
 
-	if (fixup_ioaccel_cdb(cdb, &cdb_len))
+	if (fixup_ioaccel_cdb(cdb, &cdb_len)) {
+		atomic_dec(&phys_disk->ioaccel_cmds_out);
 		return IO_ACCEL_INELIGIBLE;
+	}
+
 	c->cmd_type = CMD_IOACCEL2;
 	/* Adjust the DMA address to point to the accelerated command buffer */
 	c->busaddr = (u32) h->ioaccel2_cmd_pool_dhandle +
@@ -3534,13 +4252,30 @@ static int hpsa_scsi_ioaccel2_queue_command(struct ctlr_info *h,
 	memset(cp, 0, sizeof(*cp));
 	cp->IU_type = IOACCEL2_IU_TYPE;
 
+	/*
+	 * phys_disk->ioaccel_cmds_out was already incremented and checked
+	 * against queue_depth by hpsa_scsi_ioaccel_queue_command(); do not
+	 * count the command twice, only undo that increment on failure.
+	 */
 	use_sg = scsi_dma_map(cmd);
-	if (use_sg < 0)
+	if (use_sg < 0) {
+		atomic_dec(&phys_disk->ioaccel_cmds_out);
 		return use_sg;
+	}
 
 	if (use_sg) {
-		BUG_ON(use_sg > IOACCEL2_MAXSGENTRIES);
 		curr_sg = cp->sg;
+		if (use_sg > h->ioaccel_maxsg) {
+			addr64 = h->ioaccel2_cmd_sg_list[c->cmdindex]->address;
+			curr_sg->address = cpu_to_le64(addr64);
+			curr_sg->length = 0;
+			curr_sg->reserved[0] = 0;
+			curr_sg->reserved[1] = 0;
+			curr_sg->reserved[2] = 0;
+			curr_sg->chain_indicator = 0x80;
+
+			curr_sg = h->ioaccel2_cmd_sg_list[c->cmdindex];
+		}
 		scsi_for_each_sg(cmd, sg, use_sg, i) {
 			addr64 = (u64) sg_dma_address(sg);
 			len  = sg_dma_len(sg);
@@ -3579,21 +4316,30 @@ static int hpsa_scsi_ioaccel2_queue_command(struct ctlr_info *h,
 	}
 
 	/* Set encryption parameters, if necessary */
-	set_encrypt_ioaccel2(h, c, cp);
+	if (phys_disk->raid_map.flags & RAID_MAP_FLAG_ENCRYPT_ON)
+		set_encrypt_ioaccel2(h, c, cp, phys_disk->raid_map.dekindex);
 
 	cp->scsi_nexus = ioaccel_handle;
-	cp->Tag = (c->cmdindex << DIRECT_LOOKUP_SHIFT) |
-				DIRECT_LOOKUP_BIT;
+	cp->Tag = c->cmdindex << DIRECT_LOOKUP_SHIFT;
 	memcpy(cp->cdb, cdb, sizeof(cp->cdb));
 
-	/* fill in sg elements */
-	cp->sg_count = (u8) use_sg;
-
 	cp->data_len = cpu_to_le32(total_len);
 	cp->err_ptr = cpu_to_le64(c->busaddr +
 			offsetof(struct io_accel2_cmd, error_data));
 	cp->err_len = cpu_to_le32((u32) sizeof(cp->error_data));
 
+	/* fill in sg elements */
+	if (use_sg > h->ioaccel_maxsg) {
+		cp->sg_count = 1;
+		if (hpsa_map_ioaccel2_sg_chain_block(h, cp, c)) {
+			atomic_dec(&phys_disk->ioaccel_cmds_out);
+			scsi_dma_unmap(cmd);
+			return -1;
+		}
+	} else {
+		cp->sg_count = (u8) use_sg;
+	}
+
 	enqueue_cmd_and_start_io(h, c);
 	return 0;
 }
@@ -3603,14 +4349,22 @@ static int hpsa_scsi_ioaccel2_queue_command(struct ctlr_info *h,
  */
 static int hpsa_scsi_ioaccel_queue_command(struct ctlr_info *h,
 	struct CommandList *c, u32 ioaccel_handle, u8 *cdb, int cdb_len,
-	u8 *scsi3addr)
+	u8 *scsi3addr, struct hpsa_scsi_dev_t *phys_disk)
 {
+	/* Try to honor the device's queue depth */
+	if (atomic_inc_return(&phys_disk->ioaccel_cmds_out) >
+					phys_disk->queue_depth) {
+		atomic_dec(&phys_disk->ioaccel_cmds_out);
+		return IO_ACCEL_INELIGIBLE;
+	}
 	if (h->transMethod & CFGTBL_Trans_io_accel1)
 		return hpsa_scsi_ioaccel1_queue_command(h, c, ioaccel_handle,
-						cdb, cdb_len, scsi3addr);
+						cdb, cdb_len, scsi3addr,
+						phys_disk);
 	else
 		return hpsa_scsi_ioaccel2_queue_command(h, c, ioaccel_handle,
-						cdb, cdb_len, scsi3addr);
+						cdb, cdb_len, scsi3addr,
+						phys_disk);
 }
 
 static void raid_map_helper(struct raid_map_data *map,
@@ -3675,8 +4429,6 @@ static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h,
 #endif
 	int offload_to_mirror;
 
-	BUG_ON(!(dev->offload_config && dev->offload_enabled));
-
 	/* check for valid opcode, get LBA and block count */
 	switch (cmd->cmnd[0]) {
 	case WRITE_6:
@@ -3809,11 +4561,6 @@ static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h,
 		offload_to_mirror =
 			(offload_to_mirror >= map->layout_map_count - 1)
 			? 0 : offload_to_mirror + 1;
-		/* FIXME: remove after debug/dev */
-		BUG_ON(offload_to_mirror >= map->layout_map_count);
-		dev_warn(&h->pdev->dev,
-			"DEBUG: Using physical disk map index %d from mirror group %d\n",
-			map_index, offload_to_mirror);
 		dev->offload_to_mirror = offload_to_mirror;
 		/* Avoid direct use of dev->offload_to_mirror within this
 		 * function since multiple threads might simultaneously
@@ -3911,6 +4658,11 @@ static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h,
 		return IO_ACCEL_INELIGIBLE;
 	}
 
+	if (unlikely(map_index >= RAID_MAP_MAX_ENTRIES))
+		return IO_ACCEL_INELIGIBLE;
+
+	c->phys_disk = dev->phys_disk[map_index];
+
 	disk_handle = dd[map_index].ioaccel_handle;
 	disk_block = map->disk_starting_blk + (first_row * map->strip_size) +
 			(first_row_offset - (first_column * map->strip_size));
@@ -3956,97 +4708,43 @@ static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h,
 		cdb_len = 10;
 	}
 	return hpsa_scsi_ioaccel_queue_command(h, c, disk_handle, cdb, cdb_len,
-						dev->scsi3addr);
+						dev->scsi3addr,
+						dev->phys_disk[map_index]);
 }
 
-static int hpsa_scsi_queue_command_lck(struct scsi_cmnd *cmd,
-	void (*done)(struct scsi_cmnd *))
+/* Submit commands down the "normal" RAID stack path. */
+/* Every caller of hpsa_ciss_submit must check lockup_detected() after
+ * allocating the command, and optionally before allocating it as well.
+ */
+static int hpsa_ciss_submit(struct ctlr_info *h,
+	struct CommandList *c, struct scsi_cmnd *cmd,
+	unsigned char scsi3addr[])
 {
-	struct ctlr_info *h;
-	struct hpsa_scsi_dev_t *dev;
-	unsigned char scsi3addr[8];
-	struct CommandList *c;
-	int rc = 0;
-
-	/* Get the ptr to our adapter structure out of cmd->host. */
-	h = sdev_to_hba(cmd->device);
-	dev = cmd->device->hostdata;
-	if (!dev) {
-		cmd->result = DID_NO_CONNECT << 16;
-		done(cmd);
-		return 0;
-	}
-	memcpy(scsi3addr, dev->scsi3addr, sizeof(scsi3addr));
-
-	if (unlikely(lockup_detected(h))) {
-		cmd->result = DID_ERROR << 16;
-		done(cmd);
-		return 0;
-	}
-	c = cmd_alloc(h);
-	if (c == NULL) {			/* trouble... */
-		dev_err(&h->pdev->dev, "cmd_alloc returned NULL!\n");
-		return SCSI_MLQUEUE_HOST_BUSY;
-	}
-
-	/* Fill in the command list header */
-
-	cmd->scsi_done = done;    /* save this for use by completion code */
-
-	/* save c in case we have to abort it  */
 	cmd->host_scribble = (unsigned char *) c;
-
 	c->cmd_type = CMD_SCSI;
 	c->scsi_cmd = cmd;
-
-	/* Call alternate submit routine for I/O accelerated commands.
-	 * Retries always go down the normal I/O path.
-	 */
-	if (likely(cmd->retries == 0 &&
-		cmd->request->cmd_type == REQ_TYPE_FS &&
-		h->acciopath_status)) {
-		if (dev->offload_enabled) {
-			rc = hpsa_scsi_ioaccel_raid_map(h, c);
-			if (rc == 0)
-				return 0; /* Sent on ioaccel path */
-			if (rc < 0) {   /* scsi_dma_map failed. */
-				cmd_free(h, c);
-				return SCSI_MLQUEUE_HOST_BUSY;
-			}
-		} else if (dev->ioaccel_handle) {
-			rc = hpsa_scsi_ioaccel_direct_map(h, c);
-			if (rc == 0)
-				return 0; /* Sent on direct map path */
-			if (rc < 0) {   /* scsi_dma_map failed. */
-				cmd_free(h, c);
-				return SCSI_MLQUEUE_HOST_BUSY;
-			}
-		}
-	}
-
 	c->Header.ReplyQueue = 0;  /* unused in simple mode */
 	memcpy(&c->Header.LUN.LunAddrBytes[0], &scsi3addr[0], 8);
-	c->Header.Tag.lower = (c->cmdindex << DIRECT_LOOKUP_SHIFT);
-	c->Header.Tag.lower |= DIRECT_LOOKUP_BIT;
+	c->Header.tag = cpu_to_le64((u64) c->cmdindex << DIRECT_LOOKUP_SHIFT);
 
 	/* Fill in the request block... */
 
 	c->Request.Timeout = 0;
-	memset(c->Request.CDB, 0, sizeof(c->Request.CDB));
 	BUG_ON(cmd->cmd_len > sizeof(c->Request.CDB));
 	c->Request.CDBLen = cmd->cmd_len;
 	memcpy(c->Request.CDB, cmd->cmnd, cmd->cmd_len);
-	c->Request.Type.Type = TYPE_CMD;
-	c->Request.Type.Attribute = ATTR_SIMPLE;
 	switch (cmd->sc_data_direction) {
 	case DMA_TO_DEVICE:
-		c->Request.Type.Direction = XFER_WRITE;
+		c->Request.type_attr_dir =
+			TYPE_ATTR_DIR(TYPE_CMD, ATTR_SIMPLE, XFER_WRITE);
 		break;
 	case DMA_FROM_DEVICE:
-		c->Request.Type.Direction = XFER_READ;
+		c->Request.type_attr_dir =
+			TYPE_ATTR_DIR(TYPE_CMD, ATTR_SIMPLE, XFER_READ);
 		break;
 	case DMA_NONE:
-		c->Request.Type.Direction = XFER_NONE;
+		c->Request.type_attr_dir =
+			TYPE_ATTR_DIR(TYPE_CMD, ATTR_SIMPLE, XFER_NONE);
 		break;
 	case DMA_BIDIRECTIONAL:
 		/* This can happen if a buggy application does a scsi passthru
@@ -4054,7 +4752,8 @@ static int hpsa_scsi_queue_command_lck(struct scsi_cmnd *cmd,
 		 * ../scsi/scsi_ioctl.c:scsi_ioctl_send_command() )
 		 */
 
-		c->Request.Type.Direction = XFER_RSVD;
+		c->Request.type_attr_dir =
+			TYPE_ATTR_DIR(TYPE_CMD, ATTR_SIMPLE, XFER_RSVD);
 		/* This is technically wrong, and hpsa controllers should
 		 * reject it with CMD_INVALID, which is the most correct
 		 * response, but non-fibre backends appear to let it
@@ -4073,7 +4772,7 @@ static int hpsa_scsi_queue_command_lck(struct scsi_cmnd *cmd,
 	}
 
 	if (hpsa_scatter_gather(h, c, cmd) < 0) { /* Fill SG list */
-		cmd_free(h, c);
+		cmd_tagged_free(h, c);
 		return SCSI_MLQUEUE_HOST_BUSY;
 	}
 	enqueue_cmd_and_start_io(h, c);
@@ -4081,50 +4780,229 @@ static int hpsa_scsi_queue_command_lck(struct scsi_cmnd *cmd,
 	return 0;
 }
 
-static DEF_SCSI_QCMD(hpsa_scsi_queue_command)
+static void hpsa_cmd_init(struct ctlr_info *h, int index,
+				struct CommandList *c)
+{
+	dma_addr_t cmd_dma_handle, err_dma_handle;
+
+	/* Zero out all of commandlist except the last field, refcount */
+	memset(c, 0, offsetof(struct CommandList, refcount));
+	c->Header.tag = cpu_to_le64((u64) (index << DIRECT_LOOKUP_SHIFT));
+	cmd_dma_handle = h->cmd_pool_dhandle + index * sizeof(*c);
+	c->err_info = h->errinfo_pool + index;
+	memset(c->err_info, 0, sizeof(*c->err_info));
+	err_dma_handle = h->errinfo_pool_dhandle
+	    + index * sizeof(*c->err_info);
+	c->cmdindex = index;
+	c->busaddr = (u32) cmd_dma_handle;
+	c->ErrDesc.Addr = cpu_to_le64((u64) err_dma_handle);
+	c->ErrDesc.Len = cpu_to_le32((u32) sizeof(*c->err_info));
+	c->h = h;
+}
 
-static int do_not_scan_if_controller_locked_up(struct ctlr_info *h)
+static void hpsa_preinitialize_commands(struct ctlr_info *h)
 {
-	unsigned long flags;
+	int i;
 
-	/*
-	 * Don't let rescans be initiated on a controller known
-	 * to be locked up.  If the controller locks up *during*
-	 * a rescan, that thread is probably hosed, but at least
-	 * we can prevent new rescan threads from piling up on a
-	 * locked up controller.
-	 */
-	if (unlikely(lockup_detected(h))) {
-		spin_lock_irqsave(&h->scan_lock, flags);
-		h->scan_finished = 1;
-		wake_up_all(&h->scan_wait_queue);
-		spin_unlock_irqrestore(&h->scan_lock, flags);
-		return 1;
+	for (i = 0; i < h->nr_cmds; i++) {
+		struct CommandList *c = h->cmd_pool + i;
+		hpsa_cmd_init(h, i, c);
+		atomic_set(&c->refcount, 0);
 	}
-	return 0;
 }
 
-static void hpsa_scan_start(struct Scsi_Host *sh)
+static inline void hpsa_cmd_partial_init(struct ctlr_info *h, int index,
+				struct CommandList *c)
 {
-	struct ctlr_info *h = shost_to_hba(sh);
-	unsigned long flags;
+	dma_addr_t cmd_dma_handle = h->cmd_pool_dhandle + index * sizeof(*c);
 
-	if (do_not_scan_if_controller_locked_up(h))
-		return;
+	memset(c->Request.CDB, 0, sizeof(c->Request.CDB));
+	memset(c->err_info, 0, sizeof(*c->err_info));
+	c->busaddr = (u32) cmd_dma_handle;
+}
 
-	/* wait until any scan already in progress is finished. */
-	while (1) {
-		spin_lock_irqsave(&h->scan_lock, flags);
-		if (h->scan_finished)
-			break;
-		spin_unlock_irqrestore(&h->scan_lock, flags);
-		wait_event(h->scan_wait_queue, h->scan_finished);
-		/* Note: We don't need to worry about a race between this
-		 * thread and driver unload because the midlayer will
-		 * have incremented the reference count, so unload won't
-		 * happen if we're in here.
-		 */
-	}
+static int hpsa_ioaccel_submit(struct ctlr_info *h,
+		struct CommandList *c, struct scsi_cmnd *cmd,
+		unsigned char *scsi3addr)
+{
+	struct hpsa_scsi_dev_t *dev = cmd->device->hostdata;
+	int rc = IO_ACCEL_INELIGIBLE;
+
+	cmd->host_scribble = (unsigned char *) c;
+
+	if (dev->offload_enabled) {
+		hpsa_cmd_init(h, c->cmdindex, c);
+		c->cmd_type = CMD_SCSI;
+		c->scsi_cmd = cmd;
+		rc = hpsa_scsi_ioaccel_raid_map(h, c);
+		if (rc == 0)
+			return 0; /* Sent on ioaccel path */
+		if (rc < 0) {   /* scsi_dma_map failed. */
+			cmd_tagged_free(h, c);
+			return SCSI_MLQUEUE_HOST_BUSY;
+		}
+	} else if (dev->hba_ioaccel_enabled) {
+		hpsa_cmd_init(h, c->cmdindex, c);
+		c->cmd_type = CMD_SCSI;
+		c->scsi_cmd = cmd;
+		rc = hpsa_scsi_ioaccel_direct_map(h, c);
+		if (rc == 0)
+			return 0; /* Sent on direct map path */
+		if (rc < 0) {   /* scsi_dma_map failed. */
+			cmd_tagged_free(h, c);
+			return SCSI_MLQUEUE_HOST_BUSY;
+		}
+	}
+	return rc;
+}
+
+static void hpsa_command_resubmit_worker(struct work_struct *work)
+{
+	struct scsi_cmnd *cmd;
+	struct hpsa_scsi_dev_t *dev;
+	struct CommandList *c = container_of(work, struct CommandList, work);
+
+	cmd = c->scsi_cmd;
+	dev = cmd->device->hostdata;
+	if (!dev) {
+		cmd->result = DID_NO_CONNECT << 16;
+		return hpsa_cmd_free_and_done(c->h, c, cmd);
+	}
+	if (c->abort_pending)
+		return hpsa_cmd_abort_and_free(c->h, c, cmd);
+	if (c->cmd_type == CMD_IOACCEL2) {
+		struct ctlr_info *h = c->h;
+		struct io_accel2_cmd *c2 = &h->ioaccel2_cmd_pool[c->cmdindex];
+		int rc;
+
+		if (c2->error_data.serv_response ==
+				IOACCEL2_STATUS_SR_TASK_COMP_SET_FULL) {
+			rc = hpsa_ioaccel_submit(h, c, cmd, dev->scsi3addr);
+			if (rc == 0)
+				return;
+			if (rc == SCSI_MLQUEUE_HOST_BUSY) {
+				/*
+				 * If we get here, it means dma mapping failed.
+				 * Try again via scsi mid layer, which will
+				 * then get SCSI_MLQUEUE_HOST_BUSY.
+				 *
+				 * hpsa_ioaccel_submit will have already freed c
+				 * if it encountered a dma mapping failure.
+				 */
+				cmd->result = DID_IMM_RETRY << 16;
+				cmd->scsi_done(cmd);
+				return;
+			}
+			/* else, fall thru and resubmit down CISS path */
+		}
+	}
+	hpsa_cmd_partial_init(c->h, c->cmdindex, c);
+	if (hpsa_ciss_submit(c->h, c, cmd, dev->scsi3addr)) {
+		/*
+		 * If we get here, it means dma mapping failed. Try
+		 * again via scsi mid layer, which will then get
+		 * SCSI_MLQUEUE_HOST_BUSY.
+		 *
+		 * hpsa_ciss_submit will have already freed c
+		 * if it encountered a dma mapping failure.
+		 */
+		cmd->result = DID_IMM_RETRY << 16;
+		cmd->scsi_done(cmd);
+	}
+}
+
+/* Running in struct Scsi_Host->host_lock less mode */
+static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
+{
+	struct ctlr_info *h;
+	struct hpsa_scsi_dev_t *dev;
+	unsigned char scsi3addr[8];
+	struct CommandList *c;
+	int rc = 0;
+
+	/* Get the ptr to our adapter structure out of cmd->host. */
+	h = sdev_to_hba(cmd->device);
+	dev = cmd->device->hostdata;
+	if (!dev) {
+		cmd->result = DID_NO_CONNECT << 16;
+		cmd->scsi_done(cmd);
+		return 0;
+	}
+	memcpy(scsi3addr, dev->scsi3addr, sizeof(scsi3addr));
+
+	if (unlikely(lockup_detected(h))) {
+		cmd->result = DID_NO_CONNECT << 16;
+		cmd->scsi_done(cmd);
+		return 0;
+	}
+	c = cmd_tagged_alloc(h, cmd);
+
+	/* Callers of hpsa_ciss_submit must re-check for lockup after alloc. */
+	if (unlikely(lockup_detected(h))) {
+		cmd->result = DID_NO_CONNECT << 16;
+		cmd_tagged_free(h, c); /* FIXME may not be necessary, as lockup detector also frees everything */
+		cmd->scsi_done(cmd);
+		return 0;
+	}
+
+	/* Call alternate submit routine for I/O accelerated commands.
+	 * Retries always go down the normal I/O path.
+	 */
+	if (likely(cmd->retries == 0 &&
+		cmd->request->cmd_type == REQ_TYPE_FS &&
+		h->acciopath_status)) {
+		rc = hpsa_ioaccel_submit(h, c, cmd, scsi3addr);
+		if (rc == 0)
+			return 0;
+		/* On HOST_BUSY, hpsa_ioaccel_submit has already freed c. */
+		if (rc == SCSI_MLQUEUE_HOST_BUSY)
+			return SCSI_MLQUEUE_HOST_BUSY;
+	}
+	return hpsa_ciss_submit(h, c, cmd, scsi3addr);
+}
+
+static int do_not_scan_if_controller_locked_up(struct ctlr_info *h)
+{
+	unsigned long flags;
+
+	/*
+	 * Don't let rescans be initiated on a controller known
+	 * to be locked up.  If the controller locks up *during*
+	 * a rescan, that thread is probably hosed, but at least
+	 * we can prevent new rescan threads from piling up on a
+	 * locked up controller.
+	 */
+	if (unlikely(lockup_detected(h))) {
+		spin_lock_irqsave(&h->scan_lock, flags);
+		h->scan_finished = 1;
+		wake_up_all(&h->scan_wait_queue);
+		spin_unlock_irqrestore(&h->scan_lock, flags);
+		return 1;
+	}
+	return 0;
+}
+
+static void hpsa_scan_start(struct Scsi_Host *sh)
+{
+	struct ctlr_info *h = shost_to_hba(sh);
+	unsigned long flags;
+
+	if (do_not_scan_if_controller_locked_up(h))
+		return;
+
+	/* wait until any scan already in progress is finished. */
+	while (1) {
+		spin_lock_irqsave(&h->scan_lock, flags);
+		if (h->scan_finished)
+			break;
+		spin_unlock_irqrestore(&h->scan_lock, flags);
+		wait_event(h->scan_wait_queue, h->scan_finished);
+		/* Note: We don't need to worry about a race between this
+		 * thread and driver unload because the midlayer will
+		 * have incremented the reference count, so unload won't
+		 * happen if we're in here.
+		 */
+	}
 	h->scan_finished = 0; /* mark scan as in progress */
 	spin_unlock_irqrestore(&h->scan_lock, flags);
 
@@ -4152,39 +5094,65 @@ static int hpsa_scan_finished(struct Scsi_Host *sh,
 	return finished;
 }
 
+/* scsi host template change_queue_depth function */
 static int hpsa_change_queue_depth(struct scsi_device *sdev,
 	int qdepth, int reason)
 {
-	struct ctlr_info *h = sdev_to_hba(sdev);
+	if (reason == SCSI_QDEPTH_DEFAULT || reason == SCSI_QDEPTH_RAMP_UP) {
+		struct hpsa_scsi_dev_t *logical_drive = sdev->hostdata;
 
-	if (reason != SCSI_QDEPTH_DEFAULT)
-		return -ENOTSUPP;
+		if (!logical_drive)
+			return -ENODEV;
+
+		if (qdepth < 1)
+			qdepth = 1;
+		else if (qdepth > logical_drive->queue_depth)
+			qdepth = logical_drive->queue_depth;
 
-	if (qdepth < 1)
-		qdepth = 1;
+		scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), qdepth);
+	} else if (reason == SCSI_QDEPTH_QFULL)
+		scsi_track_queue_full(sdev, qdepth);
 	else
-		if (qdepth > h->nr_cmds)
-			qdepth = h->nr_cmds;
-	scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), qdepth);
+		return -ENOTSUPP;
+
 	return sdev->queue_depth;
 }
 
-static void hpsa_unregister_scsi(struct ctlr_info *h)
+static int hpsa_change_queue_type(struct scsi_device *sdev, int tag_type)
 {
-	/* we are being forcibly unloaded, and may not refuse. */
-	scsi_remove_host(h->scsi_host);
-	scsi_host_put(h->scsi_host);
-	h->scsi_host = NULL;
+	if (sdev->tagged_supported) {
+		if (shost_use_blk_mq(sdev->host)) {
+			scsi_set_tag_type(sdev, tag_type);
+			if (tag_type)
+				scsi_activate_tcq(sdev, sdev->queue_depth);
+			else
+				scsi_deactivate_tcq(sdev, sdev->queue_depth);
+		} else {
+			/* We require tags for our internal cmd allocation; if
+			 * the caller wants to switch tag types, that's fine,
+			 * but don't let them be disabled. */
+			if (tag_type)
+				scsi_set_tag_type(sdev, tag_type);
+			else
+				tag_type = scsi_get_tag_type(sdev);
+		}
+	} else {
+		BUG_ON(!shost_use_blk_mq(sdev->host));
+		tag_type = 0;
+	}
+
+	return tag_type;
 }
 
-static int hpsa_register_scsi(struct ctlr_info *h)
+static int hpsa_scsi_host_alloc(struct ctlr_info *h)
 {
 	struct Scsi_Host *sh;
-	int error;
 
 	sh = scsi_host_alloc(&hpsa_driver_template, sizeof(h));
-	if (sh == NULL)
-		goto fail;
+	if (sh == NULL) {
+		dev_err(&h->pdev->dev, "scsi_host_alloc failed\n");
+		return -ENOMEM;
+	}
 
 	sh->io_port = 0;
 	sh->n_io_port = 0;
@@ -4193,80 +5161,133 @@ static int hpsa_register_scsi(struct ctlr_info *h)
 	sh->max_cmd_len = MAX_COMMAND_SIZE;
 	sh->max_lun = HPSA_MAX_LUN;
 	sh->max_id = HPSA_MAX_LUN;
-	sh->can_queue = h->nr_cmds;
-	if (h->hba_mode_enabled)
-		sh->cmd_per_lun = 7;
-	else
-		sh->cmd_per_lun = h->nr_cmds;
+	sh->can_queue = h->nr_cmds - HPSA_NRESERVED_CMDS;
+	sh->cmd_per_lun = sh->can_queue;
 	sh->sg_tablesize = h->maxsgentries;
-	h->scsi_host = sh;
 	sh->hostdata[0] = (unsigned long) h;
 	sh->irq = h->intr[h->intr_mode];
 	sh->unique_id = sh->irq;
-	error = scsi_add_host(sh, &h->pdev->dev);
-	if (error)
-		goto fail_host_put;
-	scsi_scan_host(sh);
+	if (!shost_use_blk_mq(sh)) {
+		int error = scsi_init_shared_tag_map(sh, sh->can_queue);
+
+		if (error) {
+			dev_err(&h->pdev->dev,
+				"%s: scs_init_shared_tag_map failed for controller %d\n",
+				__func__, h->ctlr);
+			scsi_host_put(sh);
+			return error;
+		}
+	}
+	h->scsi_host = sh;
+	return 0;
+}
+
+static int hpsa_scsi_add_host(struct ctlr_info *h)
+{
+	int rv;
+
+	rv = scsi_add_host(h->scsi_host, &h->pdev->dev);
+	if (rv) {
+		dev_err(&h->pdev->dev, "scsi_add_host failed\n");
+		return rv;
+	}
+	scsi_scan_host(h->scsi_host);
 	return 0;
+}
 
- fail_host_put:
-	dev_err(&h->pdev->dev, "%s: scsi_add_host"
-		" failed for controller %d\n", __func__, h->ctlr);
-	scsi_host_put(sh);
-	return error;
- fail:
-	dev_err(&h->pdev->dev, "%s: scsi_host_alloc"
-		" failed for controller %d\n", __func__, h->ctlr);
-	return -ENOMEM;
+/* Send a TEST_UNIT_READY command to the specified LUN using the specified
+ * reply queue; returns zero if the unit is ready, and non-zero otherwise. */
+static int hpsa_send_test_unit_ready(struct ctlr_info *h,
+				struct CommandList *c, unsigned char lunaddr[],
+				int reply_queue)
+{
+	int rc;
+
+	/* Send the Test Unit Ready, fill_cmd can't fail, no mapping */
+	(void) fill_cmd(c, TEST_UNIT_READY, h,
+			NULL, 0, 0, lunaddr, TYPE_CMD);
+	rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, NO_TIMEOUT);
+	if (rc)
+		return rc;
+	/* no unmap needed here because no data xfer. */
+
+	/* Check if the unit is already ready. */
+	if (c->err_info->CommandStatus == CMD_SUCCESS)
+		return 0;
+
+	/* The first command sent after reset will receive "unit attention" to
+	 * indicate that the LUN has been reset...this is actually what we're
+	 * looking for (but, success is good too). */
+	if (c->err_info->CommandStatus == CMD_TARGET_STATUS &&
+		c->err_info->ScsiStatus == SAM_STAT_CHECK_CONDITION &&
+			(c->err_info->SenseInfo[2] == NO_SENSE ||
+			 c->err_info->SenseInfo[2] == UNIT_ATTENTION))
+		return 0;
+
+	return 1;
 }
 
-static int wait_for_device_to_become_ready(struct ctlr_info *h,
-	unsigned char lunaddr[])
+/* Wait for a TEST_UNIT_READY command to complete, retrying as necessary;
+ * returns zero when the unit is ready, and non-zero when giving up. */
+static int hpsa_wait_for_test_unit_ready(struct ctlr_info *h, struct CommandList *c,
+				    unsigned char lunaddr[], int reply_queue)
 {
 	int rc;
 	int count = 0;
 	int waittime = 1; /* seconds */
-	struct CommandList *c;
-
-	c = cmd_special_alloc(h);
-	if (!c) {
-		dev_warn(&h->pdev->dev, "out of memory in "
-			"wait_for_device_to_become_ready.\n");
-		return IO_ERROR;
-	}
 
 	/* Send test unit ready until device ready, or give up. */
-	while (count < HPSA_TUR_RETRY_LIMIT) {
+	for (count = 0; count < HPSA_TUR_RETRY_LIMIT; count++) {
 
 		/* Wait for a bit.  do this first, because if we send
 		 * the TUR right away, the reset will just abort it.
 		 */
 		msleep(1000 * waittime);
-		count++;
-		rc = 0; /* Device ready. */
+
+		rc = hpsa_send_test_unit_ready(h, c, lunaddr, reply_queue);
+		if (!rc)
+			break;
 
 		/* Increase wait time with each try, up to a point. */
 		if (waittime < HPSA_MAX_WAIT_INTERVAL_SECS)
 			waittime = waittime * 2;
 
-		/* Send the Test Unit Ready, fill_cmd can't fail, no mapping */
-		(void) fill_cmd(c, TEST_UNIT_READY, h,
-				NULL, 0, 0, lunaddr, TYPE_CMD);
-		hpsa_scsi_do_simple_cmd_core(h, c);
-		/* no unmap needed here because no data xfer. */
+		dev_warn(&h->pdev->dev,
+			 "waiting %d secs for device to become ready.\n",
+			 waittime);
+	}
 
-		if (c->err_info->CommandStatus == CMD_SUCCESS)
-			break;
+	return rc;
+}
 
-		if (c->err_info->CommandStatus == CMD_TARGET_STATUS &&
-			c->err_info->ScsiStatus == SAM_STAT_CHECK_CONDITION &&
-			(c->err_info->SenseInfo[2] == NO_SENSE ||
-			c->err_info->SenseInfo[2] == UNIT_ATTENTION))
-			break;
+static int wait_for_device_to_become_ready(struct ctlr_info *h,
+					   unsigned char lunaddr[],
+					   int reply_queue)
+{
+	int first_queue;
+	int last_queue;
+	int rq;
+	int rc = 0;
+	struct CommandList *c;
+
+	c = cmd_alloc(h);
+
+	/* If no specific reply queue was requested, then send the TUR
+	 * repeatedly, requesting a reply on each reply queue; otherwise execute
+	 * the loop exactly once using only the specified queue. */
+	if (likely(reply_queue == DEFAULT_REPLY_QUEUE)) {
+		first_queue = 0;
+		last_queue = h->nreply_queues - 1;
+	} else {
+		first_queue = reply_queue;
+		last_queue = reply_queue;
+	}
 
-		dev_warn(&h->pdev->dev, "waiting %d secs "
-			"for device to become ready.\n", waittime);
-		rc = 1; /* device not ready. */
+	for (rq = first_queue; rq <= last_queue; rq++) {
+		rc = hpsa_wait_for_test_unit_ready(h, c, lunaddr,
+			reply_queue == DEFAULT_REPLY_QUEUE ? rq : reply_queue);
+		if (rc)
+			break;
 	}
 
 	if (rc)
@@ -4274,7 +5295,7 @@ static int wait_for_device_to_become_ready(struct ctlr_info *h,
 	else
 		dev_warn(&h->pdev->dev, "device is ready.\n");
 
-	cmd_special_free(h, c);
+	cmd_free(h, c);
 	return rc;
 }
 
@@ -4291,20 +5312,37 @@ static int hpsa_eh_device_reset_handler(struct scsi_cmnd *scsicmd)
 	h = sdev_to_hba(scsicmd->device);
 	if (h == NULL) /* paranoia */
 		return FAILED;
+
+	if (lockup_detected(h))
+		return FAILED;
+
 	dev = scsicmd->device->hostdata;
 	if (!dev) {
 		dev_err(&h->pdev->dev, "hpsa_eh_device_reset_handler: "
 			"device lookup failed.\n");
 		return FAILED;
 	}
-	dev_warn(&h->pdev->dev, "resetting device %d:%d:%d:%d\n",
-		h->scsi_host->host_no, dev->bus, dev->target, dev->lun);
+	dev_warn(&h->pdev->dev,
+		"resetting scsi %d:%d:%d:%d: %s %.8s %.16s RAID-%s SSDSmartPathCap%c En%c Exp=%d\n",
+		h->scsi_host->host_no, dev->bus, dev->target, dev->lun,
+		scsi_device_type(dev->devtype),
+		dev->vendor,
+		dev->model,
+		dev->raid_level > RAID_UNKNOWN ?
+			"?" : raid_label[dev->raid_level],
+		dev->offload_config ? '+' : '-',
+		dev->offload_enabled ? '+' : '-',
+		dev->expose_state);
+
 	/* send a reset to the SCSI LUN which the command was sent to */
-	rc = hpsa_send_reset(h, dev->scsi3addr, HPSA_RESET_TYPE_LUN);
-	if (rc == 0 && wait_for_device_to_become_ready(h, dev->scsi3addr) == 0)
+	rc = hpsa_send_reset(h, dev->scsi3addr, HPSA_RESET_TYPE_LUN,
+			     DEFAULT_REPLY_QUEUE);
+	if (rc == 0)
 		return SUCCESS;
 
-	dev_warn(&h->pdev->dev, "resetting device failed.\n");
+	dev_warn(&h->pdev->dev,
+		"resetting scsi %d:%d:%d:%d failed\n",
+		h->scsi_host->host_no, dev->bus, dev->target, dev->lun);
 	return FAILED;
 }
 
@@ -4329,8 +5367,8 @@ static void hpsa_get_tag(struct ctlr_info *h,
 	if (c->cmd_type == CMD_IOACCEL1) {
 		struct io_accel1_cmd *cm1 = (struct io_accel1_cmd *)
 			&h->ioaccel_cmd_pool[c->cmdindex];
-		*tagupper = cm1->Tag.upper;
-		*taglower = cm1->Tag.lower;
+		*tagupper = (u32) (cm1->tag >> 32);
+		*taglower = (u32) (cm1->tag & 0x0ffffffffULL);
 		return;
 	}
 	if (c->cmd_type == CMD_IOACCEL2) {
@@ -4341,33 +5379,28 @@ static void hpsa_get_tag(struct ctlr_info *h,
 		*taglower = cm2->Tag;
 		return;
 	}
-	*tagupper = c->Header.Tag.upper;
-	*taglower = c->Header.Tag.lower;
+	*tagupper = (u32) (c->Header.tag >> 32);
+	*taglower = (u32) (c->Header.tag & 0x0ffffffffULL);
 }
 
-
 static int hpsa_send_abort(struct ctlr_info *h, unsigned char *scsi3addr,
-	struct CommandList *abort, int swizzle)
+	struct CommandList *abort, int reply_queue)
 {
 	int rc = IO_OK;
 	struct CommandList *c;
 	struct ErrorInfo *ei;
 	u32 tagupper, taglower;
 
-	c = cmd_special_alloc(h);
-	if (c == NULL) {	/* trouble... */
-		dev_warn(&h->pdev->dev, "cmd_special_alloc returned NULL!\n");
-		return -ENOMEM;
-	}
+	c = cmd_alloc(h);
 
 	/* fill_cmd can't fail here, no buffer to map */
-	(void) fill_cmd(c, HPSA_ABORT_MSG, h, abort,
+	(void) fill_cmd(c, HPSA_ABORT_MSG, h, &abort->Header.tag,
 		0, 0, scsi3addr, TYPE_MSG);
-	if (swizzle)
+	if (h->needs_abort_tags_swizzled)
 		swizzle_abort_tag(&c->Request.CDB[4]);
-	hpsa_scsi_do_simple_cmd_core(h, c);
+	(void) hpsa_scsi_do_simple_cmd(h, c, reply_queue, NO_TIMEOUT);
 	hpsa_get_tag(h, abort, &taglower, &tagupper);
-	dev_dbg(&h->pdev->dev, "%s: Tag:0x%08x:%08x: do_simple_cmd_core completed.\n",
+	dev_dbg(&h->pdev->dev, "%s: Tag:0x%08x:%08x: do_simple_cmd(abort) completed.\n",
 		__func__, tagupper, taglower);
 	/* no unmap needed here because no data xfer. */
 
@@ -4375,6 +5408,9 @@ static int hpsa_send_abort(struct ctlr_info *h, unsigned char *scsi3addr,
 	switch (ei->CommandStatus) {
 	case CMD_SUCCESS:
 		break;
+	case CMD_TMF_STATUS:
+		rc = hpsa_evaluate_tmf_status(h, c);
+		break;
 	case CMD_UNABORTABLE: /* Very common, don't make noise. */
 		rc = -1;
 		break;
@@ -4385,60 +5421,51 @@ static int hpsa_send_abort(struct ctlr_info *h, unsigned char *scsi3addr,
 		rc = -1;
 		break;
 	}
-	cmd_special_free(h, c);
+	cmd_free(h, c);
 	dev_dbg(&h->pdev->dev, "%s: Tag:0x%08x:%08x: Finished.\n",
 		__func__, tagupper, taglower);
 	return rc;
 }
 
-/*
- * hpsa_find_cmd_in_queue
- *
- * Used to determine whether a command (find) is still present
- * in queue_head.   Optionally excludes the last element of queue_head.
- *
- * This is used to avoid unnecessary aborts.  Commands in h->reqQ have
- * not yet been submitted, and so can be aborted by the driver without
- * sending an abort to the hardware.
- *
- * Returns pointer to command if found in queue, NULL otherwise.
- */
-static struct CommandList *hpsa_find_cmd_in_queue(struct ctlr_info *h,
-			struct scsi_cmnd *find, struct list_head *queue_head)
+static void setup_ioaccel2_abort_cmd(struct CommandList *c, struct ctlr_info *h,
+	struct CommandList *command_to_abort, int reply_queue)
 {
-	unsigned long flags;
-	struct CommandList *c = NULL;	/* ptr into cmpQ */
+	struct io_accel2_cmd *c2 = &h->ioaccel2_cmd_pool[c->cmdindex];
+	struct hpsa_tmf_struct *ac = (struct hpsa_tmf_struct *) c2;
+	struct io_accel2_cmd *c2a =
+		&h->ioaccel2_cmd_pool[command_to_abort->cmdindex];
+	struct scsi_cmnd *scmd =
+		(struct scsi_cmnd *) command_to_abort->scsi_cmd;
+	struct hpsa_scsi_dev_t *dev = scmd->device->hostdata;
 
-	if (!find)
-		return 0;
-	spin_lock_irqsave(&h->lock, flags);
-	list_for_each_entry(c, queue_head, list) {
-		if (c->scsi_cmd == NULL) /* e.g.: passthru ioctl */
-			continue;
-		if (c->scsi_cmd == find) {
-			spin_unlock_irqrestore(&h->lock, flags);
-			return c;
-		}
-	}
-	spin_unlock_irqrestore(&h->lock, flags);
-	return NULL;
-}
+	/*
+	 * We're overlaying struct hpsa_tmf_struct on top of something which
+	 * was allocated as a struct io_accel2_cmd, so we better be sure it
+	 * actually fits, and doesn't overrun the error info space.
+	 */
+	BUILD_BUG_ON(sizeof(struct hpsa_tmf_struct) >
+			sizeof(struct io_accel2_cmd));
+	BUG_ON(offsetof(struct io_accel2_cmd, error_data) <
+			offsetof(struct hpsa_tmf_struct, error_len) +
+				sizeof(ac->error_len));
 
-static struct CommandList *hpsa_find_cmd_in_queue_by_tag(struct ctlr_info *h,
-					u8 *tag, struct list_head *queue_head)
-{
-	unsigned long flags;
-	struct CommandList *c;
+	c->cmd_type = IOACCEL2_TMF;
+	/* Adjust the DMA address to point to the accelerated command buffer */
+	c->busaddr = (u32) h->ioaccel2_cmd_pool_dhandle +
+				(c->cmdindex * sizeof(struct io_accel2_cmd));
+	BUG_ON(c->busaddr & 0x0000007F);
 
-	spin_lock_irqsave(&h->lock, flags);
-	list_for_each_entry(c, queue_head, list) {
-		if (memcmp(&c->Header.Tag, tag, 8) != 0)
-			continue;
-		spin_unlock_irqrestore(&h->lock, flags);
-		return c;
-	}
-	spin_unlock_irqrestore(&h->lock, flags);
-	return NULL;
+	memset(ac, 0, sizeof(*c2)); /* yes this is correct */
+	ac->iu_type = IOACCEL2_IU_TMF_TYPE;
+	ac->reply_queue = reply_queue;
+	ac->tmf = IOACCEL2_TMF_ABORT;
+	ac->it_nexus = cpu_to_le32((u32) dev->ioaccel_handle);
+	memset(ac->lun_id, 0, sizeof(ac->lun_id));
+	ac->tag = c->cmdindex << DIRECT_LOOKUP_SHIFT;
+	ac->abort_tag = c2a->Tag;
+	ac->error_ptr = cpu_to_le64((u64) c->busaddr +
+			offsetof(struct io_accel2_cmd, error_data));
+	ac->error_len = cpu_to_le32((u32) sizeof(c2->error_data));
 }
 
 /* ioaccel2 path firmware cannot handle abort task requests.
@@ -4449,7 +5476,7 @@ static struct CommandList *hpsa_find_cmd_in_queue_by_tag(struct ctlr_info *h,
  */
 
 static int hpsa_send_reset_as_abort_ioaccel2(struct ctlr_info *h,
-	unsigned char *scsi3addr, struct CommandList *abort)
+	unsigned char *scsi3addr, struct CommandList *abort, int reply_queue)
 {
 	int rc = IO_OK;
 	struct scsi_cmnd *scmd; /* scsi command within request being aborted */
@@ -4458,7 +5485,7 @@ static int hpsa_send_reset_as_abort_ioaccel2(struct ctlr_info *h,
 	unsigned char *psa = &phys_scsi3addr[0];
 
 	/* Get a pointer to the hpsa logical device. */
-	scmd = (struct scsi_cmnd *) abort->scsi_cmd;
+	scmd = abort->scsi_cmd;
 	dev = (struct hpsa_scsi_dev_t *)(scmd->device->hostdata);
 	if (dev == NULL) {
 		dev_warn(&h->pdev->dev,
@@ -4468,7 +5495,7 @@ static int hpsa_send_reset_as_abort_ioaccel2(struct ctlr_info *h,
 
 	if (h->raid_offload_debug > 0)
 		dev_info(&h->pdev->dev,
-			"Reset as abort: Abort requested on C%d:B%d:T%d:L%d scsi3addr 0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
+			"Reset as abort: scsi %d:%d:%d:%d scsi3addr 0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
 			h->scsi_host->host_no, dev->bus, dev->target, dev->lun,
 			scsi3addr[0], scsi3addr[1], scsi3addr[2], scsi3addr[3],
 			scsi3addr[4], scsi3addr[5], scsi3addr[6], scsi3addr[7]);
@@ -4491,7 +5518,7 @@ static int hpsa_send_reset_as_abort_ioaccel2(struct ctlr_info *h,
 			"Reset as abort: Resetting physical device at scsi3addr 0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
 			psa[0], psa[1], psa[2], psa[3],
 			psa[4], psa[5], psa[6], psa[7]);
-	rc = hpsa_send_reset(h, psa, HPSA_RESET_TYPE_TARGET);
+	rc = hpsa_send_reset(h, psa, HPSA_RESET_TYPE_TARGET, reply_queue);
 	if (rc != 0) {
 		dev_warn(&h->pdev->dev,
 			"Reset as abort: Failed on physical device at scsi3addr 0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
@@ -4501,7 +5528,7 @@ static int hpsa_send_reset_as_abort_ioaccel2(struct ctlr_info *h,
 	}
 
 	/* wait for device to recover */
-	if (wait_for_device_to_become_ready(h, psa) != 0) {
+	if (wait_for_device_to_become_ready(h, psa, reply_queue) != 0) {
 		dev_warn(&h->pdev->dev,
 			"Reset as abort: Failed: Device never recovered from reset: 0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
 			psa[0], psa[1], psa[2], psa[3],
@@ -4518,48 +5545,93 @@ static int hpsa_send_reset_as_abort_ioaccel2(struct ctlr_info *h,
 	return rc; /* success */
 }
 
-/* Some Smart Arrays need the abort tag swizzled, and some don't.  It's hard to
- * tell which kind we're dealing with, so we send the abort both ways.  There
- * shouldn't be any collisions between swizzled and unswizzled tags due to the
- * way we construct our tags but we check anyway in case the assumptions which
- * make this true someday become false.
- */
-static int hpsa_send_abort_both_ways(struct ctlr_info *h,
-	unsigned char *scsi3addr, struct CommandList *abort)
+static int hpsa_send_abort_ioaccel2(struct ctlr_info *h,
+	struct CommandList *abort, int reply_queue)
 {
-	u8 swizzled_tag[8];
+	int rc = IO_OK;
 	struct CommandList *c;
-	int rc = 0, rc2 = 0;
+	u32 taglower, tagupper;
+	struct hpsa_scsi_dev_t *dev;
+	struct io_accel2_cmd *c2;
 
+	dev = abort->scsi_cmd->device->hostdata;
+	if (!dev->offload_enabled && !dev->hba_ioaccel_enabled)
+		return -1;
+
+	c = cmd_alloc(h);
+	setup_ioaccel2_abort_cmd(c, h, abort, reply_queue);
+	c2 = &h->ioaccel2_cmd_pool[c->cmdindex];
+	(void) hpsa_scsi_do_simple_cmd(h, c, reply_queue, NO_TIMEOUT);
+	hpsa_get_tag(h, abort, &taglower, &tagupper);
+	dev_dbg(&h->pdev->dev,
+		"%s: Tag:0x%08x:%08x: do_simple_cmd(ioaccel2 abort) completed.\n",
+		__func__, tagupper, taglower);
+	/* no unmap needed here because no data xfer. */
+
+	dev_dbg(&h->pdev->dev,
+		"%s: Tag:0x%08x:%08x: abort service response = 0x%02x.\n",
+		__func__, tagupper, taglower, c2->error_data.serv_response);
+	switch (c2->error_data.serv_response) {
+	case IOACCEL2_SERV_RESPONSE_TMF_COMPLETE:
+	case IOACCEL2_SERV_RESPONSE_TMF_SUCCESS:
+		rc = 0;
+		break;
+	case IOACCEL2_SERV_RESPONSE_TMF_REJECTED:
+	case IOACCEL2_SERV_RESPONSE_FAILURE:
+	case IOACCEL2_SERV_RESPONSE_TMF_WRONG_LUN:
+		rc = -1;
+		break;
+	default:
+		dev_warn(&h->pdev->dev,
+			"%s: Tag:0x%08x:%08x: unknown abort service response 0x%02x\n",
+			__func__, tagupper, taglower,
+			c2->error_data.serv_response);
+		rc = -1;
+	}
+	cmd_free(h, c);
+	dev_dbg(&h->pdev->dev, "%s: Tag:0x%08x:%08x: Finished.\n", __func__,
+		tagupper, taglower);
+	return rc;
+}
+
+static int hpsa_send_abort_both_ways(struct ctlr_info *h,
+	unsigned char *scsi3addr, struct CommandList *abort, int reply_queue)
+{
 	/* ioccelerator mode 2 commands should be aborted via the
 	 * accelerated path, since RAID path is unaware of these commands,
-	 * but underlying firmware can't handle abort TMF.
-	 * Change abort to physical device reset.
+	 * but not all underlying firmware can handle abort TMF.
+	 * Change abort to physical device reset when abort TMF is unsupported.
 	 */
-	if (abort->cmd_type == CMD_IOACCEL2)
-		return hpsa_send_reset_as_abort_ioaccel2(h, scsi3addr, abort);
+	if (abort->cmd_type == CMD_IOACCEL2) {
+		if (HPSATMF_IOACCEL_ENABLED & h->TMFSupportFlags)
+			return hpsa_send_abort_ioaccel2(h, abort,
+						reply_queue);
+		else
+			return hpsa_send_reset_as_abort_ioaccel2(h, scsi3addr,
+							abort, reply_queue);
+	}
+	return hpsa_send_abort(h, scsi3addr, abort, reply_queue);
+}
 
-	/* we do not expect to find the swizzled tag in our queue, but
-	 * check anyway just to be sure the assumptions which make this
-	 * the case haven't become wrong.
-	 */
-	memcpy(swizzled_tag, &abort->Request.CDB[4], 8);
-	swizzle_abort_tag(swizzled_tag);
-	c = hpsa_find_cmd_in_queue_by_tag(h, swizzled_tag, &h->cmpQ);
-	if (c != NULL) {
-		dev_warn(&h->pdev->dev, "Unexpectedly found byte-swapped tag in completion queue.\n");
-		return hpsa_send_abort(h, scsi3addr, abort, 0);
-	}
-	rc = hpsa_send_abort(h, scsi3addr, abort, 0);
-
-	/* if the command is still in our queue, we can't conclude that it was
-	 * aborted (it might have just completed normally) but in any case
-	 * we don't need to try to abort it another way.
-	 */
-	c = hpsa_find_cmd_in_queue(h, abort->scsi_cmd, &h->cmpQ);
-	if (c)
-		rc2 = hpsa_send_abort(h, scsi3addr, abort, 1);
-	return rc && rc2;
+/* Find out which reply queue a command was meant to return on */
+static int hpsa_extract_reply_queue(struct ctlr_info *h,
+					struct CommandList *c)
+{
+	if (c->cmd_type == CMD_IOACCEL2)
+		return h->ioaccel2_cmd_pool[c->cmdindex].reply_queue;
+	return c->Header.ReplyQueue;
+}
+
+/*
+ * Limit concurrency of abort commands to prevent
+ * over-subscription of commands
+ */
+static inline int wait_for_available_abort_cmd(struct ctlr_info *h)
+{
+#define ABORT_CMD_WAIT_MSECS 5000
+	return !wait_event_timeout(h->abort_cmd_wait_queue,
+			atomic_dec_if_positive(&h->abort_cmds_available) >= 0,
+			msecs_to_jiffies(ABORT_CMD_WAIT_MSECS));
 }
 
 /* Send an abort for the specified command.
@@ -4569,31 +5641,64 @@ static int hpsa_send_abort_both_ways(struct ctlr_info *h,
 static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
 {
 
-	int i, rc;
+	int rc;
 	struct ctlr_info *h;
 	struct hpsa_scsi_dev_t *dev;
 	struct CommandList *abort; /* pointer to command to be aborted */
-	struct CommandList *found;
 	struct scsi_cmnd *as;	/* ptr to scsi cmd inside aborted command. */
 	char msg[256];		/* For debug messaging. */
 	int ml = 0;
 	u32 tagupper, taglower;
+	int refcount, reply_queue;
+
 
 	/* Find the controller of the command to be aborted */
 	h = sdev_to_hba(sc->device);
 	if (WARN(h == NULL,
-			"ABORT REQUEST FAILED, Controller lookup failed.\n"))
+			"scsi ?:?:?:? scmd %p ABORT FAILED, Controller lookup failed.\n",
+			sc))
 		return FAILED;
 
+	dev_warn(&h->pdev->dev, "abort 0\n");
+	/* if controller locked up, we can guarantee command won't complete */
+	if (lockup_detected(h)) {
+		dev_warn(&h->pdev->dev, "abort 0.1\n");
+		dev_warn(&h->pdev->dev,
+			"scsi %d:%d:%d:%llu scmd %p ABORT FAILED, lockup detected\n",
+			h->scsi_host->host_no, sc->device->channel,
+			sc->device->id, sc->device->lun, sc);
+		return FAILED;
+	} else {
+		/* good time to check if controller lockup has occurred */
+		/* FIXME eh_timeout_handler would be even better.
+		 * for testing, abort is just being used for timeouts,
+		 * so is equivalent */
+		dev_warn(&h->pdev->dev, "abort 0.2\n");
+		detect_controller_lockup(h);
+		dev_warn(&h->pdev->dev, "abort 0.3\n");
+
+		/* check again in case one just occurred */
+		if (lockup_detected(h)) {
+			dev_warn(&h->pdev->dev, "abort 0.4\n");
+			dev_warn(&h->pdev->dev,
+				"scsi %d:%d:%d:%llu scmd %p ABORT FAILED, lockup detected\n",
+				h->scsi_host->host_no, sc->device->channel,
+				sc->device->id, sc->device->lun, sc);
+			return FAILED;
+		}
+	}
+
+	dev_warn(&h->pdev->dev, "abort 1\n");
 	/* Check that controller supports some kind of task abort */
 	if (!(HPSATMF_PHYS_TASK_ABORT & h->TMFSupportFlags) &&
 		!(HPSATMF_LOG_TASK_ABORT & h->TMFSupportFlags))
 		return FAILED;
 
+	dev_warn(&h->pdev->dev, "abort 2\n");
 	memset(msg, 0, sizeof(msg));
-	ml += sprintf(msg+ml, "ABORT REQUEST on C%d:B%d:T%d:L%llu ",
+	ml += sprintf(msg+ml, "scsi %d:%d:%d:%llu scmd %p ABORT ",
 		h->scsi_host->host_no, sc->device->channel,
-		sc->device->id, sc->device->lun);
+		sc->device->id, sc->device->lun, sc);
 
 	/* Find the device of the command to be aborted */
 	dev = sc->device->hostdata;
@@ -4603,184 +5708,229 @@ static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
 		return FAILED;
 	}
 
+	dev_warn(&h->pdev->dev, "abort 3\n");
 	/* Get SCSI command to be aborted */
 	abort = (struct CommandList *) sc->host_scribble;
 	if (abort == NULL) {
-		dev_err(&h->pdev->dev, "%s FAILED, Command to abort is NULL.\n",
-				msg);
+		/* This can happen if the command already completed. */
+		return SUCCESS;
+	}
+	dev_warn(&h->pdev->dev, "abort 4\n");
+	refcount = atomic_inc_return(&abort->refcount);
+	if (refcount == 1) { /* Command is done already. */
+		cmd_free(h, abort);
+		return SUCCESS;
+	}
+
+	dev_warn(&h->pdev->dev, "abort 5\n");
+	/* Don't bother trying the abort if we know it won't work. */
+	if (abort->cmd_type != CMD_IOACCEL2 &&
+		abort->cmd_type != CMD_IOACCEL1 && !dev->supports_aborts) {
+		cmd_free(h, abort);
 		return FAILED;
 	}
-	hpsa_get_tag(h, abort, &taglower, &tagupper);
-	ml += sprintf(msg+ml, "Tag:0x%08x:%08x ", tagupper, taglower);
-	as  = (struct scsi_cmnd *) abort->scsi_cmd;
-	if (as != NULL)
-		ml += sprintf(msg+ml, "Command:0x%x SN:0x%lx ",
-			as->cmnd[0], as->serial_number);
-	dev_dbg(&h->pdev->dev, "%s\n", msg);
-	dev_warn(&h->pdev->dev, "Abort request on C%d:B%d:T%d:L%d\n",
-		h->scsi_host->host_no, dev->bus, dev->target, dev->lun);
 
-	/* Search reqQ to See if command is queued but not submitted,
-	 * if so, complete the command with aborted status and remove
-	 * it from the reqQ.
+	dev_warn(&h->pdev->dev, "abort 6\n");
+	/* Check that we're aborting the right command.
+	 * It's possible the CommandList already completed and got re-used.
 	 */
-	found = hpsa_find_cmd_in_queue(h, sc, &h->reqQ);
-	if (found) {
-		found->err_info->CommandStatus = CMD_ABORTED;
-		finish_cmd(found);
-		dev_info(&h->pdev->dev, "%s Request SUCCEEDED (driver queue).\n",
-				msg);
+	if (abort->scsi_cmd != sc) {
+		cmd_free(h, abort);
 		return SUCCESS;
 	}
+	dev_warn(&h->pdev->dev, "abort 7\n");
 
-	/* not in reqQ, if also not in cmpQ, must have already completed */
-	found = hpsa_find_cmd_in_queue(h, sc, &h->cmpQ);
-	if (!found)  {
-		dev_dbg(&h->pdev->dev, "%s Request SUCCEEDED (not known to driver).\n",
-				msg);
-		return SUCCESS;
-	}
+	abort->abort_pending = true;
+	hpsa_get_tag(h, abort, &taglower, &tagupper);
+	reply_queue = hpsa_extract_reply_queue(h, abort);
+	ml += sprintf(msg+ml, "Tag:0x%08x:%08x ", tagupper, taglower);
+	as  = abort->scsi_cmd;
+	if (as != NULL)
+		ml += sprintf(msg+ml,
+			"CDBLen: %d CDB: 0x%02x%02x... SN: 0x%lx ",
+			as->cmd_len, as->cmnd[0], as->cmnd[1],
+			as->serial_number);
+	dev_warn(&h->pdev->dev, "%s BEING SENT\n", msg);
+	dev_warn(&h->pdev->dev, "abort 8 (waiting for abort command to be free)\n");
 
 	/*
 	 * Command is in flight, or possibly already completed
 	 * by the firmware (but not to the scsi mid layer) but we can't
 	 * distinguish which.  Send the abort down.
 	 */
-	rc = hpsa_send_abort_both_ways(h, dev->scsi3addr, abort);
+	if (wait_for_available_abort_cmd(h)) {
+		dev_warn(&h->pdev->dev,
+			"%s FAILED, timeout waiting for an abort command to become available.\n",
+			msg);
+		cmd_free(h, abort);
+		return FAILED;
+	}
+	dev_warn(&h->pdev->dev, "abort 9 (got abort command)\n");
+	rc = hpsa_send_abort_both_ways(h, dev->scsi3addr, abort, reply_queue);
+	atomic_inc(&h->abort_cmds_available);
+	wake_up_all(&h->abort_cmd_wait_queue);
 	if (rc != 0) {
-		dev_dbg(&h->pdev->dev, "%s Request FAILED.\n", msg);
-		dev_warn(&h->pdev->dev, "FAILED abort on device C%d:B%d:T%d:L%d\n",
-			h->scsi_host->host_no,
-			dev->bus, dev->target, dev->lun);
+		dev_warn(&h->pdev->dev, "%s SENT, FAILED\n", msg);
+		cmd_free(h, abort);
 		return FAILED;
 	}
-	dev_info(&h->pdev->dev, "%s REQUEST SUCCEEDED.\n", msg);
+	dev_warn(&h->pdev->dev, "abort 10\n");
+	dev_info(&h->pdev->dev, "%s SENT, SUCCESS\n", msg);
+	wait_event(h->abort_sync_wait_queue, atomic_read(&abort->refcount) == 1);
+	cmd_free(h, abort);
+	dev_warn(&h->pdev->dev, "abort 11\n");
+	return SUCCESS;
+}
+
+/*
+ * One of the upper layers has already gone to the trouble of picking out a
+ * unique, small-integer tag for this request.  If the blk-mq support is not
+ * available, we use the SCSI tag in the SCSI command; otherwise, we use the
+ * block layer tag in the embedded request structure.  We use an offset from
+ * that value as an index to select our command block.  (The offset allows us to
+ * reserve the low-numbered entries for our own uses.)
+ */
+static int hpsa_get_cmd_index(struct scsi_cmnd *scmd)
+{
+	struct scsi_device *dev = scmd->device;
+	int idx = shost_use_blk_mq(dev->host) ? scmd->request->tag : scmd->tag;
+
+	if (idx < 0) {
+		dev_err(&dev->sdev_dev , "Invalid block tag: %d\n", idx);
+		/* This value comes from an upper layer...it's not our bug. */
+		BUG_ON(idx < 0);
+	}
+
+	/* Offset to leave space for internal cmds. */
+	idx += HPSA_NRESERVED_CMDS;
+
+	return idx;
+}
+
+/* Log the CDB of @scmd as a hex dump, labelled with command index @idx. */
+static void print_scsi_cmd(struct ctlr_info *h, struct scsi_cmnd *scmd, int idx)
+{
+	/* %*ph consumes the byte count, then the buffer to hex-dump. */
+	dev_warn(&h->pdev->dev, "Tag %d: CDB: %*ph\n",
+		 idx, scmd->cmd_len, scmd->cmnd);
+}
+
+/*
+ * For operations with an associated SCSI command, a command block is allocated
+ * at init, and managed by cmd_tagged_alloc() and cmd_tagged_free() using the
+ * block request tag as an index into a table of entries.  cmd_tagged_free() is
+ * the complement, although cmd_free() may be called instead.
+ */
+static struct CommandList *cmd_tagged_alloc(struct ctlr_info *h,
+					    struct scsi_cmnd *scmd)
+{
+	int idx = hpsa_get_cmd_index(scmd);
+	struct CommandList *c = h->cmd_pool + idx;
+	int refcount = 0;
+
+	if (idx < HPSA_NRESERVED_CMDS || idx >= h->nr_cmds) {
+		dev_err(&h->pdev->dev, "Bad block tag: %d\n", idx);
+		/* The index value comes from the block layer, so if it's out of
+		 * bounds, it's probably not our bug.
+		 */
+		BUG_ON(idx < HPSA_NRESERVED_CMDS || idx >= h->nr_cmds);
+	}
+
+	refcount = atomic_inc_return(&c->refcount);
+	if (unlikely(refcount > 1)) {
+		/*
+		 * We expect that the SCSI layer will hand us a unique tag
+		 * value.  Thus, there should never be a collision here between
+		 * two requests; however, it's possible that a lingering abort
+		 * might have an outstanding reference when the next request
+		 * comes in (the block layer tends to reuse the last-used tag
+		 * first).  Hopefully, that won't be a problem.
+		 */
+		dev_warn(&h->pdev->dev,
+			"tag collision (tag=%d) in cmd_tagged_alloc().\n",
+			idx);
+		print_scsi_cmd(h, scmd, idx);
+	}
 
-	/* If the abort(s) above completed and actually aborted the
-	 * command, then the command to be aborted should already be
-	 * completed.  If not, wait around a bit more to see if they
-	 * manage to complete normally.
-	 */
-#define ABORT_COMPLETE_WAIT_SECS 30
-	for (i = 0; i < ABORT_COMPLETE_WAIT_SECS * 10; i++) {
-		found = hpsa_find_cmd_in_queue(h, sc, &h->cmpQ);
-		if (!found)
-			return SUCCESS;
-		msleep(100);
-	}
-	dev_warn(&h->pdev->dev, "%s FAILED. Aborted command has not completed after %d seconds.\n",
-		msg, ABORT_COMPLETE_WAIT_SECS);
-	return FAILED;
+	hpsa_cmd_partial_init(h, idx, c);
+	return c;
 }
 
+static void cmd_tagged_free(struct ctlr_info *h, struct CommandList *c)
+{
+	/*
+	 * Release our reference to the block.  We don't need to do anything
+	 * else to free it, because it is accessed by index.  (There's no point
+	 * in checking the result of the decrement, since we cannot guarantee
+	 * that there isn't a concurrent abort which is also accessing it.)
+	 */
+	(void)atomic_dec_and_test(&c->refcount);
+}
 
 /*
  * For operations that cannot sleep, a command block is allocated at init,
  * and managed by cmd_alloc() and cmd_free() using a simple bitmap to track
  * which ones are free or in use.  Lock must be held when calling this.
  * cmd_free() is the complement.
+ * This function never gives up and never returns NULL.  If it hangs,
+ * another thread must call cmd_free() to free some tags.
  */
 static struct CommandList *cmd_alloc(struct ctlr_info *h)
 {
 	struct CommandList *c;
-	int i;
-	union u64bit temp64;
-	dma_addr_t cmd_dma_handle, err_dma_handle;
-	unsigned long flags;
+	int refcount, i;
+
+	/* There is some *extremely* small but non-zero chance that
+	 * multiple threads could get in here, and one thread could
+	 * be scanning through the list of bits looking for a free
+	 * one, but the free ones are always behind him, and other
+	 * threads sneak in behind him and eat them before he can
+	 * get to them, so that while there is always a free one, a
+	 * very unlucky thread might be starved anyway, never able to
+	 * beat the other threads.  In reality, this happens so
+	 * infrequently as to be indistinguishable from never.
+	 *
+	 * Note that we start allocating commands before the SCSI host structure
+	 * is initialized.  Since the search starts at bit zero, this
+	 * all works, since we have at least one command structure available;
+	 * however, it means that the structures with the low indexes have to be
+	 * reserved for driver-initiated requests, while requests from the block
+	 * layer will use the higher indexes.
+	 */
 
-	spin_lock_irqsave(&h->lock, flags);
-	do {
-		i = find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds);
-		if (i == h->nr_cmds) {
-			spin_unlock_irqrestore(&h->lock, flags);
-			return NULL;
+	for (;;) {
+		i = find_first_zero_bit(h->cmd_pool_bits, HPSA_NRESERVED_CMDS);
+		if (unlikely(i >= HPSA_NRESERVED_CMDS))
+			continue;
+		c = h->cmd_pool + i;
+		refcount = atomic_inc_return(&c->refcount);
+		if (unlikely(refcount > 1)) {
+			cmd_free(h, c); /* already in use */
+			continue;
 		}
-	} while (test_and_set_bit
-		 (i & (BITS_PER_LONG - 1),
-		  h->cmd_pool_bits + (i / BITS_PER_LONG)) != 0);
-	spin_unlock_irqrestore(&h->lock, flags);
-
-	c = h->cmd_pool + i;
-	memset(c, 0, sizeof(*c));
-	cmd_dma_handle = h->cmd_pool_dhandle
-	    + i * sizeof(*c);
-	c->err_info = h->errinfo_pool + i;
-	memset(c->err_info, 0, sizeof(*c->err_info));
-	err_dma_handle = h->errinfo_pool_dhandle
-	    + i * sizeof(*c->err_info);
-
-	c->cmdindex = i;
-
-	INIT_LIST_HEAD(&c->list);
-	c->busaddr = (u32) cmd_dma_handle;
-	temp64.val = (u64) err_dma_handle;
-	c->ErrDesc.Addr.lower = temp64.val32.lower;
-	c->ErrDesc.Addr.upper = temp64.val32.upper;
-	c->ErrDesc.Len = sizeof(*c->err_info);
-
-	c->h = h;
-	return c;
-}
-
-/* For operations that can wait for kmalloc to possibly sleep,
- * this routine can be called. Lock need not be held to call
- * cmd_special_alloc. cmd_special_free() is the complement.
- */
-static struct CommandList *cmd_special_alloc(struct ctlr_info *h)
-{
-	struct CommandList *c;
-	union u64bit temp64;
-	dma_addr_t cmd_dma_handle, err_dma_handle;
-
-	c = pci_zalloc_consistent(h->pdev, sizeof(*c), &cmd_dma_handle);
-	if (c == NULL)
-		return NULL;
-
-	c->cmd_type = CMD_SCSI;
-	c->cmdindex = -1;
-
-	c->err_info = pci_zalloc_consistent(h->pdev, sizeof(*c->err_info),
-					    &err_dma_handle);
-
-	if (c->err_info == NULL) {
-		pci_free_consistent(h->pdev,
-			sizeof(*c), c, cmd_dma_handle);
-		return NULL;
+		set_bit(i & (BITS_PER_LONG - 1),
+			h->cmd_pool_bits + (i / BITS_PER_LONG));
+		break; /* it's ours now. */
 	}
-
-	INIT_LIST_HEAD(&c->list);
-	c->busaddr = (u32) cmd_dma_handle;
-	temp64.val = (u64) err_dma_handle;
-	c->ErrDesc.Addr.lower = temp64.val32.lower;
-	c->ErrDesc.Addr.upper = temp64.val32.upper;
-	c->ErrDesc.Len = sizeof(*c->err_info);
-
-	c->h = h;
+	hpsa_cmd_partial_init(h, i, c);
 	return c;
 }
 
+/*
+ * This is the complementary operation to cmd_alloc().  Note, however, in some
+ * corner cases it may also be used to free blocks allocated by
+ * cmd_tagged_alloc() in which case the ref-count decrement does the trick and
+ * the clear-bit is harmless.
+ */
 static void cmd_free(struct ctlr_info *h, struct CommandList *c)
 {
-	int i;
-	unsigned long flags;
-
-	i = c - h->cmd_pool;
-	spin_lock_irqsave(&h->lock, flags);
-	clear_bit(i & (BITS_PER_LONG - 1),
-		  h->cmd_pool_bits + (i / BITS_PER_LONG));
-	spin_unlock_irqrestore(&h->lock, flags);
-}
-
-static void cmd_special_free(struct ctlr_info *h, struct CommandList *c)
-{
-	union u64bit temp64;
+	if (atomic_dec_and_test(&c->refcount)) {
+		int i;
 
-	temp64.val32.lower = c->ErrDesc.Addr.lower;
-	temp64.val32.upper = c->ErrDesc.Addr.upper;
-	pci_free_consistent(h->pdev, sizeof(*c->err_info),
-			    c->err_info, (dma_addr_t) temp64.val);
-	pci_free_consistent(h->pdev, sizeof(*c),
-			    c, (dma_addr_t) (c->busaddr & DIRECT_LOOKUP_MASK));
+		i = c - h->cmd_pool;
+		clear_bit(i & (BITS_PER_LONG - 1),
+			  h->cmd_pool_bits + (i / BITS_PER_LONG));
+	}
 }
 
 #ifdef CONFIG_COMPAT
@@ -4932,7 +6082,7 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp)
 	IOCTL_Command_struct iocommand;
 	struct CommandList *c;
 	char *buff = NULL;
-	union u64bit temp64;
+	u64 temp64;
 	int rc = 0;
 
 	if (!argp)
@@ -4948,7 +6098,7 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp)
 	if (iocommand.buf_size > 0) {
 		buff = kmalloc(iocommand.buf_size, GFP_KERNEL);
 		if (buff == NULL)
-			return -EFAULT;
+			return -ENOMEM;
 		if (iocommand.Request.Type.Direction & XFER_WRITE) {
 			/* Copy the data into the buffer we created */
 			if (copy_from_user(buff, iocommand.buf,
@@ -4960,25 +6110,20 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp)
 			memset(buff, 0, iocommand.buf_size);
 		}
 	}
-	c = cmd_special_alloc(h);
-	if (c == NULL) {
-		rc = -ENOMEM;
-		goto out_kfree;
-	}
+	c = cmd_alloc(h);
+
 	/* Fill in the command type */
 	c->cmd_type = CMD_IOCTL_PEND;
 	/* Fill in Command Header */
 	c->Header.ReplyQueue = 0; /* unused in simple mode */
 	if (iocommand.buf_size > 0) {	/* buffer to fill */
-		c->Header.SGList = 1;
-		c->Header.SGTotal = 1;
+		c->Header.SGList = (u8) 1;
+		c->Header.SGTotal = cpu_to_le16(1);
 	} else	{ /* no buffers to fill */
 		c->Header.SGList = 0;
-		c->Header.SGTotal = 0;
+		c->Header.SGTotal = cpu_to_le16(0);
 	}
 	memcpy(&c->Header.LUN, &iocommand.LUN_info, sizeof(c->Header.LUN));
-	/* use the kernel address the cmd block for tag */
-	c->Header.Tag.lower = c->busaddr;
 
 	/* Fill in Request block */
 	memcpy(&c->Request, &iocommand.Request,
@@ -4986,21 +6131,21 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp)
 
 	/* Fill in the scatter gather information */
 	if (iocommand.buf_size > 0) {
-		temp64.val = pci_map_single(h->pdev, buff,
+		temp64 = (u64) pci_map_single(h->pdev, buff,
 			iocommand.buf_size, PCI_DMA_BIDIRECTIONAL);
-		if (dma_mapping_error(&h->pdev->dev, temp64.val)) {
-			c->SG[0].Addr.lower = 0;
-			c->SG[0].Addr.upper = 0;
-			c->SG[0].Len = 0;
+		if (dma_mapping_error(&h->pdev->dev, (dma_addr_t) temp64)) {
+			c->SG[0].Addr = cpu_to_le64(0);
+			c->SG[0].Len = cpu_to_le32(0);
 			rc = -ENOMEM;
 			goto out;
 		}
-		c->SG[0].Addr.lower = temp64.val32.lower;
-		c->SG[0].Addr.upper = temp64.val32.upper;
-		c->SG[0].Len = iocommand.buf_size;
-		c->SG[0].Ext = HPSA_SG_LAST; /* we are not chaining*/
+		c->SG[0].Addr = cpu_to_le64(temp64);
+		c->SG[0].Len = cpu_to_le32(iocommand.buf_size);
+		c->SG[0].Ext = cpu_to_le32(HPSA_SG_LAST); /* not chaining */
 	}
-	hpsa_scsi_do_simple_cmd_core_if_no_lockup(h, c);
+	rc = hpsa_scsi_do_simple_cmd(h, c, DEFAULT_REPLY_QUEUE, NO_TIMEOUT);
+	if (rc)
+		rc = -EIO;
 	if (iocommand.buf_size > 0)
 		hpsa_pci_unmap(h->pdev, c, 1, PCI_DMA_BIDIRECTIONAL);
 	check_ioctl_unit_attention(h, c);
@@ -5021,7 +6166,7 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp)
 		}
 	}
 out:
-	cmd_special_free(h, c);
+	cmd_free(h, c);
 out_kfree:
 	kfree(buff);
 	return rc;
@@ -5033,7 +6178,7 @@ static int hpsa_big_passthru_ioctl(struct ctlr_info *h, void __user *argp)
 	struct CommandList *c;
 	unsigned char **buff = NULL;
 	int *buff_size = NULL;
-	union u64bit temp64;
+	u64 temp64;
 	BYTE sg_used = 0;
 	int status = 0;
 	int i;
@@ -5100,38 +6245,39 @@ static int hpsa_big_passthru_ioctl(struct ctlr_info *h, void __user *argp)
 		data_ptr += sz;
 		sg_used++;
 	}
-	c = cmd_special_alloc(h);
-	if (c == NULL) {
-		status = -ENOMEM;
-		goto cleanup1;
-	}
+	c = cmd_alloc(h);
+
 	c->cmd_type = CMD_IOCTL_PEND;
 	c->Header.ReplyQueue = 0;
-	c->Header.SGList = c->Header.SGTotal = sg_used;
+	c->Header.SGList = (u8) sg_used;
+	c->Header.SGTotal = cpu_to_le16(sg_used);
 	memcpy(&c->Header.LUN, &ioc->LUN_info, sizeof(c->Header.LUN));
-	c->Header.Tag.lower = c->busaddr;
 	memcpy(&c->Request, &ioc->Request, sizeof(c->Request));
 	if (ioc->buf_size > 0) {
 		int i;
 		for (i = 0; i < sg_used; i++) {
-			temp64.val = pci_map_single(h->pdev, buff[i],
+			temp64 = (u64) pci_map_single(h->pdev, buff[i],
 				    buff_size[i], PCI_DMA_BIDIRECTIONAL);
-			if (dma_mapping_error(&h->pdev->dev, temp64.val)) {
-				c->SG[i].Addr.lower = 0;
-				c->SG[i].Addr.upper = 0;
+			if (dma_mapping_error(&h->pdev->dev,
+							(dma_addr_t) temp64)) {
+				c->SG[i].Addr = 0;
 				c->SG[i].Len = 0;
 				hpsa_pci_unmap(h->pdev, c, i,
 					PCI_DMA_BIDIRECTIONAL);
 				status = -ENOMEM;
 				goto cleanup0;
 			}
-			c->SG[i].Addr.lower = temp64.val32.lower;
-			c->SG[i].Addr.upper = temp64.val32.upper;
-			c->SG[i].Len = buff_size[i];
-			c->SG[i].Ext = i < sg_used - 1 ? 0 : HPSA_SG_LAST;
+			c->SG[i].Addr = cpu_to_le64(temp64);
+			c->SG[i].Len = cpu_to_le32(buff_size[i]);
+			c->SG[i].Ext = /* flag only the final descriptor */
+				cpu_to_le32((i == sg_used - 1) * HPSA_SG_LAST);
 		}
 	}
-	hpsa_scsi_do_simple_cmd_core_if_no_lockup(h, c);
+	if (hpsa_scsi_do_simple_cmd(h, c, DEFAULT_REPLY_QUEUE, NO_TIMEOUT)) {
+		hpsa_pci_unmap(h->pdev, c, sg_used, PCI_DMA_BIDIRECTIONAL);
+		status = -EIO;
+		goto cleanup0; /* SG buffers were unmapped just above */
+	}
 	if (sg_used)
 		hpsa_pci_unmap(h->pdev, c, sg_used, PCI_DMA_BIDIRECTIONAL);
 	check_ioctl_unit_attention(h, c);
@@ -5154,7 +6300,7 @@ static int hpsa_big_passthru_ioctl(struct ctlr_info *h, void __user *argp)
 	}
 	status = 0;
 cleanup0:
-	cmd_special_free(h, c);
+	cmd_free(h, c);
 cleanup1:
 	if (buff) {
 		for (i = 0; i < sg_used; i++)
@@ -5174,35 +6320,6 @@ static void check_ioctl_unit_attention(struct ctlr_info *h,
 		(void) check_for_unit_attention(h, c);
 }
 
-static int increment_passthru_count(struct ctlr_info *h)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&h->passthru_count_lock, flags);
-	if (h->passthru_count >= HPSA_MAX_CONCURRENT_PASSTHRUS) {
-		spin_unlock_irqrestore(&h->passthru_count_lock, flags);
-		return -1;
-	}
-	h->passthru_count++;
-	spin_unlock_irqrestore(&h->passthru_count_lock, flags);
-	return 0;
-}
-
-static void decrement_passthru_count(struct ctlr_info *h)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&h->passthru_count_lock, flags);
-	if (h->passthru_count <= 0) {
-		spin_unlock_irqrestore(&h->passthru_count_lock, flags);
-		/* not expecting to get here. */
-		dev_warn(&h->pdev->dev, "Bug detected, passthru_count seems to be incorrect.\n");
-		return;
-	}
-	h->passthru_count--;
-	spin_unlock_irqrestore(&h->passthru_count_lock, flags);
-}
-
 /*
  * ioctl
  */
@@ -5225,30 +6342,29 @@ static int hpsa_ioctl(struct scsi_device *dev, int cmd, void *arg)
 	case CCISS_GETDRIVVER:
 		return hpsa_getdrivver_ioctl(h, argp);
 	case CCISS_PASSTHRU:
-		if (increment_passthru_count(h))
+		if (atomic_dec_if_positive(&h->passthru_cmds_avail) < 0)
 			return -EAGAIN;
 		rc = hpsa_passthru_ioctl(h, argp);
-		decrement_passthru_count(h);
+		atomic_inc(&h->passthru_cmds_avail);
 		return rc;
 	case CCISS_BIG_PASSTHRU:
-		if (increment_passthru_count(h))
+		if (atomic_dec_if_positive(&h->passthru_cmds_avail) < 0)
 			return -EAGAIN;
 		rc = hpsa_big_passthru_ioctl(h, argp);
-		decrement_passthru_count(h);
+		atomic_inc(&h->passthru_cmds_avail);
 		return rc;
 	default:
 		return -ENOTTY;
 	}
 }
 
-static int hpsa_send_host_reset(struct ctlr_info *h, unsigned char *scsi3addr,
+static void hpsa_send_host_reset(struct ctlr_info *h, unsigned char *scsi3addr,
 				u8 reset_type)
 {
 	struct CommandList *c;
 
 	c = cmd_alloc(h);
-	if (!c)
-		return -ENOMEM;
+
 	/* fill_cmd can't fail here, no data buffer to map */
 	(void) fill_cmd(c, HPSA_DEVICE_RESET_MSG, h, NULL, 0, 0,
 		RAID_CTLR_LUNID, TYPE_MSG);
@@ -5259,7 +6375,7 @@ static int hpsa_send_host_reset(struct ctlr_info *h, unsigned char *scsi3addr,
 	 * the command either.  This is the last command we will send before
 	 * re-initializing everything, so it doesn't matter and won't leak.
 	 */
-	return 0;
+	return;
 }
 
 static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
@@ -5267,21 +6383,20 @@ static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
 	int cmd_type)
 {
 	int pci_dir = XFER_NONE;
-	struct CommandList *a; /* for commands to be aborted */
+	u64 tag; /* for commands to be aborted */
+	u32 tupper, tlower;
 
 	c->cmd_type = CMD_IOCTL_PEND;
 	c->Header.ReplyQueue = 0;
 	if (buff != NULL && size > 0) {
-		c->Header.SGList = 1;
-		c->Header.SGTotal = 1;
+		c->Header.SGList = (u8) 1;
+		c->Header.SGTotal = cpu_to_le16(1);
 	} else {
 		c->Header.SGList = 0;
 		c->Header.SGTotal = 0;
 	}
-	c->Header.Tag.lower = c->busaddr;
 	memcpy(c->Header.LUN.LunAddrBytes, scsi3addr, 8);
 
-	c->Request.Type.Type = cmd_type;
 	if (cmd_type == TYPE_CMD) {
 		switch (cmd) {
 		case HPSA_INQUIRY:
@@ -5291,8 +6406,8 @@ static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
 				c->Request.CDB[2] = (page_code & 0xff);
 			}
 			c->Request.CDBLen = 6;
-			c->Request.Type.Attribute = ATTR_SIMPLE;
-			c->Request.Type.Direction = XFER_READ;
+			c->Request.type_attr_dir =
+				TYPE_ATTR_DIR(cmd_type, ATTR_SIMPLE, XFER_READ);
 			c->Request.Timeout = 0;
 			c->Request.CDB[0] = HPSA_INQUIRY;
 			c->Request.CDB[4] = size & 0xFF;
@@ -5303,8 +6418,8 @@ static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
 			   mode = 00 target = 0.  Nothing to write.
 			 */
 			c->Request.CDBLen = 12;
-			c->Request.Type.Attribute = ATTR_SIMPLE;
-			c->Request.Type.Direction = XFER_READ;
+			c->Request.type_attr_dir =
+				TYPE_ATTR_DIR(cmd_type, ATTR_SIMPLE, XFER_READ);
 			c->Request.Timeout = 0;
 			c->Request.CDB[0] = cmd;
 			c->Request.CDB[6] = (size >> 24) & 0xFF; /* MSB */
@@ -5314,8 +6429,9 @@ static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
 			break;
 		case HPSA_CACHE_FLUSH:
 			c->Request.CDBLen = 12;
-			c->Request.Type.Attribute = ATTR_SIMPLE;
-			c->Request.Type.Direction = XFER_WRITE;
+			c->Request.type_attr_dir =
+					TYPE_ATTR_DIR(cmd_type,
+						ATTR_SIMPLE, XFER_WRITE);
 			c->Request.Timeout = 0;
 			c->Request.CDB[0] = BMIC_WRITE;
 			c->Request.CDB[6] = BMIC_CACHE_FLUSH;
@@ -5324,14 +6440,14 @@ static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
 			break;
 		case TEST_UNIT_READY:
 			c->Request.CDBLen = 6;
-			c->Request.Type.Attribute = ATTR_SIMPLE;
-			c->Request.Type.Direction = XFER_NONE;
+			c->Request.type_attr_dir =
+				TYPE_ATTR_DIR(cmd_type, ATTR_SIMPLE, XFER_NONE);
 			c->Request.Timeout = 0;
 			break;
 		case HPSA_GET_RAID_MAP:
 			c->Request.CDBLen = 12;
-			c->Request.Type.Attribute = ATTR_SIMPLE;
-			c->Request.Type.Direction = XFER_READ;
+			c->Request.type_attr_dir =
+				TYPE_ATTR_DIR(cmd_type, ATTR_SIMPLE, XFER_READ);
 			c->Request.Timeout = 0;
 			c->Request.CDB[0] = HPSA_CISS_READ;
 			c->Request.CDB[1] = cmd;
@@ -5342,14 +6458,24 @@ static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
 			break;
 		case BMIC_SENSE_CONTROLLER_PARAMETERS:
 			c->Request.CDBLen = 10;
-			c->Request.Type.Attribute = ATTR_SIMPLE;
-			c->Request.Type.Direction = XFER_READ;
+			c->Request.type_attr_dir =
+				TYPE_ATTR_DIR(cmd_type, ATTR_SIMPLE, XFER_READ);
 			c->Request.Timeout = 0;
 			c->Request.CDB[0] = BMIC_READ;
 			c->Request.CDB[6] = BMIC_SENSE_CONTROLLER_PARAMETERS;
 			c->Request.CDB[7] = (size >> 16) & 0xFF;
 			c->Request.CDB[8] = (size >> 8) & 0xFF;
 			break;
+		case BMIC_IDENTIFY_PHYSICAL_DEVICE:
+			c->Request.CDBLen = 10;
+			c->Request.type_attr_dir =
+				TYPE_ATTR_DIR(cmd_type, ATTR_SIMPLE, XFER_READ);
+			c->Request.Timeout = 0;
+			c->Request.CDB[0] = BMIC_READ;
+			c->Request.CDB[6] = BMIC_IDENTIFY_PHYSICAL_DEVICE;
+			c->Request.CDB[7] = (size >> 16) & 0xFF;
+			c->Request.CDB[8] = (size >> 8) & 0XFF;
+			break;
 		default:
 			dev_warn(&h->pdev->dev, "unknown command 0x%c\n", cmd);
 			BUG();
@@ -5360,9 +6486,8 @@ static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
 
 		case  HPSA_DEVICE_RESET_MSG:
 			c->Request.CDBLen = 16;
-			c->Request.Type.Type =  1; /* It is a MSG not a CMD */
-			c->Request.Type.Attribute = ATTR_SIMPLE;
-			c->Request.Type.Direction = XFER_NONE;
+			c->Request.type_attr_dir =
+				TYPE_ATTR_DIR(cmd_type, ATTR_SIMPLE, XFER_NONE);
 			c->Request.Timeout = 0; /* Don't time out */
 			memset(&c->Request.CDB[0], 0, sizeof(c->Request.CDB));
 			c->Request.CDB[0] =  cmd;
@@ -5375,28 +6500,29 @@ static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
 			c->Request.CDB[7] = 0x00;
 			break;
 		case  HPSA_ABORT_MSG:
-			a = buff;       /* point to command to be aborted */
-			dev_dbg(&h->pdev->dev, "Abort Tag:0x%08x:%08x using request Tag:0x%08x:%08x\n",
-				a->Header.Tag.upper, a->Header.Tag.lower,
-				c->Header.Tag.upper, c->Header.Tag.lower);
+			memcpy(&tag, buff, sizeof(tag));
+			dev_dbg(&h->pdev->dev, "Abort Tag:0x%016llx using request Tag:0x%016llx",
+				tag, c->Header.tag);
+			tlower = (u32) (tag & 0x0ffffffffULL); /* low dword: CDB[4..7] */
+			tupper = (u32) (tag >> 32); /* high dword: CDB[8..11] */
 			c->Request.CDBLen = 16;
-			c->Request.Type.Type = TYPE_MSG;
-			c->Request.Type.Attribute = ATTR_SIMPLE;
-			c->Request.Type.Direction = XFER_WRITE;
+			c->Request.type_attr_dir =
+					TYPE_ATTR_DIR(cmd_type,
+						ATTR_SIMPLE, XFER_WRITE);
 			c->Request.Timeout = 0; /* Don't time out */
 			c->Request.CDB[0] = HPSA_TASK_MANAGEMENT;
 			c->Request.CDB[1] = HPSA_TMF_ABORT_TASK;
 			c->Request.CDB[2] = 0x00; /* reserved */
 			c->Request.CDB[3] = 0x00; /* reserved */
 			/* Tag to abort goes in CDB[4]-CDB[11] */
-			c->Request.CDB[4] = a->Header.Tag.lower & 0xFF;
-			c->Request.CDB[5] = (a->Header.Tag.lower >> 8) & 0xFF;
-			c->Request.CDB[6] = (a->Header.Tag.lower >> 16) & 0xFF;
-			c->Request.CDB[7] = (a->Header.Tag.lower >> 24) & 0xFF;
-			c->Request.CDB[8] = a->Header.Tag.upper & 0xFF;
-			c->Request.CDB[9] = (a->Header.Tag.upper >> 8) & 0xFF;
-			c->Request.CDB[10] = (a->Header.Tag.upper >> 16) & 0xFF;
-			c->Request.CDB[11] = (a->Header.Tag.upper >> 24) & 0xFF;
+			c->Request.CDB[4] = tlower & 0xFF;
+			c->Request.CDB[5] = (tlower >> 8) & 0xFF;
+			c->Request.CDB[6] = (tlower >> 16) & 0xFF;
+			c->Request.CDB[7] = (tlower >> 24) & 0xFF;
+			c->Request.CDB[8] = tupper & 0xFF;
+			c->Request.CDB[9] = (tupper >> 8) & 0xFF;
+			c->Request.CDB[10] = (tupper >> 16) & 0xFF;
+			c->Request.CDB[11] = (tupper >> 24) & 0xFF;
 			c->Request.CDB[12] = 0x00; /* reserved */
 			c->Request.CDB[13] = 0x00; /* reserved */
 			c->Request.CDB[14] = 0x00; /* reserved */
@@ -5412,7 +6538,7 @@ static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
 		BUG();
 	}
 
-	switch (c->Request.Type.Direction) {
+	switch (GET_DIR(c->Request.type_attr_dir)) {
 	case XFER_READ:
 		pci_dir = PCI_DMA_FROMDEVICE;
 		break;
@@ -5443,53 +6569,6 @@ static void __iomem *remap_pci_mem(ulong base, ulong size)
 	return page_remapped ? (page_remapped + page_offs) : NULL;
 }
 
-/* Takes cmds off the submission queue and sends them to the hardware,
- * then puts them on the queue of cmds waiting for completion.
- * Assumes h->lock is held
- */
-static void start_io(struct ctlr_info *h, unsigned long *flags)
-{
-	struct CommandList *c;
-
-	while (!list_empty(&h->reqQ)) {
-		c = list_entry(h->reqQ.next, struct CommandList, list);
-		/* can't do anything if fifo is full */
-		if ((h->access.fifo_full(h))) {
-			h->fifo_recently_full = 1;
-			dev_warn(&h->pdev->dev, "fifo full\n");
-			break;
-		}
-		h->fifo_recently_full = 0;
-
-		/* Get the first entry from the Request Q */
-		removeQ(c);
-		h->Qdepth--;
-
-		/* Put job onto the completed Q */
-		addQ(&h->cmpQ, c);
-
-		/* Must increment commands_outstanding before unlocking
-		 * and submitting to avoid race checking for fifo full
-		 * condition.
-		 */
-		h->commands_outstanding++;
-
-		/* Tell the controller execute command */
-		spin_unlock_irqrestore(&h->lock, *flags);
-		h->access.submit_command(h, c);
-		spin_lock_irqsave(&h->lock, *flags);
-	}
-}
-
-static void lock_and_start_io(struct ctlr_info *h)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&h->lock, flags);
-	start_io(h, &flags);
-	spin_unlock_irqrestore(&h->lock, flags);
-}
-
 static inline unsigned long get_next_completion(struct ctlr_info *h, u8 q)
 {
 	return h->access.command_completed(h, q);
@@ -5518,53 +6597,12 @@ static inline int bad_tag(struct ctlr_info *h, u32 tag_index,
 
 static inline void finish_cmd(struct CommandList *c)
 {
-	unsigned long flags;
-	int io_may_be_stalled = 0;
-	struct ctlr_info *h = c->h;
-
-	spin_lock_irqsave(&h->lock, flags);
-	removeQ(c);
-
-	/*
-	 * Check for possibly stalled i/o.
-	 *
-	 * If a fifo_full condition is encountered, requests will back up
-	 * in h->reqQ.  This queue is only emptied out by start_io which is
-	 * only called when a new i/o request comes in.  If no i/o's are
-	 * forthcoming, the i/o's in h->reqQ can get stuck.  So we call
-	 * start_io from here if we detect such a danger.
-	 *
-	 * Normally, we shouldn't hit this case, but pounding on the
-	 * CCISS_PASSTHRU ioctl can provoke it.  Only call start_io if
-	 * commands_outstanding is low.  We want to avoid calling
-	 * start_io from in here as much as possible, and esp. don't
-	 * want to get in a cycle where we call start_io every time
-	 * through here.
-	 */
-	if (unlikely(h->fifo_recently_full) &&
-		h->commands_outstanding < 5)
-		io_may_be_stalled = 1;
-
-	spin_unlock_irqrestore(&h->lock, flags);
-
 	dial_up_lockup_detection_on_fw_flash_complete(c->h, c);
 	if (likely(c->cmd_type == CMD_IOACCEL1 || c->cmd_type == CMD_SCSI
 			|| c->cmd_type == CMD_IOACCEL2))
 		complete_scsi_command(c);
-	else if (c->cmd_type == CMD_IOCTL_PEND)
+	else if (c->cmd_type == CMD_IOCTL_PEND || c->cmd_type == IOACCEL2_TMF)
 		complete(c->waiting);
-	if (unlikely(io_may_be_stalled))
-		lock_and_start_io(h);
-}
-
-static inline u32 hpsa_tag_contains_index(u32 tag)
-{
-	return tag & DIRECT_LOOKUP_BIT;
-}
-
-static inline u32 hpsa_tag_to_index(u32 tag)
-{
-	return tag >> DIRECT_LOOKUP_SHIFT;
 }
 
 
@@ -5584,34 +6622,13 @@ static inline void process_indexed_cmd(struct ctlr_info *h,
 	u32 tag_index;
 	struct CommandList *c;
 
-	tag_index = hpsa_tag_to_index(raw_tag);
+	tag_index = raw_tag >> DIRECT_LOOKUP_SHIFT;
 	if (!bad_tag(h, tag_index, raw_tag)) {
 		c = h->cmd_pool + tag_index;
 		finish_cmd(c);
 	}
 }
 
-/* process completion of a non-indexed command */
-static inline void process_nonindexed_cmd(struct ctlr_info *h,
-	u32 raw_tag)
-{
-	u32 tag;
-	struct CommandList *c = NULL;
-	unsigned long flags;
-
-	tag = hpsa_tag_discard_error_bits(h, raw_tag);
-	spin_lock_irqsave(&h->lock, flags);
-	list_for_each_entry(c, &h->cmpQ, list) {
-		if ((c->busaddr & 0xFFFFFFE0) == (tag & 0xFFFFFFE0)) {
-			spin_unlock_irqrestore(&h->lock, flags);
-			finish_cmd(c);
-			return;
-		}
-	}
-	spin_unlock_irqrestore(&h->lock, flags);
-	bad_tag(h, h->nr_cmds + 1, raw_tag);
-}
-
 /* Some controllers, like p400, will give us one interrupt
  * after a soft reset, even if we turned interrupts off.
  * Only need to check for this in the hpsa_xxx_discard_completions
@@ -5689,10 +6706,7 @@ static irqreturn_t do_hpsa_intr_intx(int irq, void *queue)
 	while (interrupt_pending(h)) {
 		raw_tag = get_next_completion(h, q);
 		while (raw_tag != FIFO_EMPTY) {
-			if (likely(hpsa_tag_contains_index(raw_tag)))
-				process_indexed_cmd(h, raw_tag);
-			else
-				process_nonindexed_cmd(h, raw_tag);
+			process_indexed_cmd(h, raw_tag);
 			raw_tag = next_command(h, q);
 		}
 	}
@@ -5708,10 +6722,7 @@ static irqreturn_t do_hpsa_intr_msi(int irq, void *queue)
 	h->last_intr_timestamp = get_jiffies_64();
 	raw_tag = get_next_completion(h, q);
 	while (raw_tag != FIFO_EMPTY) {
-		if (likely(hpsa_tag_contains_index(raw_tag)))
-			process_indexed_cmd(h, raw_tag);
-		else
-			process_nonindexed_cmd(h, raw_tag);
+		process_indexed_cmd(h, raw_tag);
 		raw_tag = next_command(h, q);
 	}
 	return IRQ_HANDLED;
@@ -5748,7 +6759,7 @@ static int hpsa_message(struct pci_dev *pdev, unsigned char opcode,
 	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
 	if (err) {
 		iounmap(vaddr);
-		return -ENOMEM;
+		return err;
 	}
 
 	cmd = pci_alloc_consistent(pdev, cmd_sz, &paddr64);
@@ -5766,21 +6777,19 @@ static int hpsa_message(struct pci_dev *pdev, unsigned char opcode,
 	cmd->CommandHeader.ReplyQueue = 0;
 	cmd->CommandHeader.SGList = 0;
 	cmd->CommandHeader.SGTotal = 0;
-	cmd->CommandHeader.Tag.lower = paddr32;
-	cmd->CommandHeader.Tag.upper = 0;
+	cmd->CommandHeader.tag = (u64) paddr32;
 	memset(&cmd->CommandHeader.LUN.LunAddrBytes, 0, 8);
 
 	cmd->Request.CDBLen = 16;
-	cmd->Request.Type.Type = TYPE_MSG;
-	cmd->Request.Type.Attribute = ATTR_HEADOFQUEUE;
-	cmd->Request.Type.Direction = XFER_NONE;
+	cmd->Request.type_attr_dir =
+			TYPE_ATTR_DIR(TYPE_MSG, ATTR_HEADOFQUEUE, XFER_NONE);
 	cmd->Request.Timeout = 0; /* Don't time out */
 	cmd->Request.CDB[0] = opcode;
 	cmd->Request.CDB[1] = type;
 	memset(&cmd->Request.CDB[2], 0, 14); /* rest of the CDB is reserved */
-	cmd->ErrorDescriptor.Addr.lower = paddr32 + sizeof(*cmd);
-	cmd->ErrorDescriptor.Addr.upper = 0;
-	cmd->ErrorDescriptor.Len = sizeof(struct ErrorInfo);
+	cmd->ErrorDescriptor.Addr =
+			cpu_to_le64((u64) (paddr32 + sizeof(*cmd)));
+	cmd->ErrorDescriptor.Len = cpu_to_le32(sizeof(struct ErrorInfo));
 
 	writel(paddr32, vaddr + SA5_REQUEST_PORT_OFFSET);
 
@@ -5960,8 +6969,12 @@ static int hpsa_kdump_hard_reset_controller(struct pci_dev *pdev)
 	 */
 
 	rc = hpsa_lookup_board_id(pdev, &board_id);
-	if (rc < 0 || !ctlr_is_resettable(board_id)) {
-		dev_warn(&pdev->dev, "Not resetting device.\n");
+	if (rc < 0) {
+		dev_warn(&pdev->dev, "Board ID not found\n");
+		return rc;
+	}
+	if (!ctlr_is_resettable(board_id)) {
+		dev_warn(&pdev->dev, "Controller not resettable\n");
 		return -ENODEV;
 	}
 
@@ -6027,7 +7040,7 @@ static int hpsa_kdump_hard_reset_controller(struct pci_dev *pdev)
 	rc = hpsa_wait_for_board_state(pdev, vaddr, BOARD_READY);
 	if (rc) {
 		dev_warn(&pdev->dev,
-			"failed waiting for board to become ready "
+			"Failed waiting for board to become ready "
 			"after hard reset\n");
 		goto unmap_cfgtable;
 	}
@@ -6058,7 +7071,6 @@ static int hpsa_kdump_hard_reset_controller(struct pci_dev *pdev)
  */
 static void print_cfg_table(struct device *dev, struct CfgTable *tb)
 {
-#ifdef HPSA_DEBUG
 	int i;
 	char temp_name[17];
 
@@ -6079,7 +7091,7 @@ static void print_cfg_table(struct device *dev, struct CfgTable *tb)
 	       readl(&(tb->HostWrite.CoalIntDelay)));
 	dev_info(dev, "   Coalesce Interrupt Count = 0x%x\n",
 	       readl(&(tb->HostWrite.CoalIntCount)));
-	dev_info(dev, "   Max outstanding commands = 0x%d\n",
+	dev_info(dev, "   Max outstanding commands = %d\n",
 	       readl(&(tb->CmdsOutMax)));
 	dev_info(dev, "   Bus Types = 0x%x\n", readl(&(tb->BusTypes)));
 	for (i = 0; i < 16; i++)
@@ -6088,7 +7100,6 @@ static void print_cfg_table(struct device *dev, struct CfgTable *tb)
 	dev_info(dev, "   Server Name = %s\n", temp_name);
 	dev_info(dev, "   Heartbeat Counter = 0x%x\n\n\n",
 		readl(&(tb->HeartBeat)));
-#endif				/* HPSA_DEBUG */
 }
 
 static int find_PCI_BAR_index(struct pci_dev *pdev, unsigned long pci_bar_addr)
@@ -6126,10 +7137,24 @@ static int find_PCI_BAR_index(struct pci_dev *pdev, unsigned long pci_bar_addr)
 	return -1;
 }
 
+static void hpsa_disable_interrupt_mode(struct ctlr_info *h)
+{
+#ifdef CONFIG_PCI_MSI
+	if (h->msix_vector) {
+		if (h->pdev->msix_enabled)
+			pci_disable_msix(h->pdev);
+		h->msix_vector = 0;
+	} else if (h->msi_vector) {
+		if (h->pdev->msi_enabled)
+			pci_disable_msi(h->pdev);
+		h->msi_vector = 0;
+	}
+#endif /* CONFIG_PCI_MSI */
+}
+
 /* If MSI/MSI-X is supported by the kernel we will try to enable it on
- * controllers that are capable. If not, we use IO-APIC mode.
+ * controllers that are capable. If not, we use legacy INTx mode.
  */
-
 static void hpsa_interrupt_mode(struct ctlr_info *h)
 {
 #ifdef CONFIG_PCI_MSI
@@ -6146,7 +7171,7 @@ static void hpsa_interrupt_mode(struct ctlr_info *h)
 	    (h->board_id == 0x40820E11) || (h->board_id == 0x40830E11))
 		goto default_int_mode;
 	if (pci_find_capability(h->pdev, PCI_CAP_ID_MSIX)) {
-		dev_info(&h->pdev->dev, "MSIX\n");
+		dev_info(&h->pdev->dev, "MSI-X capable controller\n");
 		h->msix_vector = MAX_REPLY_QUEUES;
 		if (h->msix_vector > num_online_cpus())
 			h->msix_vector = num_online_cpus();
@@ -6167,7 +7192,7 @@ static void hpsa_interrupt_mode(struct ctlr_info *h)
 	}
 single_msi_mode:
 	if (pci_find_capability(h->pdev, PCI_CAP_ID_MSI)) {
-		dev_info(&h->pdev->dev, "MSI\n");
+		dev_info(&h->pdev->dev, "MSI capable controller\n");
 		if (!pci_enable_msi(h->pdev))
 			h->msi_vector = 1;
 		else
@@ -6260,6 +7285,21 @@ static int hpsa_find_cfg_addrs(struct pci_dev *pdev, void __iomem *vaddr,
 	return 0;
 }
 
+static void hpsa_free_cfgtables(struct ctlr_info *h)
+{
+	if (h->transtable) {
+		iounmap(h->transtable);
+		h->transtable = NULL;
+	}
+	if (h->cfgtable) {
+		iounmap(h->cfgtable);
+		h->cfgtable = NULL;
+	}
+}
+
+/* Find and map CISS config table and transfer table
+ * several items must be unmapped (freed) later
+ */
 static int hpsa_find_cfgtables(struct ctlr_info *h)
 {
 	u64 cfg_offset;
@@ -6274,8 +7314,10 @@ static int hpsa_find_cfgtables(struct ctlr_info *h)
 		return rc;
 	h->cfgtable = remap_pci_mem(pci_resource_start(h->pdev,
 		       cfg_base_addr_index) + cfg_offset, sizeof(*h->cfgtable));
-	if (!h->cfgtable)
+	if (!h->cfgtable) {
+		dev_err(&h->pdev->dev, "Failed mapping cfgtable\n");
 		return -ENOMEM;
+	}
 	rc = write_driver_ver_to_cfgtable(h->cfgtable);
 	if (rc)
 		return rc;
@@ -6284,28 +7326,43 @@ static int hpsa_find_cfgtables(struct ctlr_info *h)
 	h->transtable = remap_pci_mem(pci_resource_start(h->pdev,
 				cfg_base_addr_index)+cfg_offset+trans_offset,
 				sizeof(*h->transtable));
-	if (!h->transtable)
+	if (!h->transtable) {
+		dev_err(&h->pdev->dev, "Failed mapping transfer table\n");
+		hpsa_free_cfgtables(h);
 		return -ENOMEM;
+	}
 	return 0;
 }
 
 static void hpsa_get_max_perf_mode_cmds(struct ctlr_info *h)
 {
+#define MIN_MAX_COMMANDS 16
+	BUILD_BUG_ON(MIN_MAX_COMMANDS <= HPSA_NRESERVED_CMDS);
+
 	h->max_commands = readl(&(h->cfgtable->MaxPerformantModeCommands));
 
 	/* Limit commands in memory limited kdump scenario. */
 	if (reset_devices && h->max_commands > 32)
 		h->max_commands = 32;
 
-	if (h->max_commands < 16) {
+	if (h->max_commands < MIN_MAX_COMMANDS) {
 		dev_warn(&h->pdev->dev, "Controller reports "
 			"max supported commands of %d, an obvious lie. "
 			"Using 16.  Ensure that firmware is up to date.\n",
 			h->max_commands);
-		h->max_commands = 16;
+		h->max_commands = MIN_MAX_COMMANDS;
 	}
 }
 
+/* If the controller reports that the total max sg entries is greater than 512,
+ * then we know that chained SG blocks work.  (Original smart arrays did not
+ * support chained SG blocks and would return zero for max sg entries.)
+ */
+static int hpsa_supports_chained_sg_blocks(struct ctlr_info *h)
+{
+	return h->maxsgentries > 512;
+}
+
 /* Interrogate the hardware for some limits:
  * max commands, max SG elements without chaining, and with chaining,
  * SG chain block size, etc.
@@ -6313,19 +7370,21 @@ static void hpsa_get_max_perf_mode_cmds(struct ctlr_info *h)
 static void hpsa_find_board_params(struct ctlr_info *h)
 {
 	hpsa_get_max_perf_mode_cmds(h);
-	h->nr_cmds = h->max_commands - 4; /* Allow room for some ioctls */
+	h->nr_cmds = h->max_commands;
 	h->maxsgentries = readl(&(h->cfgtable->MaxScatterGatherElements));
 	h->fw_support = readl(&(h->cfgtable->misc_fw_support));
-	/*
-	 * Limit in-command s/g elements to 32 save dma'able memory.
-	 * Howvever spec says if 0, use 31
-	 */
-	h->max_cmd_sg_entries = 31;
-	if (h->maxsgentries > 512) {
+	if (hpsa_supports_chained_sg_blocks(h)) {
+		/* Limit in-command s/g elements to 32 save dma'able memory. */
 		h->max_cmd_sg_entries = 32;
-		h->chainsize = h->maxsgentries - h->max_cmd_sg_entries + 1;
+		h->chainsize = h->maxsgentries - h->max_cmd_sg_entries;
 		h->maxsgentries--; /* save one for chain pointer */
 	} else {
+		/* Original smart arrays supported at most 31 scatter gather
+		 * entries embedded inline in the command (trying to use more
+		 * would lock up the controller, see
+		 * https://lkml.org/lkml/2001/12/4/139 ).
+		 */
+		h->max_cmd_sg_entries = 31;
 		h->maxsgentries = 31; /* default to traditional values */
 		h->chainsize = 0;
 	}
@@ -6336,12 +7395,14 @@ static void hpsa_find_board_params(struct ctlr_info *h)
 		dev_warn(&h->pdev->dev, "Physical aborts not supported\n");
 	if (!(HPSATMF_LOG_TASK_ABORT & h->TMFSupportFlags))
 		dev_warn(&h->pdev->dev, "Logical aborts not supported\n");
+	if (!(HPSATMF_IOACCEL_ENABLED & h->TMFSupportFlags))
+		dev_warn(&h->pdev->dev, "HP SSD Smart Path aborts not supported\n");
 }
 
 static inline bool hpsa_CISS_signature_present(struct ctlr_info *h)
 {
 	if (!check_signature(h->cfgtable->Signature, "CISS", 4)) {
-		dev_warn(&h->pdev->dev, "not a valid CISS config table\n");
+		dev_err(&h->pdev->dev, "not a valid CISS config table\n");
 		return false;
 	}
 	return true;
@@ -6374,24 +7435,27 @@ static inline void hpsa_p600_dma_prefetch_quirk(struct ctlr_info *h)
 	writel(dma_prefetch, h->vaddr + I2O_DMA1_CFG);
 }
 
-static void hpsa_wait_for_clear_event_notify_ack(struct ctlr_info *h)
+static int hpsa_wait_for_clear_event_notify_ack(struct ctlr_info *h)
 {
 	int i;
 	u32 doorbell_value;
 	unsigned long flags;
 	/* wait until the clear_event_notify bit 6 is cleared by controller. */
-	for (i = 0; i < MAX_CONFIG_WAIT; i++) {
+	for (i = 0; i < MAX_CLEAR_EVENT_WAIT; i++) {
 		spin_lock_irqsave(&h->lock, flags);
 		doorbell_value = readl(h->vaddr + SA5_DOORBELL);
 		spin_unlock_irqrestore(&h->lock, flags);
 		if (!(doorbell_value & DOORBELL_CLEAR_EVENTS))
-			break;
+			goto done;
 		/* delay and try again */
-		msleep(20);
+		msleep(CLEAR_EVENT_WAIT_INTERVAL);
 	}
+	return -ENODEV;
+done:
+	return 0;
 }
 
-static void hpsa_wait_for_mode_change_ack(struct ctlr_info *h)
+static int hpsa_wait_for_mode_change_ack(struct ctlr_info *h)
 {
 	int i;
 	u32 doorbell_value;
@@ -6401,17 +7465,21 @@ static void hpsa_wait_for_mode_change_ack(struct ctlr_info *h)
 	 * (e.g.: hot replace a failed 144GB drive in a RAID 5 set right
 	 * as we enter this code.)
 	 */
-	for (i = 0; i < MAX_CONFIG_WAIT; i++) {
+	for (i = 0; i < MAX_MODE_CHANGE_WAIT; i++) {
 		spin_lock_irqsave(&h->lock, flags);
 		doorbell_value = readl(h->vaddr + SA5_DOORBELL);
 		spin_unlock_irqrestore(&h->lock, flags);
 		if (!(doorbell_value & CFGTBL_ChangeReq))
-			break;
+			goto done;
 		/* delay and try again */
-		usleep_range(10000, 20000);
+		msleep(MODE_CHANGE_WAIT_INTERVAL);
 	}
+	return -ENODEV;
+done:
+	return 0;
 }
 
+/* return -ENODEV or other reason on error, 0 on success */
 static int hpsa_enter_simple_mode(struct ctlr_info *h)
 {
 	u32 trans_support;
@@ -6426,33 +7494,54 @@ static int hpsa_enter_simple_mode(struct ctlr_info *h)
 	writel(CFGTBL_Trans_Simple, &(h->cfgtable->HostWrite.TransportRequest));
 	writel(0, &h->cfgtable->HostWrite.command_pool_addr_hi);
 	writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
-	hpsa_wait_for_mode_change_ack(h);
+	if (hpsa_wait_for_mode_change_ack(h))
+		goto error;
 	print_cfg_table(&h->pdev->dev, h->cfgtable);
 	if (!(readl(&(h->cfgtable->TransportActive)) & CFGTBL_Trans_Simple))
 		goto error;
 	h->transMethod = CFGTBL_Trans_Simple;
 	return 0;
 error:
-	dev_warn(&h->pdev->dev, "unable to get board into simple mode\n");
+	dev_err(&h->pdev->dev, "failed to enter simple mode\n");
 	return -ENODEV;
 }
 
+/* free items allocated or mapped by hpsa_pci_init */
+static void hpsa_free_pci_init(struct ctlr_info *h)
+{
+	hpsa_free_cfgtables(h);			/* pci_init 4 */
+	iounmap(h->vaddr);			/* pci_init 3 */
+	h->vaddr = NULL;
+	hpsa_disable_interrupt_mode(h);		/* pci_init 2 */
+	/*
+	 * call pci_disable_device before pci_release_regions per
+	 * Documentation/PCI/pci.txt
+	 */
+	pci_disable_device(h->pdev);		/* pci_init 1 */
+	pci_release_regions(h->pdev);		/* pci_init 2 */
+}
+
+/* several items must be freed later */
 static int hpsa_pci_init(struct ctlr_info *h)
 {
 	int prod_index, err;
 
 	prod_index = hpsa_lookup_board_id(h->pdev, &h->board_id);
 	if (prod_index < 0)
-		return -ENODEV;
+		return prod_index;
 	h->product_name = products[prod_index].product_name;
 	h->access = *(products[prod_index].access);
 
+	h->needs_abort_tags_swizzled =
+		ctlr_needs_abort_tags_swizzled(h->board_id);
+
 	pci_disable_link_state(h->pdev, PCIE_LINK_STATE_L0S |
 			       PCIE_LINK_STATE_L1 | PCIE_LINK_STATE_CLKPM);
 
 	err = pci_enable_device(h->pdev);
 	if (err) {
-		dev_warn(&h->pdev->dev, "unable to enable PCI device\n");
+		dev_err(&h->pdev->dev, "failed to enable PCI device\n");
+		pci_disable_device(h->pdev);
 		return err;
 	}
 
@@ -6462,44 +7551,50 @@ static int hpsa_pci_init(struct ctlr_info *h)
 	err = pci_request_regions(h->pdev, HPSA);
 	if (err) {
 		dev_err(&h->pdev->dev,
-			"cannot obtain PCI resources, aborting\n");
+			"failed to obtain PCI resources\n");
+		pci_disable_device(h->pdev);
 		return err;
 	}
 	hpsa_interrupt_mode(h);
 	err = hpsa_pci_find_memory_BAR(h->pdev, &h->paddr);
 	if (err)
-		goto err_out_free_res;
+		goto clean2;	/* intmode+region, pci */
 	h->vaddr = remap_pci_mem(h->paddr, 0x250);
 	if (!h->vaddr) {
+		dev_err(&h->pdev->dev, "failed to remap PCI mem\n");
 		err = -ENOMEM;
-		goto err_out_free_res;
+		goto clean2;	/* intmode+region, pci */
 	}
 	err = hpsa_wait_for_board_state(h->pdev, h->vaddr, BOARD_READY);
 	if (err)
-		goto err_out_free_res;
+		goto clean3;	/* vaddr, intmode+region, pci */
 	err = hpsa_find_cfgtables(h);
 	if (err)
-		goto err_out_free_res;
+		goto clean3;	/* vaddr, intmode+region, pci */
 	hpsa_find_board_params(h);
 
 	if (!hpsa_CISS_signature_present(h)) {
 		err = -ENODEV;
-		goto err_out_free_res;
+		goto clean4;	/* cfgtables, vaddr, intmode+region, pci */
 	}
 	hpsa_set_driver_support_bits(h);
 	hpsa_p600_dma_prefetch_quirk(h);
 	err = hpsa_enter_simple_mode(h);
 	if (err)
-		goto err_out_free_res;
+		goto clean4;	/* cfgtables, vaddr, intmode+region, pci */
 	return 0;
 
-err_out_free_res:
-	if (h->transtable)
-		iounmap(h->transtable);
-	if (h->cfgtable)
-		iounmap(h->cfgtable);
-	if (h->vaddr)
-		iounmap(h->vaddr);
+clean4:	/* cfgtables, vaddr, intmode+region, pci */
+	hpsa_free_cfgtables(h);
+clean3:	/* vaddr, intmode+region, pci */
+	iounmap(h->vaddr);
+	h->vaddr = NULL;
+clean2:	/* intmode+region, pci */
+	hpsa_disable_interrupt_mode(h);
+	/*
+	 * call pci_disable_device before pci_release_regions per
+	 * Documentation/PCI/pci.txt
+	 */
 	pci_disable_device(h->pdev);
 	pci_release_regions(h->pdev);
 	return err;
@@ -6553,14 +7648,11 @@ static int hpsa_init_reset_devices(struct pci_dev *pdev)
 	 * "performant mode".  Or, it might be 640x, which can't reset
 	 * due to concerns about shared bbwc between 6402/6404 pair.
 	 */
-	if (rc) {
-		if (rc != -ENOTSUPP) /* just try to do the kdump anyhow. */
-			rc = -ENODEV;
+	if (rc)
 		goto out_disable;
-	}
 
 	/* Now try to get the controller to respond to a no-op */
-	dev_warn(&pdev->dev, "Waiting for controller to respond to no-op\n");
+	dev_info(&pdev->dev, "Waiting for controller to respond to no-op\n");
 	for (i = 0; i < HPSA_POST_RESET_NOOP_RETRIES; i++) {
 		if (hpsa_noop(pdev) == 0)
 			break;
@@ -6575,7 +7667,28 @@ static int hpsa_init_reset_devices(struct pci_dev *pdev)
 	return rc;
 }
 
-static int hpsa_allocate_cmd_pool(struct ctlr_info *h)
+static void hpsa_free_cmd_pool(struct ctlr_info *h)
+{
+	kfree(h->cmd_pool_bits);
+	h->cmd_pool_bits = NULL;
+	if (h->cmd_pool) {
+		pci_free_consistent(h->pdev,
+			    h->nr_cmds * sizeof(struct CommandList),
+			    h->cmd_pool, h->cmd_pool_dhandle);
+		h->cmd_pool = NULL;
+		h->cmd_pool_dhandle = 0;
+	}
+	if (h->errinfo_pool) {
+		pci_free_consistent(h->pdev,
+			    h->nr_cmds * sizeof(struct ErrorInfo),
+			    h->errinfo_pool,
+			    h->errinfo_pool_dhandle);
+		h->errinfo_pool = NULL;
+		h->errinfo_pool_dhandle = 0;
+	}
+}
+
+static int hpsa_alloc_cmd_pool(struct ctlr_info *h)
 {
 	h->cmd_pool_bits = kzalloc(
 		DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG) *
@@ -6590,45 +7703,51 @@ static int hpsa_allocate_cmd_pool(struct ctlr_info *h)
 	    || (h->cmd_pool == NULL)
 	    || (h->errinfo_pool == NULL)) {
 		dev_err(&h->pdev->dev, "out of memory in %s", __func__);
-		return -ENOMEM;
+		goto clean_up;
 	}
+	hpsa_preinitialize_commands(h);
 	return 0;
+clean_up:
+	hpsa_free_cmd_pool(h);
+	return -ENOMEM;
 }
 
-static void hpsa_free_cmd_pool(struct ctlr_info *h)
+static void hpsa_irq_affinity_hints(struct ctlr_info *h)
 {
-	kfree(h->cmd_pool_bits);
-	if (h->cmd_pool)
-		pci_free_consistent(h->pdev,
-			    h->nr_cmds * sizeof(struct CommandList),
-			    h->cmd_pool, h->cmd_pool_dhandle);
-	if (h->ioaccel2_cmd_pool)
-		pci_free_consistent(h->pdev,
-			h->nr_cmds * sizeof(*h->ioaccel2_cmd_pool),
-			h->ioaccel2_cmd_pool, h->ioaccel2_cmd_pool_dhandle);
-	if (h->errinfo_pool)
-		pci_free_consistent(h->pdev,
-			    h->nr_cmds * sizeof(struct ErrorInfo),
-			    h->errinfo_pool,
-			    h->errinfo_pool_dhandle);
-	if (h->ioaccel_cmd_pool)
-		pci_free_consistent(h->pdev,
-			h->nr_cmds * sizeof(struct io_accel1_cmd),
-			h->ioaccel_cmd_pool, h->ioaccel_cmd_pool_dhandle);
+	int i, cpu, rc;
+
+	cpu = cpumask_first(cpu_online_mask);
+	for (i = 0; i < h->msix_vector; i++) {
+		rc = irq_set_affinity_hint(h->intr[i], get_cpu_mask(cpu));
+		cpu = cpumask_next(cpu, cpu_online_mask);
+	}
 }
 
-static void hpsa_irq_affinity_hints(struct ctlr_info *h)
+/* clear affinity hints and free MSI-X, MSI, or legacy INTx vectors */
+static void hpsa_free_irqs(struct ctlr_info *h)
 {
-	int i, cpu, rc;
+	int i;
+
+	if (!h->msix_vector || h->intr_mode != PERF_MODE_INT) {
+		/* Single reply queue, only one irq to free */
+		i = h->intr_mode;
+		irq_set_affinity_hint(h->intr[i], NULL);
+		free_irq(h->intr[i], &h->q[i]);
+		h->q[i] = 0;
+		return;
+	}
 
-	cpu = cpumask_first(cpu_online_mask);
 	for (i = 0; i < h->msix_vector; i++) {
-		rc = irq_set_affinity_hint(h->intr[i], get_cpu_mask(cpu));
-		cpu = cpumask_next(cpu, cpu_online_mask);
+		irq_set_affinity_hint(h->intr[i], NULL);
+		free_irq(h->intr[i], &h->q[i]);
+		h->q[i] = 0;
 	}
+	for (; i < MAX_REPLY_QUEUES; i++)
+		h->q[i] = 0;
 }
 
-static int hpsa_request_irq(struct ctlr_info *h,
+/* returns 0 on success; cleans up and returns -Enn on error */
+static int hpsa_request_irqs(struct ctlr_info *h,
 	irqreturn_t (*msixhandler)(int, void *),
 	irqreturn_t (*intxhandler)(int, void *))
 {
@@ -6643,26 +7762,54 @@ static int hpsa_request_irq(struct ctlr_info *h,
 
 	if (h->intr_mode == PERF_MODE_INT && h->msix_vector > 0) {
 		/* If performant mode and MSI-X, use multiple reply queues */
-		for (i = 0; i < h->msix_vector; i++)
+		for (i = 0; i < h->msix_vector; i++) {
+			sprintf(h->intrname[i], "%s-msix%d", h->devname, i);
 			rc = request_irq(h->intr[i], msixhandler,
-					0, h->devname,
+					0, h->intrname[i],
 					&h->q[i]);
+			if (rc) {
+				int j;
+
+				dev_err(&h->pdev->dev,
+					"failed to get irq %d for %s\n",
+				       h->intr[i], h->devname);
+				for (j = 0; j < i; j++) {
+					free_irq(h->intr[j], &h->q[j]);
+					h->q[j] = 0;
+				}
+				for (; j < MAX_REPLY_QUEUES; j++)
+					h->q[j] = 0;
+				return rc;
+			}
+		}
 		hpsa_irq_affinity_hints(h);
 	} else {
 		/* Use single reply pool */
 		if (h->msix_vector > 0 || h->msi_vector) {
+			if (h->msix_vector)
+				sprintf(h->intrname[h->intr_mode],
+					"%s-msix", h->devname);
+			else
+				sprintf(h->intrname[h->intr_mode],
+					"%s-msi", h->devname);
 			rc = request_irq(h->intr[h->intr_mode],
-				msixhandler, 0, h->devname,
+				msixhandler, 0,
+				h->intrname[h->intr_mode],
 				&h->q[h->intr_mode]);
 		} else {
+			sprintf(h->intrname[h->intr_mode],
+				"%s-intx", h->devname);
 			rc = request_irq(h->intr[h->intr_mode],
-				intxhandler, IRQF_SHARED, h->devname,
+				intxhandler, IRQF_SHARED,
+				h->intrname[h->intr_mode],
 				&h->q[h->intr_mode]);
 		}
+		irq_set_affinity_hint(h->intr[h->intr_mode], NULL);
 	}
 	if (rc) {
-		dev_err(&h->pdev->dev, "unable to get irq %d for %s\n",
+		dev_err(&h->pdev->dev, "failed to get irq %d for %s\n",
 		       h->intr[h->intr_mode], h->devname);
+		hpsa_free_irqs(h);
 		return -ENODEV;
 	}
 	return 0;
@@ -6670,60 +7817,27 @@ static int hpsa_request_irq(struct ctlr_info *h,
 
 static int hpsa_kdump_soft_reset(struct ctlr_info *h)
 {
-	if (hpsa_send_host_reset(h, RAID_CTLR_LUNID,
-		HPSA_RESET_TYPE_CONTROLLER)) {
-		dev_warn(&h->pdev->dev, "Resetting array controller failed.\n");
-		return -EIO;
-	}
+	int rc;
+	hpsa_send_host_reset(h, RAID_CTLR_LUNID, HPSA_RESET_TYPE_CONTROLLER);
 
 	dev_info(&h->pdev->dev, "Waiting for board to soft reset.\n");
-	if (hpsa_wait_for_board_state(h->pdev, h->vaddr, BOARD_NOT_READY)) {
+	rc = hpsa_wait_for_board_state(h->pdev, h->vaddr, BOARD_NOT_READY);
+	if (rc) {
 		dev_warn(&h->pdev->dev, "Soft reset had no effect.\n");
-		return -1;
+		return rc;
 	}
 
 	dev_info(&h->pdev->dev, "Board reset, awaiting READY status.\n");
-	if (hpsa_wait_for_board_state(h->pdev, h->vaddr, BOARD_READY)) {
+	rc = hpsa_wait_for_board_state(h->pdev, h->vaddr, BOARD_READY);
+	if (rc) {
 		dev_warn(&h->pdev->dev, "Board failed to become ready "
 			"after soft reset.\n");
-		return -1;
+		return rc;
 	}
 
 	return 0;
 }
 
-static void free_irqs(struct ctlr_info *h)
-{
-	int i;
-
-	if (!h->msix_vector || h->intr_mode != PERF_MODE_INT) {
-		/* Single reply queue, only one irq to free */
-		i = h->intr_mode;
-		irq_set_affinity_hint(h->intr[i], NULL);
-		free_irq(h->intr[i], &h->q[i]);
-		return;
-	}
-
-	for (i = 0; i < h->msix_vector; i++) {
-		irq_set_affinity_hint(h->intr[i], NULL);
-		free_irq(h->intr[i], &h->q[i]);
-	}
-}
-
-static void hpsa_free_irqs_and_disable_msix(struct ctlr_info *h)
-{
-	free_irqs(h);
-#ifdef CONFIG_PCI_MSI
-	if (h->msix_vector) {
-		if (h->pdev->msix_enabled)
-			pci_disable_msix(h->pdev);
-	} else if (h->msi_vector) {
-		if (h->pdev->msi_enabled)
-			pci_disable_msi(h->pdev);
-	}
-#endif /* CONFIG_PCI_MSI */
-}
-
 static void hpsa_free_reply_queues(struct ctlr_info *h)
 {
 	int i;
@@ -6735,40 +7849,50 @@ static void hpsa_free_reply_queues(struct ctlr_info *h)
 			h->reply_queue[i].head, h->reply_queue[i].busaddr);
 		h->reply_queue[i].head = NULL;
 		h->reply_queue[i].busaddr = 0;
 	}
+	h->reply_queue_size = 0;	/* only after every queue is freed */
 }
 
 static void hpsa_undo_allocations_after_kdump_soft_reset(struct ctlr_info *h)
 {
-	hpsa_free_irqs_and_disable_msix(h);
-	hpsa_free_sg_chain_blocks(h);
-	hpsa_free_cmd_pool(h);
-	kfree(h->ioaccel1_blockFetchTable);
-	kfree(h->blockFetchTable);
-	hpsa_free_reply_queues(h);
-	if (h->vaddr)
-		iounmap(h->vaddr);
-	if (h->transtable)
-		iounmap(h->transtable);
-	if (h->cfgtable)
-		iounmap(h->cfgtable);
-	pci_disable_device(h->pdev);
-	pci_release_regions(h->pdev);
-	kfree(h);
+	hpsa_free_performant_mode(h);		/* init_one 7 */
+	hpsa_free_sg_chain_blocks(h);		/* init_one 6 */
+	hpsa_free_cmd_pool(h);			/* init_one 5 */
+	hpsa_free_irqs(h);			/* init_one 4 */
+	scsi_host_put(h->scsi_host);		/* init_one 3 */
+	h->scsi_host = NULL;			/* init_one 3 */
+	hpsa_free_pci_init(h);			/* init_one 2_5 */
+	free_percpu(h->lockup_detected);	/* init_one 2 */
+	h->lockup_detected = NULL;		/* init_one 2 */
+	if (h->resubmit_wq)
+		destroy_workqueue(h->resubmit_wq);	/* init_one 1 */
+	kfree(h);				/* init_one 1 */
 }
 
 /* Called when controller lockup detected. */
-static void fail_all_cmds_on_list(struct ctlr_info *h, struct list_head *list)
+static void fail_all_outstanding_cmds(struct ctlr_info *h)
 {
-	struct CommandList *c = NULL;
+	int i, refcount;
+	struct CommandList *c;
+	int failcount = 0;
 
-	assert_spin_locked(&h->lock);
-	/* Mark all outstanding commands as failed and complete them. */
-	while (!list_empty(list)) {
-		c = list_entry(list->next, struct CommandList, list);
-		c->err_info->CommandStatus = CMD_HARDWARE_ERR;
-		finish_cmd(c);
+	flush_workqueue(h->resubmit_wq); /* ensure all cmds are fully built */
+	for (i = 0; i < h->nr_cmds; i++) {
+		c = h->cmd_pool + i;
+		refcount = atomic_inc_return(&c->refcount);
+		if (refcount > 1) {
+			c->err_info->CommandStatus = CMD_CTLR_LOCKUP;
+			/* CMD_CTLR_LOCKUP gets finish_cmd to return
+			 * DID_NO_CONNECT which doesn't get retried
+			 */
+			finish_cmd(c);
+			atomic_dec(&h->commands_outstanding);
+			failcount++;
+		}
+		cmd_free(h, c);
 	}
+	dev_warn(&h->pdev->dev,
+		"failed %d commands in fail_all\n", failcount);
 }
 
 static void set_lockup_detected_for_all_cpus(struct ctlr_info *h, u32 value)
@@ -6796,18 +7920,16 @@ static void controller_lockup_detected(struct ctlr_info *h)
 	if (!lockup_detected) {
 		/* no heartbeat, but controller gave us a zero. */
 		dev_warn(&h->pdev->dev,
-			"lockup detected but scratchpad register is zero\n");
+			"lockup detected after %d but scratchpad register is zero\n",
+			h->heartbeat_sample_interval / HZ);
 		lockup_detected = 0xffffffff;
 	}
 	set_lockup_detected_for_all_cpus(h, lockup_detected);
 	spin_unlock_irqrestore(&h->lock, flags);
-	dev_warn(&h->pdev->dev, "Controller lockup detected: 0x%08x\n",
-			lockup_detected);
+	dev_warn(&h->pdev->dev, "Controller lockup detected: 0x%08x after %d\n",
+			lockup_detected, h->heartbeat_sample_interval / HZ);
 	pci_disable_device(h->pdev);
-	spin_lock_irqsave(&h->lock, flags);
-	fail_all_cmds_on_list(h, &h->cmpQ);
-	fail_all_cmds_on_list(h, &h->reqQ);
-	spin_unlock_irqrestore(&h->lock, flags);
+	fail_all_outstanding_cmds(h);
 }
 
 static void detect_controller_lockup(struct ctlr_info *h)
@@ -6816,6 +7938,9 @@ static void detect_controller_lockup(struct ctlr_info *h)
 	u32 heartbeat;
 	unsigned long flags;
 
+	if (!h->lockup_detector_enabled)
+		return;
+
 	now = get_jiffies_64();
 	/* If we've received an interrupt recently, we're ok. */
 	if (time_after64(h->last_intr_timestamp +
@@ -6850,8 +7975,8 @@ static void hpsa_ack_ctlr_events(struct ctlr_info *h)
 	int i;
 	char *event_type;
 
-	/* Clear the driver-requested rescan flag */
-	h->drv_req_rescan = 0;
+	if (!(h->fw_support & MISC_FW_EVENT_NOTIFY))
+		return;
 
 	/* Ask the controller to clear the events we're handling. */
 	if ((h->transMethod & (CFGTBL_Trans_io_accel1
@@ -6898,9 +8023,6 @@ static void hpsa_ack_ctlr_events(struct ctlr_info *h)
  */
 static int hpsa_ctlr_needs_rescan(struct ctlr_info *h)
 {
-	if (h->drv_req_rescan)
-		return 1;
-
 	if (!(h->fw_support & MISC_FW_EVENT_NOTIFY))
 		return 0;
 
@@ -6934,32 +8056,117 @@ static int hpsa_offline_devices_ready(struct ctlr_info *h)
 	return 0;
 }
 
-
-static void hpsa_monitor_ctlr_worker(struct work_struct *work)
+static void hpsa_requeue_worker(struct ctlr_info *h, struct delayed_work *wi)
 {
 	unsigned long flags;
+
+	spin_lock_irqsave(&h->lock, flags);
+	if (!h->remove_in_progress)
+		schedule_delayed_work(wi, h->heartbeat_sample_interval);
+	spin_unlock_irqrestore(&h->lock, flags);
+}
+
+static void hpsa_rescan_ctlr_worker(struct work_struct *work)
+{
 	struct ctlr_info *h = container_of(to_delayed_work(work),
-					struct ctlr_info, monitor_ctlr_work);
-	detect_controller_lockup(h);
-	if (lockup_detected(h))
-		return;
+					struct ctlr_info, rescan_ctlr_work);
 
 	if (hpsa_ctlr_needs_rescan(h) || hpsa_offline_devices_ready(h)) {
 		scsi_host_get(h->scsi_host);
-		h->drv_req_rescan = 0;
 		hpsa_ack_ctlr_events(h);
 		hpsa_scan_start(h->scsi_host);
 		scsi_host_put(h->scsi_host);
 	}
 
-	spin_lock_irqsave(&h->lock, flags);
-	if (h->remove_in_progress) {
-		spin_unlock_irqrestore(&h->lock, flags);
+	hpsa_requeue_worker(h, &h->rescan_ctlr_work);
+}
+
+static void hpsa_monitor_ctlr_worker(struct work_struct *work)
+{
+	struct ctlr_info *h = container_of(to_delayed_work(work),
+					struct ctlr_info, monitor_ctlr_work);
+	detect_controller_lockup(h);
+	if (lockup_detected(h))
 		return;
+
+	hpsa_requeue_worker(h, &h->monitor_ctlr_work);
+}
+
+static void check_board_id_tables(void)
+{
+	int i, j, found;
+	uint32_t board_id;
+	int count;
+
+	for (i = 0; i < ARRAY_SIZE(products); i++) {
+		board_id = products[i].board_id;
+		if (board_id == 0xFFFF103C) /* sentinel entry */
+			continue;
+		found = 0;
+		count = 0;
+		for (j = 0; j < ARRAY_SIZE(hpsa_pci_device_id); j++) {
+			uint32_t bid;
+
+			bid = (hpsa_pci_device_id[j].subdevice << 16) |
+				hpsa_pci_device_id[j].subvendor;
+			if (bid == board_id) {
+				found = 1;
+				if (i != j) {
+					printk(KERN_WARNING HPSA
+						": products[%d] (%s) found at pci table entry %d\n",
+						i, products[i].product_name, j);
+				}
+				count++;
+			}
+		}
+		if (!found)
+			printk(KERN_WARNING HPSA
+				": products[%d] (%s) not found in pci table\n",
+				i, products[i].product_name);
+		if (count > 1)
+			printk(KERN_WARNING HPSA
+				": products[%d] (%s) has duplicate entries in pci table\n",
+				i, products[i].product_name);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(hpsa_pci_device_id); i++) {
+		board_id = (hpsa_pci_device_id[i].subdevice << 16) |
+			hpsa_pci_device_id[i].subvendor;
+		if (hpsa_pci_device_id[i].vendor == 0) /* sentinel */
+			continue;
+		if (hpsa_pci_device_id[i].subvendor == PCI_ANY_ID &&
+			hpsa_pci_device_id[i].subdevice == PCI_ANY_ID)
+			continue;
+		count = 0;
+		found = 0;
+		for (j = 0; j < ARRAY_SIZE(products); j++) {
+			if (board_id == products[j].board_id) {
+				if (j != i) {
+					printk(KERN_WARNING HPSA
+						": pci table entry %d found at product entry %d (%s)\n",
+						i, j, products[j].product_name);
+				}
+				found = 1;
+				count++;
+			}
+		}
+		if (!found)
+			printk(KERN_WARNING HPSA
+				": pci table entry %d (%04x:%04x) not found in product table\n",
+				i, hpsa_pci_device_id[i].subvendor,
+				hpsa_pci_device_id[i].subdevice);
+		if (count > 1)
+			printk(KERN_WARNING HPSA
+				": pci table entry %d (%04x:%04x) has duplicate product[] entries\n",
+				i, hpsa_pci_device_id[i].subvendor,
+				hpsa_pci_device_id[i].subdevice);
+	}
+	if (ARRAY_SIZE(products) != ARRAY_SIZE(hpsa_pci_device_id) - 1) {
+		printk(KERN_WARNING HPSA
+			": suspicious relative cardinality of products vs hpsa_pci_device_id (%lu/%lu)\n",
+			(unsigned long) ARRAY_SIZE(products),
+			(unsigned long) ARRAY_SIZE(hpsa_pci_device_id));
 	}
-	schedule_delayed_work(&h->monitor_ctlr_work,
-				h->heartbeat_sample_interval);
-	spin_unlock_irqrestore(&h->lock, flags);
 }
 
 static int hpsa_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
@@ -6969,6 +8176,7 @@ static int hpsa_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	int try_soft_reset = 0;
 	unsigned long flags;
 
+	check_board_id_tables();
 	if (number_of_controllers == 0)
 		printk(KERN_INFO DRIVER_NAME "\n");
 
@@ -6993,33 +8201,42 @@ static int hpsa_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	 */
 	BUILD_BUG_ON(sizeof(struct CommandList) % COMMANDLIST_ALIGNMENT);
 	h = kzalloc(sizeof(*h), GFP_KERNEL);
-	if (!h)
+	if (!h) {
+		dev_err(&pdev->dev, "Failed to allocate controller head\n");
 		return -ENOMEM;
+	}
 
 	h->pdev = pdev;
+
 	h->intr_mode = hpsa_simple_mode ? SIMPLE_MODE_INT : PERF_MODE_INT;
-	INIT_LIST_HEAD(&h->cmpQ);
-	INIT_LIST_HEAD(&h->reqQ);
 	INIT_LIST_HEAD(&h->offline_device_list);
 	spin_lock_init(&h->lock);
 	spin_lock_init(&h->offline_device_lock);
 	spin_lock_init(&h->scan_lock);
-	spin_lock_init(&h->passthru_count_lock);
+	atomic_set(&h->passthru_cmds_avail, HPSA_MAX_CONCURRENT_PASSTHRUS);
+	atomic_set(&h->abort_cmds_available, HPSA_CMDS_RESERVED_FOR_ABORTS);
+	atomic_set(&h->cmds_sent, 0);
 
 	/* Allocate and clear per-cpu variable lockup_detected */
 	h->lockup_detected = alloc_percpu(u32);
 	if (!h->lockup_detected) {
+		dev_err(&h->pdev->dev, "Failed to allocate lockup detector\n");
 		rc = -ENOMEM;
-		goto clean1;
+		goto clean1;	/* aer/h */
 	}
 	set_lockup_detected_for_all_cpus(h, 0);
 
 	rc = hpsa_pci_init(h);
-	if (rc != 0)
-		goto clean1;
+	if (rc)
+		goto clean2;	/* lu, aer/h */
+
+	/* relies on h-> settings made by hpsa_pci_init, including
+	 * interrupt_mode h->intr */
+	rc = hpsa_scsi_host_alloc(h);
+	if (rc)
+		goto clean2_5;	/* pci, lu, aer/h */
 
-	sprintf(h->devname, HPSA "%d", number_of_controllers);
-	h->ctlr = number_of_controllers;
+	sprintf(h->devname, HPSA "%d", h->scsi_host->host_no);
 	number_of_controllers++;
 
 	/* configure PCI DMA stuff */
@@ -7032,31 +8249,48 @@ static int hpsa_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 			dac = 0;
 		} else {
 			dev_err(&pdev->dev, "no suitable DMA available\n");
-			goto clean1;
+			goto clean3;	/* shost, pci, lu, aer/h */
 		}
 	}
 
 	/* make sure the board interrupts are off */
 	h->access.set_intr_mask(h, HPSA_INTR_OFF);
 
-	if (hpsa_request_irq(h, do_hpsa_intr_msi, do_hpsa_intr_intx))
-		goto clean2;
-	dev_info(&pdev->dev, "%s: <0x%x> at IRQ %d%s using DAC\n",
-	       h->devname, pdev->device,
-	       h->intr[h->intr_mode], dac ? "" : " not");
-	if (hpsa_allocate_cmd_pool(h))
-		goto clean4;
-	if (hpsa_allocate_sg_chain_blocks(h))
-		goto clean4;
+	rc = hpsa_request_irqs(h, do_hpsa_intr_msi, do_hpsa_intr_intx);
+	if (rc)
+		goto clean3;	/* shost, pci, lu, aer/h */
+	rc = hpsa_alloc_cmd_pool(h);
+	if (rc)
+		goto clean4;	/* irq, shost, pci, lu, aer/h */
+	rc = hpsa_alloc_sg_chain_blocks(h);
+	if (rc)
+		goto clean5;	/* cmd, irq, shost, pci, lu, aer/h */
 	init_waitqueue_head(&h->scan_wait_queue);
+	init_waitqueue_head(&h->abort_cmd_wait_queue);
+	init_waitqueue_head(&h->abort_sync_wait_queue);
 	h->scan_finished = 1; /* no scan currently in progress */
 
 	pci_set_drvdata(pdev, h);
 	h->ndevices = 0;
 	h->hba_mode_enabled = 0;
-	h->scsi_host = NULL;
+
 	spin_lock_init(&h->devlock);
-	hpsa_put_ctlr_into_performant_mode(h);
+	rc = hpsa_put_ctlr_into_performant_mode(h);
+	if (rc)
+		goto clean6; /* sg, cmd, irq, shost, pci, lu, aer/h */
+
+	/* hook into SCSI subsystem */
+	rc = hpsa_scsi_add_host(h);
+	if (rc)
+		goto clean7; /* perf, sg, cmd, irq, shost, pci, lu, aer/h */
+
+	/* create the resubmit workqueue */
+	h->resubmit_wq = alloc_workqueue("hpsa", WQ_MEM_RECLAIM, 0);
+	if (!h->resubmit_wq) {
+		dev_err(&h->pdev->dev, "Failed to allocate work queue\n");
+		rc = -ENOMEM;
+		goto clean8; /* sh, perf, sg, cmd, irq, shost, pci, lu, aer/h */
+	}
 
 	/* At this point, the controller is ready to take commands.
 	 * Now, if reset_devices and the hard reset didn't work, try
@@ -7074,19 +8308,28 @@ static int hpsa_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		spin_lock_irqsave(&h->lock, flags);
 		h->access.set_intr_mask(h, HPSA_INTR_OFF);
 		spin_unlock_irqrestore(&h->lock, flags);
-		free_irqs(h);
-		rc = hpsa_request_irq(h, hpsa_msix_discard_completions,
+		hpsa_free_irqs(h);
+		rc = hpsa_request_irqs(h, hpsa_msix_discard_completions,
 					hpsa_intx_discard_completions);
 		if (rc) {
-			dev_warn(&h->pdev->dev, "Failed to request_irq after "
-				"soft reset.\n");
-			goto clean4;
+			dev_warn(&h->pdev->dev,
+				"Failed to request_irq after soft reset.\n");
+			/* cannot goto clean9 or free_irqs will be called
+			 * again. Instead, do its work */
+			if (h->resubmit_wq)
+				destroy_workqueue(h->resubmit_wq); /* clean9 */
+			hpsa_free_performant_mode(h);	/* clean7 */
+			hpsa_free_sg_chain_blocks(h);	/* clean6 */
+			hpsa_free_cmd_pool(h);		/* clean5 */
+			/* skip hpsa_free_irqs(h) clean4 since that
+			 * was just called before request_irqs failed */
+			goto clean3;
 		}
 
 		rc = hpsa_kdump_soft_reset(h);
 		if (rc)
 			/* Neither hard nor soft reset worked, we're hosed. */
-			goto clean4;
+			goto clean9;
 
 		dev_info(&h->pdev->dev, "Board READY.\n");
 		dev_info(&h->pdev->dev,
@@ -7107,38 +8350,58 @@ static int hpsa_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		hpsa_undo_allocations_after_kdump_soft_reset(h);
 		try_soft_reset = 0;
 		if (rc)
-			/* don't go to clean4, we already unallocated */
+			/* don't goto clean, we already unallocated */
 			return -ENODEV;
 
 		goto reinit_after_soft_reset;
 	}
 
-		/* Enable Accelerated IO path at driver layer */
-		h->acciopath_status = 1;
+	/* Enable Accelerated IO path at driver layer */
+	h->acciopath_status = 1;
 
-	h->drv_req_rescan = 0;
+	h->lockup_detector_enabled = 1;
 
 	/* Turn the interrupts on so we can service requests */
 	h->access.set_intr_mask(h, HPSA_INTR_ON);
 
 	hpsa_hba_inquiry(h);
-	hpsa_register_scsi(h);	/* hook ourselves into SCSI subsystem */
 
 	/* Monitor the controller for firmware lockups */
 	h->heartbeat_sample_interval = HEARTBEAT_SAMPLE_INTERVAL;
 	INIT_DELAYED_WORK(&h->monitor_ctlr_work, hpsa_monitor_ctlr_worker);
 	schedule_delayed_work(&h->monitor_ctlr_work,
 				h->heartbeat_sample_interval);
+	INIT_DELAYED_WORK(&h->rescan_ctlr_work, hpsa_rescan_ctlr_worker);
+	schedule_delayed_work(&h->rescan_ctlr_work,
+				h->heartbeat_sample_interval);
 	return 0;
 
-clean4:
+clean9: /* wq, sh, perf, sg, cmd, irq, shost, pci, lu, aer/h */
+	if (h->resubmit_wq)
+		destroy_workqueue(h->resubmit_wq);
+clean8: /* sh, perf, sg, cmd, irq, shost, pci, lu, aer/h */
+	/* nothing to clean from scsi_add_host...
+	 * scsi_host_put will do so below */
+clean7: /* perf, sg, cmd, irq, shost, pci, lu, aer/h */
+	hpsa_free_performant_mode(h);
+clean6: /* sg, cmd, irq, shost, pci, lu, aer/h */
 	hpsa_free_sg_chain_blocks(h);
+clean5: /* cmd, irq, shost, pci, lu, aer/h */
 	hpsa_free_cmd_pool(h);
-	free_irqs(h);
-clean2:
-clean1:
-	if (h->lockup_detected)
+clean4: /* irq, shost, pci, lu, aer/h */
+	hpsa_free_irqs(h);
+clean3: /* shost, pci, lu, aer/h */
+	scsi_host_put(h->scsi_host);
+	h->scsi_host = NULL;
+clean2_5: /* pci, lu, aer/h */
+	hpsa_free_pci_init(h);
+clean2: /* lu, aer/h */
+	if (h->lockup_detected) {
 		free_percpu(h->lockup_detected);
+		h->lockup_detected = NULL;
+	}
+clean1:	/* aer/h */
+	/* (void) pci_disable_pcie_error_reporting(pdev); */
 	kfree(h);
 	return rc;
 }
@@ -7147,30 +8410,31 @@ static void hpsa_flush_cache(struct ctlr_info *h)
 {
 	char *flush_buf;
 	struct CommandList *c;
+	int rc;
 
 	/* Don't bother trying to flush the cache if locked up */
+	/* FIXME not necessary if do_simple_cmd does the check */
 	if (unlikely(lockup_detected(h)))
 		return;
 	flush_buf = kzalloc(4, GFP_KERNEL);
 	if (!flush_buf)
 		return;
 
-	c = cmd_special_alloc(h);
-	if (!c) {
-		dev_warn(&h->pdev->dev, "cmd_special_alloc returned NULL!\n");
-		goto out_of_memory;
-	}
+	c = cmd_alloc(h);
+
 	if (fill_cmd(c, HPSA_CACHE_FLUSH, h, flush_buf, 4, 0,
 		RAID_CTLR_LUNID, TYPE_CMD)) {
 		goto out;
 	}
-	hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_TODEVICE);
+	rc = hpsa_scsi_do_simple_cmd_with_retry(h, c,
+					PCI_DMA_TODEVICE, NO_TIMEOUT);
+	if (rc)
+		goto out;
 	if (c->err_info->CommandStatus != 0)
 out:
 		dev_warn(&h->pdev->dev,
 			"error flushing cache on controller\n");
-	cmd_special_free(h, c);
-out_of_memory:
+	cmd_free(h, c);
 	kfree(flush_buf);
 }
 
@@ -7185,15 +8449,18 @@ static void hpsa_shutdown(struct pci_dev *pdev)
 	 */
 	hpsa_flush_cache(h);
 	h->access.set_intr_mask(h, HPSA_INTR_OFF);
-	hpsa_free_irqs_and_disable_msix(h);
+	hpsa_free_irqs(h);			/* init_one 4 */
+	hpsa_disable_interrupt_mode(h);		/* pci_init 2 */
 }
 
 static void hpsa_free_device_info(struct ctlr_info *h)
 {
 	int i;
 
-	for (i = 0; i < h->ndevices; i++)
+	for (i = 0; i < h->ndevices; i++) {
 		kfree(h->dev[i]);
+		h->dev[i] = NULL;
+	}
 }
 
 static void hpsa_remove_one(struct pci_dev *pdev)
@@ -7210,32 +8477,38 @@ static void hpsa_remove_one(struct pci_dev *pdev)
 	/* Get rid of any controller monitoring work items */
 	spin_lock_irqsave(&h->lock, flags);
 	h->remove_in_progress = 1;
-	cancel_delayed_work(&h->monitor_ctlr_work);
 	spin_unlock_irqrestore(&h->lock, flags);
+	cancel_delayed_work_sync(&h->monitor_ctlr_work); /* sleeps: no lock */
+	cancel_delayed_work_sync(&h->rescan_ctlr_work); /* worker takes lock */
 
-	hpsa_unregister_scsi(h);	/* unhook from SCSI subsystem */
+	/* includes hpsa_free_irqs - init_one 4 */
+	/* includes hpsa_disable_interrupt_mode - pci_init 2 */
 	hpsa_shutdown(pdev);
-	iounmap(h->vaddr);
-	iounmap(h->transtable);
-	iounmap(h->cfgtable);
-	hpsa_free_device_info(h);
-	hpsa_free_sg_chain_blocks(h);
-	pci_free_consistent(h->pdev,
-		h->nr_cmds * sizeof(struct CommandList),
-		h->cmd_pool, h->cmd_pool_dhandle);
-	pci_free_consistent(h->pdev,
-		h->nr_cmds * sizeof(struct ErrorInfo),
-		h->errinfo_pool, h->errinfo_pool_dhandle);
-	hpsa_free_reply_queues(h);
-	kfree(h->cmd_pool_bits);
-	kfree(h->blockFetchTable);
-	kfree(h->ioaccel1_blockFetchTable);
-	kfree(h->ioaccel2_blockFetchTable);
-	kfree(h->hba_inquiry_data);
-	pci_disable_device(pdev);
-	pci_release_regions(pdev);
-	free_percpu(h->lockup_detected);
-	kfree(h);
+
+	hpsa_free_device_info(h);		/* scan */
+
+	kfree(h->hba_inquiry_data);			/* init_one 10 */
+	h->hba_inquiry_data = NULL;			/* init_one 10 */
+	if (h->resubmit_wq)
+		destroy_workqueue(h->resubmit_wq);	/* init_one 9 */
+	scsi_remove_host(h->scsi_host);			/* init_one 8 */
+	hpsa_free_ioaccel2_sg_chain_blocks(h);
+	hpsa_free_performant_mode(h);			/* init_one 7 */
+	hpsa_free_sg_chain_blocks(h);			/* init_one 6 */
+	hpsa_free_cmd_pool(h);				/* init_one 5 */
+
+	/* hpsa_free_irqs already called via hpsa_shutdown init_one 4 */
+
+	scsi_host_put(h->scsi_host);			/* init_one 3 */
+	h->scsi_host = NULL;				/* init_one 3 */
+
+	/* includes hpsa_disable_interrupt_mode - pci_init 2 */
+	hpsa_free_pci_init(h);				/* init_one 2.5 */
+
+	free_percpu(h->lockup_detected);		/* init_one 2 */
+	h->lockup_detected = NULL;			/* init_one 2 */
+	/* (void) pci_disable_pcie_error_reporting(pdev); */	/* init_one 1 */
+	kfree(h);					/* init_one 1 */
 }
 
 static int hpsa_suspend(__attribute__((unused)) struct pci_dev *pdev,
@@ -7293,7 +8566,8 @@ static void  calc_bucket_map(int bucket[], int num_buckets,
 	}
 }
 
-static void hpsa_enter_performant_mode(struct ctlr_info *h, u32 trans_support)
+/* return -ENODEV or other reason on error, 0 on success */
+static int hpsa_enter_performant_mode(struct ctlr_info *h, u32 trans_support)
 {
 	int i;
 	unsigned long register_value;
@@ -7385,12 +8659,17 @@ static void hpsa_enter_performant_mode(struct ctlr_info *h, u32 trans_support)
 		}
 	}
 	writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
-	hpsa_wait_for_mode_change_ack(h);
+	if (hpsa_wait_for_mode_change_ack(h)) {
+		dev_err(&h->pdev->dev,
+			"performant mode problem - doorbell timeout\n");
+		return -ENODEV;
+	}
+	print_cfg_table(&h->pdev->dev, h->cfgtable);
 	register_value = readl(&(h->cfgtable->TransportActive));
 	if (!(register_value & CFGTBL_Trans_Performant)) {
-		dev_warn(&h->pdev->dev, "unable to get board into"
-					" performant mode\n");
-		return;
+		dev_err(&h->pdev->dev,
+			"performant mode problem - transport not active\n");
+		return -ENODEV;
 	}
 	/* Change the access methods to the performant access methods */
 	h->access = access;
@@ -7398,7 +8677,7 @@ static void hpsa_enter_performant_mode(struct ctlr_info *h, u32 trans_support)
 
 	if (!((trans_support & CFGTBL_Trans_io_accel1) ||
 		(trans_support & CFGTBL_Trans_io_accel2)))
-		return;
+		return 0;
 
 	if (trans_support & CFGTBL_Trans_io_accel1) {
 		/* Set up I/O accelerator mode */
@@ -7431,13 +8710,10 @@ static void hpsa_enter_performant_mode(struct ctlr_info *h, u32 trans_support)
 			cp->host_context_flags = IOACCEL1_HCFLAGS_CISS_FORMAT;
 			cp->timeout_sec = 0;
 			cp->ReplyQueue = 0;
-			cp->Tag.lower = (i << DIRECT_LOOKUP_SHIFT) |
-						DIRECT_LOOKUP_BIT;
-			cp->Tag.upper = 0;
-			cp->host_addr.lower =
-				(u32) (h->ioaccel_cmd_pool_dhandle +
-					(i * sizeof(struct io_accel1_cmd)));
-			cp->host_addr.upper = 0;
+			cp->tag = cpu_to_le64((u64) i << DIRECT_LOOKUP_SHIFT);
+			cp->host_addr =
+			    cpu_to_le64((u64) (h->ioaccel_cmd_pool_dhandle +
+					(i * sizeof(struct io_accel1_cmd))));
 		}
 	} else if (trans_support & CFGTBL_Trans_io_accel2) {
 		u64 cfg_offset, cfg_base_addr_index;
@@ -7463,10 +8739,31 @@ static void hpsa_enter_performant_mode(struct ctlr_info *h, u32 trans_support)
 			writel(bft2[i], &h->ioaccel2_bft2_regs[i]);
 	}
 	writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
-	hpsa_wait_for_mode_change_ack(h);
+	if (hpsa_wait_for_mode_change_ack(h)) {
+		dev_err(&h->pdev->dev,
+			"performant mode problem - enabling ioaccel mode\n");
+		return -ENODEV;
+	}
+	print_cfg_table(&h->pdev->dev, h->cfgtable);
+	return 0;
+}
+
+/* Free ioaccel1 mode command blocks and block fetch table */
+static void hpsa_free_ioaccel1_cmd_and_bft(struct ctlr_info *h)
+{
+	if (h->ioaccel_cmd_pool) {
+		pci_free_consistent(h->pdev,
+			h->nr_cmds * sizeof(*h->ioaccel_cmd_pool),
+			h->ioaccel_cmd_pool, h->ioaccel_cmd_pool_dhandle);
+		h->ioaccel_cmd_pool = NULL;
+		h->ioaccel_cmd_pool_dhandle = 0;
+	}
+	kfree(h->ioaccel1_blockFetchTable);
+	h->ioaccel1_blockFetchTable = NULL;
 }
 
-static int hpsa_alloc_ioaccel_cmd_and_bft(struct ctlr_info *h)
+/* Allocate ioaccel1 mode command blocks and block fetch table */
+static int hpsa_alloc_ioaccel1_cmd_and_bft(struct ctlr_info *h)
 {
 	h->ioaccel_maxsg =
 		readl(&(h->cfgtable->io_accel_max_embedded_sg_count));
@@ -7497,16 +8794,31 @@ static int hpsa_alloc_ioaccel_cmd_and_bft(struct ctlr_info *h)
 	return 0;
 
 clean_up:
-	if (h->ioaccel_cmd_pool)
+	hpsa_free_ioaccel1_cmd_and_bft(h);
+	return -ENOMEM;
+}
+
+/* Free ioaccel2 mode command blocks and block fetch table */
+static void hpsa_free_ioaccel2_cmd_and_bft(struct ctlr_info *h)
+{
+	hpsa_free_ioaccel2_sg_chain_blocks(h);
+
+	if (h->ioaccel2_cmd_pool) {
 		pci_free_consistent(h->pdev,
-			h->nr_cmds * sizeof(*h->ioaccel_cmd_pool),
-			h->ioaccel_cmd_pool, h->ioaccel_cmd_pool_dhandle);
-	kfree(h->ioaccel1_blockFetchTable);
-	return 1;
+			h->nr_cmds * sizeof(*h->ioaccel2_cmd_pool),
+			h->ioaccel2_cmd_pool, h->ioaccel2_cmd_pool_dhandle);
+		h->ioaccel2_cmd_pool = NULL;
+		h->ioaccel2_cmd_pool_dhandle = 0;
+	}
+	kfree(h->ioaccel2_blockFetchTable);
+	h->ioaccel2_blockFetchTable = NULL;
 }
 
-static int ioaccel2_alloc_cmds_and_bft(struct ctlr_info *h)
+/* Allocate ioaccel2 mode command blocks and block fetch table */
+static int hpsa_alloc_ioaccel2_cmd_and_bft(struct ctlr_info *h)
 {
+	int rc;
+
 	/* Allocate ioaccel2 mode command blocks and block fetch table */
 
 	h->ioaccel_maxsg =
@@ -7526,7 +8838,13 @@ static int ioaccel2_alloc_cmds_and_bft(struct ctlr_info *h)
 				sizeof(u32)), GFP_KERNEL);
 
 	if ((h->ioaccel2_cmd_pool == NULL) ||
-		(h->ioaccel2_blockFetchTable == NULL))
+		(h->ioaccel2_blockFetchTable == NULL)) {
+		rc = -ENOMEM;
+		goto clean_up;
+	}
+
+	rc = hpsa_allocate_ioaccel2_sg_chain_blocks(h);
+	if (rc)
 		goto clean_up;
 
 	memset(h->ioaccel2_cmd_pool, 0,
@@ -7534,41 +8852,50 @@ static int ioaccel2_alloc_cmds_and_bft(struct ctlr_info *h)
 	return 0;
 
 clean_up:
-	if (h->ioaccel2_cmd_pool)
-		pci_free_consistent(h->pdev,
-			h->nr_cmds * sizeof(*h->ioaccel2_cmd_pool),
-			h->ioaccel2_cmd_pool, h->ioaccel2_cmd_pool_dhandle);
-	kfree(h->ioaccel2_blockFetchTable);
-	return 1;
+	hpsa_free_ioaccel2_cmd_and_bft(h);
+	return rc;
+}
+
+/* Free items allocated by hpsa_put_ctlr_into_performant_mode */
+static void hpsa_free_performant_mode(struct ctlr_info *h)
+{
+	kfree(h->blockFetchTable);
+	h->blockFetchTable = NULL;
+	hpsa_free_reply_queues(h);
+	hpsa_free_ioaccel1_cmd_and_bft(h);
+	hpsa_free_ioaccel2_cmd_and_bft(h);
 }
 
-static void hpsa_put_ctlr_into_performant_mode(struct ctlr_info *h)
+/* return a negative errno (-ENODEV, -ENOMEM, ...) on error, 0 on success
+ * (or no action); what it allocates is freed by hpsa_free_performant_mode()
+ */
+static int hpsa_put_ctlr_into_performant_mode(struct ctlr_info *h)
 {
 	u32 trans_support;
 	unsigned long transMethod = CFGTBL_Trans_Performant |
 					CFGTBL_Trans_use_short_tags;
-	int i;
+	int i, rc;
 
 	if (hpsa_simple_mode)
-		return;
+		return 0;
 
 	trans_support = readl(&(h->cfgtable->TransportSupport));
 	if (!(trans_support & PERFORMANT_MODE))
-		return;
+		return 0;
 
 	/* Check for I/O accelerator mode support */
 	if (trans_support & CFGTBL_Trans_io_accel1) {
 		transMethod |= CFGTBL_Trans_io_accel1 |
 				CFGTBL_Trans_enable_directed_msix;
-		if (hpsa_alloc_ioaccel_cmd_and_bft(h))
-			goto clean_up;
-	} else {
-		if (trans_support & CFGTBL_Trans_io_accel2) {
-				transMethod |= CFGTBL_Trans_io_accel2 |
+		rc = hpsa_alloc_ioaccel1_cmd_and_bft(h);
+		if (rc)
+			return rc;
+	} else if (trans_support & CFGTBL_Trans_io_accel2) {
+		transMethod |= CFGTBL_Trans_io_accel2 |
 				CFGTBL_Trans_enable_directed_msix;
-		if (ioaccel2_alloc_cmds_and_bft(h))
-			goto clean_up;
-		}
+		rc = hpsa_alloc_ioaccel2_cmd_and_bft(h);
+		if (rc)
+			return rc;
 	}
 
 	h->nreply_queues = h->msix_vector > 0 ? h->msix_vector : 1;
@@ -7580,8 +8907,10 @@ static void hpsa_put_ctlr_into_performant_mode(struct ctlr_info *h)
 		h->reply_queue[i].head = pci_alloc_consistent(h->pdev,
 						h->reply_queue_size,
 						&(h->reply_queue[i].busaddr));
-		if (!h->reply_queue[i].head)
-			goto clean_up;
+		if (!h->reply_queue[i].head) {
+			rc = -ENOMEM;
+			goto clean1;	/* rq, ioaccel */
+		}
 		h->reply_queue[i].size = h->max_commands;
 		h->reply_queue[i].wraparound = 1;  /* spec: init to 1 */
 		h->reply_queue[i].current_entry = 0;
@@ -7590,15 +8919,24 @@ static void hpsa_put_ctlr_into_performant_mode(struct ctlr_info *h)
 	/* Need a block fetch table for performant mode */
 	h->blockFetchTable = kmalloc(((SG_ENTRIES_IN_CMD + 1) *
 				sizeof(u32)), GFP_KERNEL);
-	if (!h->blockFetchTable)
-		goto clean_up;
+	if (!h->blockFetchTable) {
+		rc = -ENOMEM;
+		goto clean1;	/* rq, ioaccel */
+	}
 
-	hpsa_enter_performant_mode(h, trans_support);
-	return;
+	rc = hpsa_enter_performant_mode(h, trans_support);
+	if (rc)
+		goto clean2;	/* bft, rq, ioaccel */
+	return 0;
 
-clean_up:
-	hpsa_free_reply_queues(h);
+clean2:	/* bft, rq, ioaccel */
 	kfree(h->blockFetchTable);
+	h->blockFetchTable = NULL;
+clean1:	/* rq, ioaccel */
+	hpsa_free_reply_queues(h);
+	hpsa_free_ioaccel1_cmd_and_bft(h);
+	hpsa_free_ioaccel2_cmd_and_bft(h);
+	return rc;
 }
 
 static int is_accelerated_cmd(struct CommandList *c)
@@ -7609,17 +8947,18 @@ static int is_accelerated_cmd(struct CommandList *c)
 static void hpsa_drain_accel_commands(struct ctlr_info *h)
 {
 	struct CommandList *c = NULL;
-	unsigned long flags;
-	int accel_cmds_out;
+	int i, accel_cmds_out;
+	int refcount;
 
-	do { /* wait for all outstanding commands to drain out */
+	do { /* wait for all outstanding ioaccel commands to drain out */
 		accel_cmds_out = 0;
-		spin_lock_irqsave(&h->lock, flags);
-		list_for_each_entry(c, &h->cmpQ, list)
-			accel_cmds_out += is_accelerated_cmd(c);
-		list_for_each_entry(c, &h->reqQ, list)
-			accel_cmds_out += is_accelerated_cmd(c);
-		spin_unlock_irqrestore(&h->lock, flags);
+		for (i = 0; i < h->nr_cmds; i++) {
+			c = h->cmd_pool + i;
+			refcount = atomic_inc_return(&c->refcount);
+			if (refcount > 1) /* Command is allocated */
+				accel_cmds_out += is_accelerated_cmd(c);
+			cmd_free(h, c);
+		}
 		if (accel_cmds_out <= 0)
 			break;
 		msleep(100);
@@ -7711,7 +9050,7 @@ static void __attribute__((unused)) verify_offsets(void)
 	VERIFY_OFFSET(timeout_sec, 0x62);
 	VERIFY_OFFSET(ReplyQueue, 0x64);
 	VERIFY_OFFSET(reserved9, 0x65);
-	VERIFY_OFFSET(Tag, 0x68);
+	VERIFY_OFFSET(tag, 0x68);
 	VERIFY_OFFSET(host_addr, 0x70);
 	VERIFY_OFFSET(CISS_LUN, 0x78);
 	VERIFY_OFFSET(SG, 0x78 + 8);
diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h
index 24472cec7de34d..5643d55ac5a805 100644
--- a/drivers/scsi/hpsa.h
+++ b/drivers/scsi/hpsa.h
@@ -32,7 +32,6 @@ struct access_method {
 	void (*submit_command)(struct ctlr_info *h,
 		struct CommandList *c);
 	void (*set_intr_mask)(struct ctlr_info *h, unsigned long val);
-	unsigned long (*fifo_full)(struct ctlr_info *h);
 	bool (*intr_pending)(struct ctlr_info *h);
 	unsigned long (*command_completed)(struct ctlr_info *h, u8 q);
 };
@@ -47,14 +46,36 @@ struct hpsa_scsi_dev_t {
 	unsigned char model[16];        /* bytes 16-31 of inquiry data */
 	unsigned char raid_level;	/* from inquiry page 0xC1 */
 	unsigned char volume_offline;	/* discovered via TUR or VPD */
+	u16 queue_depth;		/* max queue_depth for this device */
+	atomic_t ioaccel_cmds_out;	/* Only used for physical devices
+					 * counts commands sent to physical
+					 * device via "ioaccel" path.
+					 */
 	u32 ioaccel_handle;
 	int offload_config;		/* I/O accel RAID offload configured */
 	int offload_enabled;		/* I/O accel RAID offload enabled */
+	int offload_to_be_enabled;
+	int hba_ioaccel_enabled;
 	int offload_to_mirror;		/* Send next I/O accelerator RAID
 					 * offload request to mirror drive
 					 */
 	struct raid_map_data raid_map;	/* I/O accelerator RAID map */
-
+	int supports_aborts;
+#define HPSA_DO_NOT_EXPOSE	0x0
+#define HPSA_SG_ATTACH		0x1
+#define HPSA_ULD_ATTACH		0x2
+#define HPSA_SCSI_ADD		(HPSA_SG_ATTACH | HPSA_ULD_ATTACH)
+	u8 expose_state;
+
+	/* Pointers from logical drive map indices to the phys drives that
+	 * make those logical drives.  Note, multiple logical drives may
+	 * share physical drives.  You can have for instance 5 physical
+	 * drives with 3 logical drives each using those same 5 physical
+	 * disks. We need these pointers for counting i/o's out to physical
+	 * devices in order to honor physical device queue depth limits.
+	 */
+	struct hpsa_scsi_dev_t *phys_disk[RAID_MAP_MAX_ENTRIES];
+	int nphysical_disks;
 };
 
 struct reply_queue_buffer {
@@ -115,10 +136,12 @@ struct ctlr_info {
 	void __iomem *vaddr;
 	unsigned long paddr;
 	int 	nr_cmds; /* Number of commands allowed on this controller */
+#define HPSA_CMDS_RESERVED_FOR_ABORTS 2
+#define HPSA_CMDS_RESERVED_FOR_DRIVER 1
 	struct CfgTable __iomem *cfgtable;
 	int	interrupts_enabled;
 	int 	max_commands;
-	int	commands_outstanding;
+	atomic_t commands_outstanding;
 #	define PERF_MODE_INT	0
 #	define DOORBELL_INT	1
 #	define SIMPLE_MODE_INT	2
@@ -131,8 +154,6 @@ struct ctlr_info {
 	char hba_mode_enabled;
 
 	/* queue and queue Info */
-	struct list_head reqQ;
-	struct list_head cmpQ;
 	unsigned int Qdepth;
 	unsigned int maxSG;
 	spinlock_t lock;
@@ -140,6 +161,7 @@ struct ctlr_info {
 	u8 max_cmd_sg_entries;
 	int chainsize;
 	struct SGDescriptor **cmd_sg_list;
+	struct ioaccel2_sg_element **ioaccel2_cmd_sg_list;
 
 	/* pointers to command and error info pool */
 	struct CommandList 	*cmd_pool;
@@ -168,9 +190,8 @@ struct ctlr_info {
 	unsigned long transMethod;
 
 	/* cap concurrent passthrus at some reasonable maximum */
-#define HPSA_MAX_CONCURRENT_PASSTHRUS (20)
-	spinlock_t passthru_count_lock; /* protects passthru_count */
-	int passthru_count;
+#define HPSA_MAX_CONCURRENT_PASSTHRUS (10)
+	atomic_t passthru_cmds_avail;
 
 	/*
 	 * Performant mode completion buffers
@@ -194,10 +215,11 @@ struct ctlr_info {
 	atomic_t firmware_flash_in_progress;
 	u32 *lockup_detected;
 	struct delayed_work monitor_ctlr_work;
+	struct delayed_work rescan_ctlr_work;
 	int remove_in_progress;
-	u32 fifo_recently_full;
 	/* Address of h->q[x] is passed to intr handler to know which queue */
 	u8 q[MAX_REPLY_QUEUES];
+	char intrname[MAX_REPLY_QUEUES][16];	/* "hpsa0-msix00" names */
 	u32 TMFSupportFlags; /* cache what task mgmt funcs are supported. */
 #define HPSATMF_BITS_SUPPORTED  (1 << 0)
 #define HPSATMF_PHYS_LUN_RESET  (1 << 1)
@@ -209,6 +231,7 @@ struct ctlr_info {
 #define HPSATMF_PHYS_QRY_TASK   (1 << 7)
 #define HPSATMF_PHYS_QRY_TSET   (1 << 8)
 #define HPSATMF_PHYS_QRY_ASYNC  (1 << 9)
+#define HPSATMF_IOACCEL_ENABLED (1 << 15)
 #define HPSATMF_MASK_SUPPORTED  (1 << 16)
 #define HPSATMF_LOG_LUN_RESET   (1 << 17)
 #define HPSATMF_LOG_NEX_RESET   (1 << 18)
@@ -237,8 +260,16 @@ struct ctlr_info {
 	spinlock_t offline_device_lock;
 	struct list_head offline_device_list;
 	int	acciopath_status;
-	int	drv_req_rescan;	/* flag for driver to request rescan event */
 	int	raid_offload_debug;
+	int	lockup_detector_enabled;
+	int	needs_abort_tags_swizzled;
+	struct workqueue_struct *resubmit_wq;
+	atomic_t abort_cmds_available;
+	wait_queue_head_t abort_cmd_wait_queue;
+	wait_queue_head_t abort_sync_wait_queue;
+	atomic_t cmds_sent;
+	int abort_test;
+	int abort_timeout;
 };
 
 struct offline_device_entry {
@@ -297,6 +328,8 @@ struct offline_device_entry {
  */
 #define SA5_DOORBELL	0x20
 #define SA5_REQUEST_PORT_OFFSET	0x40
+#define SA5_REQUEST_PORT64_LO_OFFSET 0xC0
+#define SA5_REQUEST_PORT64_HI_OFFSET 0xC4
 #define SA5_REPLY_INTR_MASK_OFFSET	0x34
 #define SA5_REPLY_PORT_OFFSET		0x44
 #define SA5_INTR_STATUS		0x30
@@ -353,10 +386,7 @@ static void SA5_submit_command_no_read(struct ctlr_info *h,
 static void SA5_submit_command_ioaccel2(struct ctlr_info *h,
 	struct CommandList *c)
 {
-	if (c->cmd_type == CMD_IOACCEL2)
-		writel(c->busaddr, h->vaddr + IOACCEL2_INBOUND_POSTQ_32);
-	else
-		writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET);
+	writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET);
 }
 
 /*
@@ -395,27 +425,25 @@ static void SA5_performant_intr_mask(struct ctlr_info *h, unsigned long val)
 static unsigned long SA5_performant_completed(struct ctlr_info *h, u8 q)
 {
 	struct reply_queue_buffer *rq = &h->reply_queue[q];
-	unsigned long flags, register_value = FIFO_EMPTY;
+	unsigned long register_value = FIFO_EMPTY;
 
 	/* msi auto clears the interrupt pending bit. */
-	if (!(h->msi_vector || h->msix_vector)) {
+	if (unlikely(!(h->msi_vector || h->msix_vector))) {
 		/* flush the controller write of the reply queue by reading
 		 * outbound doorbell status register.
 		 */
-		register_value = readl(h->vaddr + SA5_OUTDB_STATUS);
+		(void) readl(h->vaddr + SA5_OUTDB_STATUS);
 		writel(SA5_OUTDB_CLEAR_PERF_BIT, h->vaddr + SA5_OUTDB_CLEAR);
 		/* Do a read in order to flush the write to the controller
 		 * (as per spec.)
 		 */
-		register_value = readl(h->vaddr + SA5_OUTDB_STATUS);
+		(void) readl(h->vaddr + SA5_OUTDB_STATUS);
 	}
 
-	if ((rq->head[rq->current_entry] & 1) == rq->wraparound) {
+	if ((((u32) rq->head[rq->current_entry]) & 1) == rq->wraparound) {
 		register_value = rq->head[rq->current_entry];
 		rq->current_entry++;
-		spin_lock_irqsave(&h->lock, flags);
-		h->commands_outstanding--;
-		spin_unlock_irqrestore(&h->lock, flags);
+		atomic_dec(&h->commands_outstanding);
 	} else {
 		register_value = FIFO_EMPTY;
 	}
@@ -427,18 +455,6 @@ static unsigned long SA5_performant_completed(struct ctlr_info *h, u8 q)
 	return register_value;
 }
 
-/*
- *  Returns true if fifo is full.
- *
- */
-static unsigned long SA5_fifo_full(struct ctlr_info *h)
-{
-	if (h->commands_outstanding >= h->max_commands)
-		return 1;
-	else
-		return 0;
-
-}
 /*
  *   returns value read from hardware.
  *     returns FIFO_EMPTY if there is nothing to read
@@ -448,13 +464,9 @@ static unsigned long SA5_completed(struct ctlr_info *h,
 {
 	unsigned long register_value
 		= readl(h->vaddr + SA5_REPLY_PORT_OFFSET);
-	unsigned long flags;
 
-	if (register_value != FIFO_EMPTY) {
-		spin_lock_irqsave(&h->lock, flags);
-		h->commands_outstanding--;
-		spin_unlock_irqrestore(&h->lock, flags);
-	}
+	if (register_value != FIFO_EMPTY)
+		atomic_dec(&h->commands_outstanding);
 
 #ifdef HPSA_DEBUG
 	if (register_value != FIFO_EMPTY)
@@ -483,9 +495,6 @@ static bool SA5_performant_intr_pending(struct ctlr_info *h)
 	if (!register_value)
 		return false;
 
-	if (h->msi_vector || h->msix_vector)
-		return true;
-
 	/* Read outbound doorbell to flush */
 	register_value = readl(h->vaddr + SA5_OUTDB_STATUS);
 	return register_value & SA5_OUTDB_STATUS_PERF_BIT;
@@ -510,7 +519,6 @@ static unsigned long SA5_ioaccel_mode1_completed(struct ctlr_info *h, u8 q)
 {
 	u64 register_value;
 	struct reply_queue_buffer *rq = &h->reply_queue[q];
-	unsigned long flags;
 
 	BUG_ON(q >= h->nreply_queues);
 
@@ -528,9 +536,7 @@ static unsigned long SA5_ioaccel_mode1_completed(struct ctlr_info *h, u8 q)
 		wmb();
 		writel((q << 24) | rq->current_entry, h->vaddr +
 				IOACCEL_MODE1_CONSUMER_INDEX);
-		spin_lock_irqsave(&h->lock, flags);
-		h->commands_outstanding--;
-		spin_unlock_irqrestore(&h->lock, flags);
+		atomic_dec(&h->commands_outstanding);
 	}
 	return (unsigned long) register_value;
 }
@@ -538,7 +544,6 @@ static unsigned long SA5_ioaccel_mode1_completed(struct ctlr_info *h, u8 q)
 static struct access_method SA5_access = {
 	SA5_submit_command,
 	SA5_intr_mask,
-	SA5_fifo_full,
 	SA5_intr_pending,
 	SA5_completed,
 };
@@ -546,7 +551,6 @@ static struct access_method SA5_access = {
 static struct access_method SA5_ioaccel_mode1_access = {
 	SA5_submit_command,
 	SA5_performant_intr_mask,
-	SA5_fifo_full,
 	SA5_ioaccel_mode1_intr_pending,
 	SA5_ioaccel_mode1_completed,
 };
@@ -554,7 +558,6 @@ static struct access_method SA5_ioaccel_mode1_access = {
 static struct access_method SA5_ioaccel_mode2_access = {
 	SA5_submit_command_ioaccel2,
 	SA5_performant_intr_mask,
-	SA5_fifo_full,
 	SA5_performant_intr_pending,
 	SA5_performant_completed,
 };
@@ -562,7 +565,6 @@ static struct access_method SA5_ioaccel_mode2_access = {
 static struct access_method SA5_performant_access = {
 	SA5_submit_command,
 	SA5_performant_intr_mask,
-	SA5_fifo_full,
 	SA5_performant_intr_pending,
 	SA5_performant_completed,
 };
@@ -570,7 +572,6 @@ static struct access_method SA5_performant_access = {
 static struct access_method SA5_performant_access_no_read = {
 	SA5_submit_command_no_read,
 	SA5_performant_intr_mask,
-	SA5_fifo_full,
 	SA5_performant_intr_pending,
 	SA5_performant_completed,
 };
diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h
index b5125dc3143912..5ab1b13f996f83 100644
--- a/drivers/scsi/hpsa_cmd.h
+++ b/drivers/scsi/hpsa_cmd.h
@@ -42,8 +42,22 @@
 #define CMD_UNSOLICITED_ABORT   0x000A
 #define CMD_TIMEOUT             0x000B
 #define CMD_UNABORTABLE		0x000C
+#define CMD_TMF_STATUS		0x000D
 #define CMD_IOACCEL_DISABLED	0x000E
+#define CMD_CTLR_LOCKUP		0xffff
+/* Note: CMD_CTLR_LOCKUP is not a value defined by the CISS spec;
+ * it is a value defined by the driver that commands can be marked
+ * with when a controller lockup has been detected by the driver.
+ */
 
+/* TMF function status values */
+#define CISS_TMF_COMPLETE	0x00
+#define CISS_TMF_INVALID_FRAME	0x02
+#define CISS_TMF_NOT_SUPPORTED	0x04
+#define CISS_TMF_FAILED		0x05
+#define CISS_TMF_SUCCESS	0x08
+#define CISS_TMF_WRONG_LUN	0x09
+#define CISS_TMF_OVERLAPPED_TAG 0x0a
 
 /* Unit Attentions ASC's as defined for the MSA2012sa */
 #define POWER_OR_RESET			0x29
@@ -240,9 +254,16 @@ struct ReportLUNdata {
 
 struct ext_report_lun_entry {
 	u8 lunid[8];
+#define MASKED_DEVICE(x) ((x)[3] & 0xC0)
+#define GET_BMIC_BUS(lunid) ((lunid)[7] & 0x3F)
+#define GET_BMIC_LEVEL_TWO_TARGET(lunid) ((lunid)[6])
+#define GET_BMIC_DRIVE_NUMBER(lunid) (((GET_BMIC_BUS((lunid)) - 1) << 8) + \
+			GET_BMIC_LEVEL_TWO_TARGET((lunid)))
 	u8 wwid[8];
 	u8 device_type;
 	u8 device_flags;
+#define NON_DISK_PHYS_DEV(x) ((x)[17] & 0x01)
+#define PHYS_IOACCEL(x) ((x)[17] & 0x08)
 	u8 lun_count; /* multi-lun device, how many luns */
 	u8 redundant_paths;
 	u32 ioaccel_handle; /* ioaccel1 only uses lower 16 bits */
@@ -252,7 +273,7 @@ struct ReportExtendedLUNdata {
 	u8 LUNListLength[4];
 	u8 extended_response_flag;
 	u8 reserved[3];
-	struct ext_report_lun_entry LUN[HPSA_MAX_LUN];
+	struct ext_report_lun_entry LUN[HPSA_MAX_PHYS_LUN];
 };
 
 struct SenseSubsystem_info {
@@ -268,6 +289,7 @@ struct SenseSubsystem_info {
 #define HPSA_CACHE_FLUSH 0x01	/* C2 was already being used by HPSA */
 #define BMIC_FLASH_FIRMWARE 0xF7
 #define BMIC_SENSE_CONTROLLER_PARAMETERS 0x64
+#define BMIC_IDENTIFY_PHYSICAL_DEVICE 0x15
 
 /* Command List Structure */
 union SCSI3Addr {
@@ -314,28 +336,36 @@ struct CommandListHeader {
 	u8              ReplyQueue;
 	u8              SGList;
 	u16             SGTotal;
-	struct vals32     Tag;
+	u64		tag;
 	union LUNAddr     LUN;
 };
 
 struct RequestBlock {
 	u8   CDBLen;
-	struct {
-		u8 Type:3;
-		u8 Attribute:3;
-		u8 Direction:2;
-	} Type;
+	/*
+	 * type_attr_dir:
+	 * type: low 3 bits
+	 * attr: middle 3 bits
+	 * dir: high 2 bits
+	 */
+	u8	type_attr_dir;
+#define TYPE_ATTR_DIR(t, a, d) ((((d) & 0x03) << 6) |\
+				(((a) & 0x07) << 3) |\
+				((t) & 0x07))
+#define GET_TYPE(tad) ((tad) & 0x07)
+#define GET_ATTR(tad) (((tad) >> 3) & 0x07)
+#define GET_DIR(tad) (((tad) >> 6) & 0x03)
 	u16  Timeout;
 	u8   CDB[16];
 };
 
 struct ErrDescriptor {
-	struct vals32 Addr;
+	u64 Addr;
 	u32  Len;
 };
 
 struct SGDescriptor {
-	struct vals32 Addr;
+	u64 Addr;
 	u32  Len;
 	u32  Ext;
 };
@@ -366,23 +396,21 @@ struct ErrorInfo {
 #define CMD_SCSI	0x03
 #define CMD_IOACCEL1	0x04
 #define CMD_IOACCEL2	0x05
+#define IOACCEL2_TMF	0x06
 
-#define DIRECT_LOOKUP_SHIFT 5
-#define DIRECT_LOOKUP_BIT 0x10
+#define DIRECT_LOOKUP_SHIFT 4
 #define DIRECT_LOOKUP_MASK (~((1 << DIRECT_LOOKUP_SHIFT) - 1))
 
 #define HPSA_ERROR_BIT          0x02
 struct ctlr_info; /* defined in hpsa.h */
-/* The size of this structure needs to be divisible by 32
- * on all architectures because low 5 bits of the addresses
+/* The size of this structure needs to be divisible by 128
+ * on all architectures.  The low 4 bits of the addresses
  * are used as follows:
  *
  * bit 0: to device, used to indicate "performant mode" command
  *        from device, indidcates error status.
  * bit 1-3: to device, indicates block fetch table entry for
  *          reducing DMA in fetching commands from host memory.
- * bit 4: used to indicate whether tag is "direct lookup" (index),
- *        or a bus address.
  */
 
 #define COMMANDLIST_ALIGNMENT 128
@@ -397,9 +425,23 @@ struct CommandList {
 	struct ctlr_info	   *h;
 	int			   cmd_type;
 	long			   cmdindex;
-	struct list_head list;
 	struct completion *waiting;
-	void   *scsi_cmd;
+	struct scsi_cmnd *scsi_cmd;
+	struct work_struct work;
+	struct delayed_work abort_torture_work;
+
+	/* For commands using either of the two "ioaccel" paths to
+	 * bypass the RAID stack and go directly to the physical disk,
+	 * phys_disk is a pointer to the hpsa_scsi_dev_t to which the
+	 * i/o is destined.  We need to store that here because the command
+	 * may potentially encounter TASK SET FULL and need to be resubmitted.
+	 * For "normal" i/o's not using the "ioaccel" paths, phys_disk is
+	 * not used.
+	 */
+	struct hpsa_scsi_dev_t *phys_disk;
+
+	int abort_pending;
+	atomic_t refcount; /* Must be last to avoid memset in cmd_alloc */
 } __aligned(COMMANDLIST_ALIGNMENT);
 
 /* Max S/G elements in I/O accelerator command */
@@ -434,8 +476,8 @@ struct io_accel1_cmd {
 	u16 timeout_sec;		/* 0x62 - 0x63 */
 	u8  ReplyQueue;			/* 0x64 */
 	u8  reserved9[3];		/* 0x65 - 0x67 */
-	struct vals32 Tag;		/* 0x68 - 0x6F */
-	struct vals32 host_addr;	/* 0x70 - 0x77 */
+	u64 tag;			/* 0x68 - 0x6F */
+	u64 host_addr;			/* 0x70 - 0x77 */
 	u8  CISS_LUN[8];		/* 0x78 - 0x7F */
 	struct SGDescriptor SG[IOACCEL1_MAXSGENTRIES];
 } __aligned(IOACCEL1_COMMANDLIST_ALIGNMENT);
@@ -493,6 +535,12 @@ struct io_accel2_scsi_response {
 #define IOACCEL2_STATUS_SR_TASK_COMP_SET_FULL	0x28
 #define IOACCEL2_STATUS_SR_TASK_COMP_ABORTED	0x40
 #define IOACCEL2_STATUS_SR_IOACCEL_DISABLED	0x0E
+#define IOACCEL2_STATUS_SR_IO_ERROR		0x01
+#define IOACCEL2_STATUS_SR_IO_ABORTED		0x02
+#define IOACCEL2_STATUS_SR_NO_PATH_TO_DEVICE	0x03
+#define IOACCEL2_STATUS_SR_INVALID_DEVICE	0x04
+#define IOACCEL2_STATUS_SR_UNDERRUN		0x51
+#define IOACCEL2_STATUS_SR_OVERRUN		0x75
 	u8 data_present;		/* low 2 bits */
 #define IOACCEL2_NO_DATAPRESENT		0x000
 #define IOACCEL2_RESPONSE_DATAPRESENT	0x001
@@ -545,6 +593,7 @@ struct io_accel2_cmd {
 #define IOACCEL2_DIR_NO_DATA	0x00
 #define IOACCEL2_DIR_DATA_IN	0x01
 #define IOACCEL2_DIR_DATA_OUT	0x02
+#define IOACCEL2_TMF_ABORT	0x01
 /*
  * SCSI Task Management Request format for Accelerator Mode 2
  */
@@ -555,11 +604,11 @@ struct hpsa_tmf_struct {
 	u8 reserved1;		/* byte 3 Reserved */
 	u32 it_nexus;		/* SCSI I-T Nexus */
 	u8 lun_id[8];		/* LUN ID for TMF request */
-	struct vals32 Tag;	/* cciss tag associated w/ request */
-	struct vals32 abort_tag;/* cciss tag of SCSI cmd or task to abort */
+	u64 tag;		/* cciss tag associated w/ request */
+	u64 abort_tag;		/* cciss tag of SCSI cmd or task to abort */
 	u64 error_ptr;		/* Error Pointer */
 	u32 error_len;		/* Error Length */
-};
+} __aligned(IOACCEL2_COMMANDLIST_ALIGNMENT);
 
 /* Configuration Table Structure */
 struct HostWrite {
@@ -636,5 +685,137 @@ struct hpsa_pci_info {
 	u32		board_id;
 };
 
+struct bmic_identify_physical_device {
+	u8 scsi_bus;          /* SCSI Bus number on controller */
+	u8 scsi_id;           /* SCSI ID on this bus */
+	u16 block_size;	     /* sector size in bytes */
+	u32 total_blocks;	     /* number of sectors on drive */
+	u32 reserved_blocks;   /* controller reserved (RIS) */
+	u8 model[40];         /* Physical Drive Model */
+	u8 serial_number[40]; /* Drive Serial Number */
+	u8 firmware_revision[8]; /* drive firmware revision */
+	u8 scsi_inquiry_bits; /* inquiry byte 7 bits */
+	u8 compaq_drive_stamp; /* 0 means drive not stamped */
+	u8 last_failure_reason;
+#define BMIC_LAST_FAILURE_TOO_SMALL_IN_LOAD_CONFIG		0x01
+#define BMIC_LAST_FAILURE_ERROR_ERASING_RIS			0x02
+#define BMIC_LAST_FAILURE_ERROR_SAVING_RIS			0x03
+#define BMIC_LAST_FAILURE_FAIL_DRIVE_COMMAND			0x04
+#define BMIC_LAST_FAILURE_MARK_BAD_FAILED			0x05
+#define BMIC_LAST_FAILURE_MARK_BAD_FAILED_IN_FINISH_REMAP	0x06
+#define BMIC_LAST_FAILURE_TIMEOUT				0x07
+#define BMIC_LAST_FAILURE_AUTOSENSE_FAILED			0x08
+#define BMIC_LAST_FAILURE_MEDIUM_ERROR_1			0x09
+#define BMIC_LAST_FAILURE_MEDIUM_ERROR_2			0x0a
+#define BMIC_LAST_FAILURE_NOT_READY_BAD_SENSE			0x0b
+#define BMIC_LAST_FAILURE_NOT_READY				0x0c
+#define BMIC_LAST_FAILURE_HARDWARE_ERROR			0x0d
+#define BMIC_LAST_FAILURE_ABORTED_COMMAND			0x0e
+#define BMIC_LAST_FAILURE_WRITE_PROTECTED			0x0f
+#define BMIC_LAST_FAILURE_SPIN_UP_FAILURE_IN_RECOVER		0x10
+#define BMIC_LAST_FAILURE_REBUILD_WRITE_ERROR			0x11
+#define BMIC_LAST_FAILURE_TOO_SMALL_IN_HOT_PLUG			0x12
+#define BMIC_LAST_FAILURE_BUS_RESET_RECOVERY_ABORTED		0x13
+#define BMIC_LAST_FAILURE_REMOVED_IN_HOT_PLUG			0x14
+#define BMIC_LAST_FAILURE_INIT_REQUEST_SENSE_FAILED		0x15
+#define BMIC_LAST_FAILURE_INIT_START_UNIT_FAILED		0x16
+#define BMIC_LAST_FAILURE_INQUIRY_FAILED			0x17
+#define BMIC_LAST_FAILURE_NON_DISK_DEVICE			0x18
+#define BMIC_LAST_FAILURE_READ_CAPACITY_FAILED			0x19
+#define BMIC_LAST_FAILURE_INVALID_BLOCK_SIZE			0x1a
+#define BMIC_LAST_FAILURE_HOT_PLUG_REQUEST_SENSE_FAILED		0x1b
+#define BMIC_LAST_FAILURE_HOT_PLUG_START_UNIT_FAILED		0x1c
+#define BMIC_LAST_FAILURE_WRITE_ERROR_AFTER_REMAP		0x1d
+#define BMIC_LAST_FAILURE_INIT_RESET_RECOVERY_ABORTED		0x1e
+#define BMIC_LAST_FAILURE_DEFERRED_WRITE_ERROR			0x1f
+#define BMIC_LAST_FAILURE_MISSING_IN_SAVE_RIS			0x20
+#define BMIC_LAST_FAILURE_WRONG_REPLACE				0x21
+#define BMIC_LAST_FAILURE_GDP_VPD_INQUIRY_FAILED		0x22
+#define BMIC_LAST_FAILURE_GDP_MODE_SENSE_FAILED			0x23
+#define BMIC_LAST_FAILURE_DRIVE_NOT_IN_48BIT_MODE		0x24
+#define BMIC_LAST_FAILURE_DRIVE_TYPE_MIX_IN_HOT_PLUG		0x25
+#define BMIC_LAST_FAILURE_DRIVE_TYPE_MIX_IN_LOAD_CFG		0x26
+#define BMIC_LAST_FAILURE_PROTOCOL_ADAPTER_FAILED		0x27
+#define BMIC_LAST_FAILURE_FAULTY_ID_BAY_EMPTY			0x28
+#define BMIC_LAST_FAILURE_FAULTY_ID_BAY_OCCUPIED		0x29
+#define BMIC_LAST_FAILURE_FAULTY_ID_INVALID_BAY			0x2a
+#define BMIC_LAST_FAILURE_WRITE_RETRIES_FAILED			0x2b
+
+#define BMIC_LAST_FAILURE_SMART_ERROR_REPORTED			0x37
+#define BMIC_LAST_FAILURE_PHY_RESET_FAILED			0x38
+#define BMIC_LAST_FAILURE_ONLY_ONE_CTLR_CAN_SEE_DRIVE		0x40
+#define BMIC_LAST_FAILURE_KC_VOLUME_FAILED			0x41
+#define BMIC_LAST_FAILURE_UNEXPECTED_REPLACEMENT		0x42
+#define BMIC_LAST_FAILURE_OFFLINE_ERASE				0x80
+#define BMIC_LAST_FAILURE_OFFLINE_TOO_SMALL			0x81
+#define BMIC_LAST_FAILURE_OFFLINE_DRIVE_TYPE_MIX		0x82
+#define BMIC_LAST_FAILURE_OFFLINE_ERASE_COMPLETE		0x83
+
+	u8  flags;
+	u8  more_flags;
+	u8  scsi_lun;          /* SCSI LUN for phys drive */
+	u8  yet_more_flags;
+	u8  even_more_flags;
+	u32 spi_speed_rules;/* SPI Speed data:Ultra disable diagnose */
+	u8  phys_connector[2];         /* connector number on controller */
+	u8  phys_box_on_bus;  /* phys enclosure this drive resides in */
+	u8  phys_bay_in_box;  /* phys drv bay this drive resides in */
+	u32 rpm;              /* Drive rotational speed in rpm */
+	u8  device_type;       /* type of drive */
+	u8  sata_version;     /* only valid when device_type is SATA */
+	u64 big_total_block_count;
+	u64 ris_starting_lba;
+	u32 ris_size;
+	u8  wwid[20];
+	u8  controller_phy_map[32];
+	u16 phy_count;
+	u8  phy_connected_dev_type[256];
+	u8  phy_to_drive_bay_num[256];
+	u16 phy_to_attached_dev_index[256];
+	u8  box_index;
+	u8  reserved;
+	u16 extra_physical_drive_flags;
+#define BMIC_PHYS_DRIVE_SUPPORTS_GAS_GAUGE(idphydrv) \
+	((idphydrv)->extra_physical_drive_flags & (1 << 10))
+	u8  negotiated_link_rate[256];
+	u8  phy_to_phy_map[256];
+	u8  redundant_path_present_map;
+	u8  redundant_path_failure_map;
+	u8  active_path_number;
+	u16 alternate_paths_phys_connector[8];
+	u8  alternate_paths_phys_box_on_port[8];
+	u8  multi_lun_device_lun_count;
+	u8  minimum_good_fw_revision[8];
+	u8  unique_inquiry_bytes[20];
+	u8  current_temperature_degreesC;
+	u8  temperature_threshold_degreesC;
+	u8  max_temperature_degreesC;
+	u8  logical_blocks_per_phys_block_exp; /* phyblocksize = 512 * 2^exp */
+	u16 current_queue_depth_limit;
+	u8  switch_name[10];
+	u16 switch_port;
+	u8  alternate_paths_switch_name[40];
+	u8  alternate_paths_switch_port[8];
+	u16 power_on_hours; /* valid only if gas gauge supported */
+	u16 percent_endurance_used; /* valid only if gas gauge supported. */
+#define BMIC_PHYS_DRIVE_SSD_WEAROUT(idphydrv) \
+	(((idphydrv)->percent_endurance_used & 0x80) || \
+	 ((idphydrv)->percent_endurance_used > 10000))
+	u8  drive_authentication;
+#define BMIC_PHYS_DRIVE_AUTHENTICATED(idphydrv) \
+	((idphydrv)->drive_authentication == 0x80)
+	u8  smart_carrier_authentication;
+#define BMIC_SMART_CARRIER_AUTHENTICATION_SUPPORTED(idphydrv) \
+	((idphydrv)->smart_carrier_authentication != 0x0)
+#define BMIC_SMART_CARRIER_AUTHENTICATED(idphydrv) \
+	((idphydrv)->smart_carrier_authentication == 0x01)
+	u8  smart_carrier_app_fw_version;
+	u8  smart_carrier_bootloader_fw_version;
+	u8  encryption_key_name[64];
+	u32 misc_drive_flags;
+	u16 dek_index;
+	u8  padding[112];
+};
+
 #pragma pack()
 #endif /* HPSA_CMD_H */
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 79c77b485a6729..362e443e7d5eb1 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -80,7 +80,10 @@
  * Note - the initial logging level can be set here to log events at boot time.
  * After the system is up, you may enable logging via the /proc interface.
  */
-unsigned int scsi_logging_level;
+unsigned int scsi_logging_level =
+	(0x5 & ((1 << SCSI_LOG_ERROR_BITS) - 1)) << SCSI_LOG_ERROR_SHIFT |
+	(0x1 & ((1 << SCSI_LOG_MLCOMPLETE_BITS) - 1)) << SCSI_LOG_MLCOMPLETE_SHIFT;
+
 #if defined(CONFIG_SCSI_LOGGING)
 EXPORT_SYMBOL(scsi_logging_level);
 #endif
@@ -553,6 +556,23 @@ void scsi_log_send(struct scsi_cmnd *cmd)
 	}
 }
 
+/* Strings for internal return values in scsi.h */
+/* NEEDS_RETRY must be the lowest numbered value */
+static const char * const disposition_label[] = {
+	"NEEDS_RETRY",
+	"SUCCESS",
+	"FAILED",
+	"QUEUED",
+	"SOFT_ERROR",
+	"ADD_TO_MLQUEUE",
+	"TIMEOUT_ERROR",
+	"SCSI_RETURN_NOT_HANDLED",
+	"FAST_IO_FAIL",
+	"UNKNOWN",
+};
+#define DISPOSITION_BASE NEEDS_RETRY
+#define DISPOSITION_UNKNOWN (ARRAY_SIZE(disposition_label) - 1)
+
 void scsi_log_completion(struct scsi_cmnd *cmd, int disposition)
 {
 	unsigned int level;
@@ -574,35 +594,21 @@ void scsi_log_completion(struct scsi_cmnd *cmd, int disposition)
 				       SCSI_LOG_MLCOMPLETE_BITS);
 		if (((level > 0) && (cmd->result || disposition != SUCCESS)) ||
 		    (level > 1)) {
-			scmd_printk(KERN_INFO, cmd, "Done: ");
+			int dindex;
+
+			if (disposition >= DISPOSITION_BASE &&
+			    disposition <= DISPOSITION_BASE +
+					   DISPOSITION_UNKNOWN)
+				dindex = disposition - DISPOSITION_BASE;
+			else
+				dindex = DISPOSITION_UNKNOWN;
 			if (level > 2)
-				printk("0x%p ", cmd);
-			/*
-			 * Dump truncated values, so we usually fit within
-			 * 80 chars.
-			 */
-			switch (disposition) {
-			case SUCCESS:
-				printk("SUCCESS\n");
-				break;
-			case NEEDS_RETRY:
-				printk("RETRY\n");
-				break;
-			case ADD_TO_MLQUEUE:
-				printk("MLQUEUE\n");
-				break;
-			case FAILED:
-				printk("FAILED\n");
-				break;
-			case TIMEOUT_ERROR:
-				/* 
-				 * If called via scsi_times_out.
-				 */
-				printk("TIMEOUT\n");
-				break;
-			default:
-				printk("UNKNOWN\n");
-			}
+				scmd_printk(KERN_INFO, cmd, "Done: 0x%p %s\n",
+					cmd,
+					disposition_label[dindex]);
+			else
+				scmd_printk(KERN_INFO, cmd, "Done: %s\n",
+					disposition_label[dindex]);
 			scsi_print_result(cmd);
 			scsi_print_command(cmd);
 			if (status_byte(cmd->result) & CHECK_CONDITION)