/*
 * Copyright (C) 2004 SUSE LINUX Products GmbH. All rights reserved.
 * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 *
 * Multipath support for EMC CLARiiON AX/CX-series hardware.
 */
11 #include "dm-hw-handler.h"
12 #include <scsi/scsi.h>
13 #include <scsi/scsi_cmnd.h>
15 #define DM_MSG_PREFIX "multipath emc"
/* Per-path-group handler state.  NOTE(review): the `struct emc_handler {`
 * opener, its spinlock field (h->lock is initialized later) and the
 * `unsigned hr;` field (read as h->hr later) are not visible in this
 * excerpt — confirm against the full file. */
20 /* Whether we should send the short trespass command (FC-series)
21 * or the long version (default for AX/CX CLARiiON arrays). */
22 unsigned short_trespass;
23 /* Whether or not to honor SCSI reservations when initiating a
24 * switch-over. Default: Don't. */
/* Sense data returned by the trespass request lands here (rq->sense). */
27 unsigned char sense[SCSI_SENSE_BUFFERSIZE];
/* Vendor-specific mode page carrying the CLARiiON trespass command. */
30 #define TRESPASS_PAGE 0x22
/* Block-layer timeout applied to the switch-over request. */
31 #define EMC_FAILOVER_TIMEOUT (60 * HZ)
33 /* Code borrowed from dm-lsi-rdac by Mike Christie */
/* Release the single data page attached to a failover bio.
 * NOTE(review): the function braces and the expected bio_put() call are
 * missing from this excerpt — confirm against the full file. */
35 static inline void free_bio(struct bio *bio)
37 	__free_page(bio->bi_io_vec[0].bv_page);
/* Completion callback for the trespass bio: reports pg_init success or
 * failure for the originating path back to the multipath core.
 * NOTE(review): the error test, braces and return statement are not
 * visible in this excerpt. */
41 static int emc_endio(struct bio *bio, unsigned int bytes_done, int error)
43 	struct path *path = bio->bi_private;
48 	/* We also need to look at the sense keys here whether or not to
49 	 * switch to the next PG etc.
51 	 * For now simple logic: either it works or it doesn't.
/* Error path: ask the core to fail this path. */
54 		dm_pg_init_complete(path, MP_FAIL_PATH);
/* Success path: pg_init completed cleanly. */
56 		dm_pg_init_complete(path, 0);
58 	/* request is freed in block layer */
/* Build a one-segment WRITE bio, backed by a freshly allocated page of
 * @data_size payload, aimed at @path's device with emc_endio() as its
 * completion handler.  Returns the bio, or (presumably) NULL on failure —
 * the error-path cleanup lines are not visible in this excerpt. */
64 static struct bio *get_failover_bio(struct path *path, unsigned data_size)
69 	bio = bio_alloc(GFP_ATOMIC, 1);
71 		DMERR("get_failover_bio: bio_alloc() failed.");
/* Mark the bio as a write. */
75 	bio->bi_rw |= (1 << BIO_RW);
76 	bio->bi_bdev = path->dev->bdev;
78 	bio->bi_private = path;
79 	bio->bi_end_io = emc_endio;
81 	page = alloc_page(GFP_ATOMIC);
83 		DMERR("get_failover_bio: alloc_page() failed.");
88 	if (bio_add_page(bio, page, data_size, 0) != data_size) {
/* NOTE(review): this is the bio_add_page() failure path, but the
 * message says "alloc_page() failed" — copy/paste defect; should read
 * "bio_add_page() failed". */
89 		DMERR("get_failover_bio: alloc_page() failed.");
/* Wrap the failover @bio in a block-layer packet-command (BLOCK_PC)
 * request suitable for direct insertion into the device's queue.
 * Sense data is directed into the handler's own buffer.  Returns the
 * request, or (presumably) NULL if blk_get_request() fails — the error
 * branch and final return are not visible in this excerpt. */
98 static struct request *get_failover_req(struct emc_handler *h,
99 					struct bio *bio, struct path *path)
102 	struct block_device *bdev = bio->bi_bdev;
103 	struct request_queue *q = bdev_get_queue(bdev);
105 	/* FIXME: Figure out why it fails with GFP_ATOMIC. */
106 	rq = blk_get_request(q, WRITE, __GFP_WAIT);
108 		DMERR("get_failover_req: blk_get_request failed");
/* Attach the bio and let the block layer fill in the request geometry. */
112 	rq->bio = rq->biotail = bio;
113 	blk_rq_bio_prep(q, rq, bio);
115 	rq->rq_disk = bdev->bd_contains->bd_disk;
117 	/* bio backed don't set data */
118 	rq->buffer = rq->data = NULL;
119 	/* rq data_len used for pc cmd's request_bufflen */
120 	rq->data_len = bio->bi_size;
/* Collect sense data in the handler's buffer; clear any stale bytes. */
122 	rq->sense = h->sense;
123 	memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
/* CDB is filled in by the caller (emc_trespass_get). */
126 	memset(&rq->cmd, 0, BLK_MAX_CDB);
/* Bound the switch-over; fail fast, and never merge with other I/O. */
128 	rq->timeout = EMC_FAILOVER_TIMEOUT;
129 	rq->flags |= (REQ_BLOCK_PC | REQ_FAILFAST | REQ_NOMERGE);
/* Build the complete MODE SELECT trespass request for @path: allocate the
 * bio, copy in the (short or long, per h->short_trespass) trespass mode
 * page, wrap it in a BLOCK_PC request and fill in the CDB.  Returns the
 * ready-to-queue request, or (presumably) NULL on failure — several error
 * branches and the final return are not visible in this excerpt. */
134 static struct request *emc_trespass_get(struct emc_handler *h,
139 	unsigned char *page22;
140 	unsigned char long_trespass_pg[] = {
142 		TRESPASS_PAGE,        /* Page code */
143 		0x09,			/* Page length - 2 */
144 		h->hr ? 0x01 : 0x81,  /* Trespass code + Honor reservation bit */
145 		0xff, 0xff,		/* Trespass target */
146 		0, 0, 0, 0, 0, 0	/* Reserved bytes / unknown */
148 	unsigned char short_trespass_pg[] = {
150 		TRESPASS_PAGE,        /* Page code */
151 		0x02,			/* Page length - 2 */
152 		h->hr ? 0x01 : 0x81,  /* Trespass code + Honor reservation bit */
153 		0xff,			/* Trespass target */
/* Payload size follows the selected page variant. */
155 	unsigned data_size = h->short_trespass ? sizeof(short_trespass_pg) :
156 				sizeof(long_trespass_pg);
158 	/* get bio backing */
159 	if (data_size > PAGE_SIZE)
160 		/* this should never happen */
163 	bio = get_failover_bio(path, data_size);
165 		DMERR("emc_trespass_get: no bio");
/* Copy the chosen trespass page into the bio's data page. */
169 	page22 = (unsigned char *)bio_data(bio);
170 	memset(page22, 0, data_size);
172 	memcpy(page22, h->short_trespass ?
173 		short_trespass_pg : long_trespass_pg, data_size);
175 	/* get request for block layer packet command */
176 	rq = get_failover_req(h, bio, path);
178 		DMERR("emc_trespass_get: no rq");
183 	/* Prepare the command. */
184 	rq->cmd[0] = MODE_SELECT;
/* cmd[4] = parameter list length; byte 1 (PF bit) is set on a line not
 * visible in this excerpt. */
186 	rq->cmd[4] = data_size;
187 	rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
/* hw_handler pg_init entry point: build the trespass request for @path and
 * push it to the front of the device's elevator queue.  On any failure the
 * multipath core is told to fail the path via dm_pg_init_complete().
 * NOTE(review): the NULL checks, returns and closing braces around the
 * error paths are not visible in this excerpt. */
192 static void emc_pg_init(struct hw_handler *hwh, unsigned bypassed,
196 	struct request_queue *q = bdev_get_queue(path->dev->bdev);
199 	 * We can either blindly init the pg (then look at the sense),
200 	 * or we can send some commands to get the state here (then
201 	 * possibly send the fo cmnd), or we can also have the
202 	 * initial state passed into us and then get an update here.
205 		DMINFO("emc_pg_init: no queue");
209 	/* FIXME: The request should be pre-allocated. */
210 	rq = emc_trespass_get(hwh->context, path);
212 		DMERR("emc_pg_init: no rq");
216 	DMINFO("emc_pg_init: sending switch-over command");
/* Jump the queue: insert at the front and kick the elevator. */
217 	elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 1);
/* Shared failure exit: report the path as failed. */
221 	dm_pg_init_complete(path, MP_FAIL_PATH);
/* Allocate and zero a handler context, initializing its spinlock.
 * Returns NULL on allocation failure (the NULL check around the memset/
 * spin_lock_init and the return are not visible in this excerpt). */
224 static struct emc_handler *alloc_emc_handler(void)
226 	struct emc_handler *h = kmalloc(sizeof(*h), GFP_KERNEL);
229 		memset(h, 0, sizeof(*h));
230 		spin_lock_init(&h->lock);
/* hw_handler constructor.  Accepts either zero arguments (defaults) or
 * exactly two: <short_trespass 0|1> <hr 0|1>.  Validates both, allocates
 * the context and stores it in hwh->context.  Returns 0 on success,
 * negative errno otherwise — the return statements and the argc==0
 * branch are not visible in this excerpt. */
236 static int emc_create(struct hw_handler *hwh, unsigned argc, char **argv)
238 	struct emc_handler *h;
239 	unsigned hr, short_trespass;
242 		/* No arguments: use defaults */
245 	} else if (argc != 2) {
246 		DMWARN("incorrect number of arguments");
/* Parse and range-check the trespass-mode flag (must be 0 or 1). */
249 		if ((sscanf(argv[0], "%u", &short_trespass) != 1)
250 			|| (short_trespass > 1)) {
251 			DMWARN("invalid trespass mode selected");
/* Parse the honor-reservation flag (range check not visible here). */
255 		if ((sscanf(argv[1], "%u", &hr) != 1)
257 			DMWARN("invalid honor reservation flag selected");
262 	h = alloc_emc_handler();
/* Record the chosen mode and log it.
 * NOTE(review): "will be send" should read "will be sent" in the four
 * DMWARN strings below — runtime strings, left untouched here. */
268 	if ((h->short_trespass = short_trespass))
269 		DMWARN("short trespass command will be send");
271 		DMWARN("long trespass command will be send");
274 		DMWARN("honor reservation bit will be set");
276 		DMWARN("honor reservation bit will not be set (default)");
/* hw_handler destructor: free the context allocated by emc_create()
 * (the kfree and context reset are not visible in this excerpt). */
281 static void emc_destroy(struct hw_handler *hwh)
283 	struct emc_handler *h = (struct emc_handler *) hwh->context;
/* hw_handler error classifier: map CLARiiON-specific SCSI sense triples
 * (key/ASC/ASCQ packed into one 24-bit value) to multipath dispositions,
 * deferring everything else to the generic dm_scsi_err_handler().
 * NOTE(review): several return statements inside the branches are not
 * visible in this excerpt. */
289 static unsigned emc_error(struct hw_handler *hwh, struct bio *bio)
291 	/* FIXME: Patch from axboe still missing */
295 	if (bio->bi_error & BIO_SENSE) {
296 		sense = bio->bi_error & 0xffffff; /* sense key / asc / ascq */
/* 02/04/03: NOT READY / LUN not ready, manual intervention required. */
298 		if (sense == 0x020403) {
299 			/* LUN Not Ready - Manual Intervention Required
300 			 * indicates this is a passive path.
302 			 * FIXME: However, if this is seen and EVPD C0
303 			 * indicates that this is due to a NDU in
304 			 * progress, we should set FAIL_PATH too.
305 			 * This indicates we might have to do a SCSI
306 			 * inquiry in the end_io path. Ugh. */
307 			return MP_BYPASS_PG | MP_RETRY_IO;
/* 05/25/01: ILLEGAL REQUEST — array-based copy in progress. */
308 		} else if (sense == 0x052501) {
309 			/* An array based copy is in progress. Do not
310 			 * fail the path, do not bypass to another PG,
311 			 * do not retry. Fail the IO immediately.
312 			 * (Actually this is the same conclusion as in
313 			 * the default handler, but lets make sure.) */
/* 06/29/00: UNIT ATTENTION — power on / reset occurred. */
315 		} else if (sense == 0x062900) {
316 			/* Unit Attention Code. This is the first IO
317 			 * to the new path, so just retry. */
323 	/* Try default handler */
324 	return dm_scsi_err_handler(hwh, bio);
/* Registration descriptor tying this handler's operations into the
 * device-mapper multipath hw-handler framework.  (The .name and .error
 * members are not visible in this excerpt.) */
327 static struct hw_handler_type emc_hwh = {
329 	.module = THIS_MODULE,
330 	.create = emc_create,
331 	.destroy = emc_destroy,
332 	.pg_init = emc_pg_init,
/* Module init: register the EMC hw handler with dm-multipath.
 * Returns the registration result (the return statement is not visible
 * in this excerpt). */
336 static int __init dm_emc_init(void)
338 	int r = dm_register_hw_handler(&emc_hwh);
341 		DMERR("register failed %d", r);
343 	DMINFO("version 0.0.3 loaded");
/* Module exit: unregister the handler; failure is only logged since
 * there is no caller to propagate it to. */
348 static void __exit dm_emc_exit(void)
350 	int r = dm_unregister_hw_handler(&emc_hwh);
353 		DMERR("unregister failed %d", r);
/* Standard kernel module entry/exit hookup and metadata. */
356 module_init(dm_emc_init);
357 module_exit(dm_emc_exit);
359 MODULE_DESCRIPTION(DM_NAME " EMC CX/AX/FC-family multipath");
360 MODULE_AUTHOR("Lars Marowsky-Bree <lmb@suse.de>");
361 MODULE_LICENSE("GPL");