Newer
Older
/*
* Copyright (C) 2003 Sistina Software Limited.
* Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
*
* This file is released under the GPL.
*/
#include "dm-path-selector.h"
#include "dm-bio-record.h"
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/workqueue.h>
#include <scsi/scsi_dh.h>
#define DM_MSG_PREFIX "multipath"
#define MESG_STR(x) x, sizeof(x)
/* Path properties */
struct pgpath {
struct list_head list;
struct priority_group *pg; /* Owning PG */
unsigned is_active; /* Path status */
struct work_struct deactivate_path;
struct work_struct activate_path;
};
#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
/*
* Paths are grouped into Priority Groups and numbered from 1 upwards.
* Each has a path selector which controls which path gets used.
*/
struct priority_group {
struct list_head list;
struct multipath *m; /* Owning multipath instance */
struct path_selector ps;
unsigned pg_num; /* Reference number */
unsigned bypassed; /* Temporarily bypass this PG? */
unsigned nr_pgpaths; /* Number of paths in PG */
struct list_head pgpaths;
};
/* Multipath context */
struct multipath {
struct list_head list;
struct dm_target *ti;
spinlock_t lock;
const char *hw_handler_name;
unsigned nr_priority_groups;
struct list_head priority_groups;
unsigned pg_init_required; /* pg_init needs calling? */
unsigned pg_init_in_progress; /* Only one pg_init allowed at once */
unsigned nr_valid_paths; /* Total number of usable paths */
struct pgpath *current_pgpath;
struct priority_group *current_pg;
struct priority_group *next_pg; /* Switch to this PG if set */
unsigned repeat_count; /* I/Os left before calling PS again */
unsigned queue_io; /* Must we queue all I/O? */
unsigned queue_if_no_path; /* Queue I/O if last path fails? */
unsigned saved_queue_if_no_path;/* Saved state during suspension */
unsigned pg_init_retries; /* Number of times to retry pg_init */
unsigned pg_init_count; /* Number of times pg_init called */
struct work_struct process_queued_ios;
struct bio_list queued_ios;
unsigned queue_size;
struct work_struct trigger_event;
/*
* We must use a mempool of dm_mpath_io structs so that we
* can resubmit bios on error.
*/
mempool_t *mpio_pool;
};
/*
* Context information attached to each bio we process.
*/
struct pgpath *pgpath;
struct dm_bio_details details;
};
typedef int (*action_fn) (struct pgpath *pgpath);
#define MIN_IOS 256 /* Mempool size */
static struct kmem_cache *_mpio_cache;

Chandra Seetharaman
committed
static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
static void process_queued_ios(struct work_struct *work);
static void trigger_event(struct work_struct *work);

Chandra Seetharaman
committed
static void activate_path(struct work_struct *work);
static void deactivate_path(struct work_struct *work);
/*-----------------------------------------------
* Allocation routines
*-----------------------------------------------*/
static struct pgpath *alloc_pgpath(void)
{
struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
INIT_WORK(&pgpath->deactivate_path, deactivate_path);
INIT_WORK(&pgpath->activate_path, activate_path);
static void free_pgpath(struct pgpath *pgpath)
static void deactivate_path(struct work_struct *work)
{
struct pgpath *pgpath =
container_of(work, struct pgpath, deactivate_path);
blk_abort_queue(pgpath->path.dev->bdev->bd_disk->queue);
}
static struct priority_group *alloc_priority_group(void)
{
struct priority_group *pg;
if (pg)
INIT_LIST_HEAD(&pg->pgpaths);
return pg;
}
static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
{
struct pgpath *pgpath, *tmp;
struct multipath *m = ti->private;
list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
list_del(&pgpath->list);
if (m->hw_handler_name)
scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
dm_put_device(ti, pgpath->path.dev);
free_pgpath(pgpath);
}
}
static void free_priority_group(struct priority_group *pg,
struct dm_target *ti)
{
struct path_selector *ps = &pg->ps;
if (ps->type) {
ps->type->destroy(ps);
dm_put_path_selector(ps->type);
}
free_pgpaths(&pg->pgpaths, ti);
kfree(pg);
}
static struct multipath *alloc_multipath(struct dm_target *ti)
if (m) {
INIT_LIST_HEAD(&m->priority_groups);
spin_lock_init(&m->lock);
m->queue_io = 1;
INIT_WORK(&m->process_queued_ios, process_queued_ios);
INIT_WORK(&m->trigger_event, trigger_event);
m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
if (!m->mpio_pool) {
kfree(m);
return NULL;
}
}
return m;
}
static void free_multipath(struct multipath *m)
{
struct priority_group *pg, *tmp;
list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
list_del(&pg->list);
free_priority_group(pg, m->ti);
}
kfree(m->hw_handler_name);
mempool_destroy(m->mpio_pool);
kfree(m);
}
/*-----------------------------------------------
* Path selection
*-----------------------------------------------*/
static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
{
m->current_pg = pgpath->pg;
/* Must we initialise the PG first, and queue I/O till it's ready? */
if (m->hw_handler_name) {
m->pg_init_required = 1;
m->queue_io = 1;
} else {
m->pg_init_required = 0;
m->queue_io = 0;
}
}
static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg)
{
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
path = pg->ps.type->select_path(&pg->ps, &m->repeat_count);
if (!path)
return -ENXIO;
m->current_pgpath = path_to_pgpath(path);
if (m->current_pg != pg)
__switch_pg(m, m->current_pgpath);
return 0;
}
static void __choose_pgpath(struct multipath *m)
{
struct priority_group *pg;
unsigned bypassed = 1;
if (!m->nr_valid_paths)
goto failed;
/* Were we instructed to switch PG? */
if (m->next_pg) {
pg = m->next_pg;
m->next_pg = NULL;
if (!__choose_path_in_pg(m, pg))
return;
}
/* Don't change PG until it has no remaining paths */
if (m->current_pg && !__choose_path_in_pg(m, m->current_pg))
return;
/*
* Loop through priority groups until we find a valid path.
* First time we skip PGs marked 'bypassed'.
* Second time we only try the ones we skipped.
*/
do {
list_for_each_entry(pg, &m->priority_groups, list) {
if (pg->bypassed == bypassed)
continue;
if (!__choose_path_in_pg(m, pg))
return;
}
} while (bypassed--);
failed:
m->current_pgpath = NULL;
m->current_pg = NULL;
}
/*
* Check whether bios must be queued in the device-mapper core rather
* than here in the target.
*
* m->lock must be held on entry.
*
* If m->queue_if_no_path and m->saved_queue_if_no_path hold the
* same value then we are not between multipath_presuspend()
* and multipath_resume() calls and we have no need to check
* for the DMF_NOFLUSH_SUSPENDING flag.
*/
static int __must_push_back(struct multipath *m)
{
return (m->queue_if_no_path != m->saved_queue_if_no_path &&
dm_noflush_suspending(m->ti));
}
static int map_io(struct multipath *m, struct bio *bio,
struct dm_mpath_io *mpio, unsigned was_queued)
int r = DM_MAPIO_REMAPPED;
unsigned long flags;
struct pgpath *pgpath;
spin_lock_irqsave(&m->lock, flags);
/* Do we need to select a new pgpath? */
if (!m->current_pgpath ||
(!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
__choose_pgpath(m);
pgpath = m->current_pgpath;
if (was_queued)
m->queue_size--;
if ((pgpath && m->queue_io) ||
(!pgpath && m->queue_if_no_path)) {
/* Queue for the daemon to resubmit */
bio_list_add(&m->queued_ios, bio);
m->queue_size++;
if ((m->pg_init_required && !m->pg_init_in_progress) ||
!m->queue_io)
queue_work(kmultipathd, &m->process_queued_ios);
else if (__must_push_back(m))
r = DM_MAPIO_REQUEUE;
else
r = -EIO; /* Failed */
mpio->pgpath = pgpath;
spin_unlock_irqrestore(&m->lock, flags);
return r;
}
/*
* If we run out of usable paths, should we queue I/O or error it?
*/
static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
unsigned save_old_value)
{
unsigned long flags;
spin_lock_irqsave(&m->lock, flags);
if (save_old_value)
m->saved_queue_if_no_path = m->queue_if_no_path;
else
m->saved_queue_if_no_path = queue_if_no_path;
if (!m->queue_if_no_path && m->queue_size)
queue_work(kmultipathd, &m->process_queued_ios);
spin_unlock_irqrestore(&m->lock, flags);
return 0;
}
/*-----------------------------------------------------------------
* The multipath daemon is responsible for resubmitting queued ios.
*---------------------------------------------------------------*/
static void dispatch_queued_ios(struct multipath *m)
{
int r;
unsigned long flags;
struct bio *bio = NULL, *next;
union map_info *info;
spin_lock_irqsave(&m->lock, flags);
bio = bio_list_get(&m->queued_ios);
spin_unlock_irqrestore(&m->lock, flags);
while (bio) {
next = bio->bi_next;
bio->bi_next = NULL;
info = dm_get_mapinfo(bio);
mpio = info->ptr;
r = map_io(m, bio, mpio, 1);
if (r < 0)
else if (r == DM_MAPIO_REMAPPED)
else if (r == DM_MAPIO_REQUEUE)
static void process_queued_ios(struct work_struct *work)
struct multipath *m =
container_of(work, struct multipath, process_queued_ios);
struct pgpath *pgpath = NULL, *tmp;
unsigned must_queue = 1;
unsigned long flags;
spin_lock_irqsave(&m->lock, flags);
if (!m->queue_size)
goto out;
if (!m->current_pgpath)
__choose_pgpath(m);
pgpath = m->current_pgpath;
if ((pgpath && !m->queue_io) ||
(!pgpath && !m->queue_if_no_path))
must_queue = 0;
if (m->pg_init_required && !m->pg_init_in_progress && pgpath) {
list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) {
if (queue_work(kmpath_handlerd, &tmp->activate_path))
m->pg_init_in_progress++;
}
}
out:
spin_unlock_irqrestore(&m->lock, flags);
if (!must_queue)
dispatch_queued_ios(m);
}
/*
* An event is triggered whenever a path is taken out of use.
* Includes path failure and PG bypass.
*/
static void trigger_event(struct work_struct *work)
struct multipath *m =
container_of(work, struct multipath, trigger_event);
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
dm_table_event(m->ti->table);
}
/*-----------------------------------------------------------------
* Constructor/argument parsing:
* <#multipath feature args> [<arg>]*
* <#hw_handler args> [hw_handler [<arg>]*]
* <#priority groups>
* <initial priority group>
* [<selector> <#selector args> [<arg>]*
* <#paths> <#per-path selector args>
* [<path> [<arg>]* ]+ ]+
*---------------------------------------------------------------*/
struct param {
unsigned min;
unsigned max;
char *error;
};
static int read_param(struct param *param, char *str, unsigned *v, char **error)
{
if (!str ||
(sscanf(str, "%u", v) != 1) ||
(*v < param->min) ||
(*v > param->max)) {
*error = param->error;
return -EINVAL;
}
return 0;
}
struct arg_set {
unsigned argc;
char **argv;
};
static char *shift(struct arg_set *as)
{
char *r;
if (as->argc) {
as->argc--;
r = *as->argv;
as->argv++;
return r;
}
return NULL;
}
static void consume(struct arg_set *as, unsigned n)
{
BUG_ON (as->argc < n);
as->argc -= n;
as->argv += n;
}
static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
struct dm_target *ti)
{
int r;
struct path_selector_type *pst;
unsigned ps_argc;
static struct param _params[] = {
{0, 1024, "invalid number of path selector args"},
};
pst = dm_get_path_selector(shift(as));
if (!pst) {
ti->error = "unknown path selector type";
return -EINVAL;
}
r = read_param(_params, shift(as), &ps_argc, &ti->error);
if (r) {
dm_put_path_selector(pst);
if (ps_argc > as->argc) {
dm_put_path_selector(pst);
ti->error = "not enough arguments for path selector";
return -EINVAL;
}
r = pst->create(&pg->ps, ps_argc, as->argv);
if (r) {
dm_put_path_selector(pst);
ti->error = "path selector constructor failed";
return r;
}
pg->ps.type = pst;
consume(as, ps_argc);
return 0;
}
static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
struct dm_target *ti)
{
int r;
struct pgpath *p;
struct multipath *m = ti->private;
/* we need at least a path arg */
if (as->argc < 1) {
ti->error = "no device given";
r = dm_get_device(ti, shift(as), ti->begin, ti->len,
dm_table_get_mode(ti->table), &p->path.dev);
if (r) {
ti->error = "error getting device";
if (m->hw_handler_name) {
struct request_queue *q = bdev_get_queue(p->path.dev->bdev);
r = scsi_dh_attach(q, m->hw_handler_name);
if (r == -EBUSY) {
/*
* Already attached to different hw_handler,
* try to reattach with correct one.
*/
scsi_dh_detach(q);
r = scsi_dh_attach(q, m->hw_handler_name);
}
ti->error = "error attaching hardware handler";
dm_put_device(ti, p->path.dev);
goto bad;
}
}
r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
if (r) {
dm_put_device(ti, p->path.dev);
goto bad;
}
return p;
bad:
free_pgpath(p);
}
static struct priority_group *parse_priority_group(struct arg_set *as,
{1, 1024, "invalid number of paths"},
{0, 1024, "invalid number of selector args"}
};
int r;
unsigned i, nr_selector_args, nr_params;
struct priority_group *pg;
ti->error = "not enough priority group arguments";
return ERR_PTR(-EINVAL);
ti->error = "couldn't allocate priority group";
}
pg->m = m;
r = parse_path_selector(as, pg, ti);
if (r)
goto bad;
/*
* read the paths
*/
r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error);
if (r)
goto bad;
r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error);
if (r)
goto bad;
nr_params = 1 + nr_selector_args;
for (i = 0; i < pg->nr_pgpaths; i++) {
struct pgpath *pgpath;
struct arg_set path_args;
if (as->argc < nr_params) {
ti->error = "not enough path parameters";
path_args.argc = nr_params;
path_args.argv = as->argv;
pgpath = parse_path(&path_args, &pg->ps, ti);
if (IS_ERR(pgpath)) {
r = PTR_ERR(pgpath);
pgpath->pg = pg;
list_add_tail(&pgpath->list, &pg->pgpaths);
consume(as, nr_params);
}
return pg;
bad:
free_priority_group(pg, ti);
static int parse_hw_handler(struct arg_set *as, struct multipath *m)
{0, 1024, "invalid number of hardware handler args"},
if (read_param(_params, shift(as), &hw_argc, &ti->error))
return -EINVAL;
if (!hw_argc)
return 0;
if (hw_argc > as->argc) {
ti->error = "not enough arguments for hardware handler";
return -EINVAL;
}
m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL);
request_module("scsi_dh_%s", m->hw_handler_name);
if (scsi_dh_handler_exist(m->hw_handler_name) == 0) {
ti->error = "unknown hardware handler type";
kfree(m->hw_handler_name);
m->hw_handler_name = NULL;
if (hw_argc > 1)
DMWARN("Ignoring user-specified arguments for "
"hardware handler \"%s\"", m->hw_handler_name);
static int parse_features(struct arg_set *as, struct multipath *m)
{0, 3, "invalid number of feature args"},
{1, 50, "pg_init_retries must be between 1 and 50"},
};
r = read_param(_params, shift(as), &argc, &ti->error);
if (r)
return -EINVAL;
if (!argc)
return 0;
do {
param_name = shift(as);
argc--;
if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) {
r = queue_if_no_path(m, 1, 0);
continue;
}
if (!strnicmp(param_name, MESG_STR("pg_init_retries")) &&
(argc >= 1)) {
r = read_param(_params + 1, shift(as),
&m->pg_init_retries, &ti->error);
argc--;
continue;
}
r = -EINVAL;
} while (argc && !r);
return r;
}
static int multipath_ctr(struct dm_target *ti, unsigned int argc,
char **argv)
{
/* target parameters */
static struct param _params[] = {
{1, 1024, "invalid number of priority groups"},
{1, 1024, "invalid initial priority group number"},
};
int r;
struct multipath *m;
struct arg_set as;
unsigned pg_count = 0;
unsigned next_pg_num;
as.argc = argc;
as.argv = argv;
ti->error = "can't allocate multipath";
if (r)
goto bad;
r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error);
if (r)
goto bad;
r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error);
if (r)
goto bad;
/* parse the priority groups */
while (as.argc) {
struct priority_group *pg;
if (IS_ERR(pg)) {
r = PTR_ERR(pg);
goto bad;
}
m->nr_valid_paths += pg->nr_pgpaths;
list_add_tail(&pg->list, &m->priority_groups);
pg_count++;
pg->pg_num = pg_count;
if (!--next_pg_num)
m->next_pg = pg;
}
if (pg_count != m->nr_priority_groups) {
ti->error = "priority group count mismatch";
r = -EINVAL;
goto bad;
}
return 0;
bad:
free_multipath(m);
return r;
}
static void multipath_dtr(struct dm_target *ti)
{
struct multipath *m = (struct multipath *) ti->private;

Chandra Seetharaman
committed
flush_workqueue(kmpath_handlerd);
flush_workqueue(kmultipathd);
free_multipath(m);
}
/*
* Map bios, recording original fields for later in case we have to resubmit
*/
static int multipath_map(struct dm_target *ti, struct bio *bio,
union map_info *map_context)
{
int r;
struct multipath *m = (struct multipath *) ti->private;
mpio = mempool_alloc(m->mpio_pool, GFP_NOIO);
dm_bio_record(&mpio->details, bio);
map_context->ptr = mpio;
bio->bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT);
if (r < 0 || r == DM_MAPIO_REQUEUE)
mempool_free(mpio, m->mpio_pool);
return r;
}
/*
* Take a path out of use.
*/
static int fail_path(struct pgpath *pgpath)
{
unsigned long flags;
struct multipath *m = pgpath->pg->m;
spin_lock_irqsave(&m->lock, flags);
DMWARN("Failing path %s.", pgpath->path.dev->name);
pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
pgpath->fail_count++;
m->nr_valid_paths--;
if (pgpath == m->current_pgpath)
m->current_pgpath = NULL;
dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
pgpath->path.dev->name, m->nr_valid_paths);
schedule_work(&m->trigger_event);
queue_work(kmultipathd, &pgpath->deactivate_path);
out:
spin_unlock_irqrestore(&m->lock, flags);
return 0;
}
/*
* Reinstate a previously-failed path
*/
static int reinstate_path(struct pgpath *pgpath)
{
int r = 0;
unsigned long flags;
struct multipath *m = pgpath->pg->m;
spin_lock_irqsave(&m->lock, flags);
if (!pgpath->pg->ps.type->reinstate_path) {
DMWARN("Reinstate path not supported by path selector %s",
pgpath->pg->ps.type->name);
r = -EINVAL;
goto out;
}
r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
if (r)
goto out;
if (!m->nr_valid_paths++ && m->queue_size) {
m->current_pgpath = NULL;
queue_work(kmultipathd, &m->process_queued_ios);
} else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
if (queue_work(kmpath_handlerd, &pgpath->activate_path))
m->pg_init_in_progress++;
}
dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
pgpath->path.dev->name, m->nr_valid_paths);
schedule_work(&m->trigger_event);
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
out:
spin_unlock_irqrestore(&m->lock, flags);
return r;
}
/*
* Fail or reinstate all paths that match the provided struct dm_dev.
*/
static int action_dev(struct multipath *m, struct dm_dev *dev,
action_fn action)
{
int r = 0;
struct pgpath *pgpath;
struct priority_group *pg;
list_for_each_entry(pg, &m->priority_groups, list) {
list_for_each_entry(pgpath, &pg->pgpaths, list) {
if (pgpath->path.dev == dev)
r = action(pgpath);
}
}
return r;
}
/*
* Temporarily try to avoid having to use the specified PG
*/
static void bypass_pg(struct multipath *m, struct priority_group *pg,
int bypassed)
{
unsigned long flags;
spin_lock_irqsave(&m->lock, flags);
pg->bypassed = bypassed;
m->current_pgpath = NULL;
m->current_pg = NULL;
spin_unlock_irqrestore(&m->lock, flags);
schedule_work(&m->trigger_event);
}
/*
* Switch to using the specified PG from the next I/O that gets mapped
*/
static int switch_pg_num(struct multipath *m, const char *pgstr)