    /*
     * Copyright (C) 2012 Red Hat. All rights reserved.
     *
     * This file is released under the GPL.
     */
    
    #include "dm.h"
    #include "dm-bio-prison.h"
    
    #include "dm-bio-record.h"
    
    Joe Thornber's avatar
    Joe Thornber committed
    #include "dm-cache-metadata.h"
    
    #include <linux/dm-io.h>
    #include <linux/dm-kcopyd.h>
    #include <linux/init.h>
    #include <linux/mempool.h>
    #include <linux/module.h>
    #include <linux/slab.h>
    #include <linux/vmalloc.h>
    
    #define DM_MSG_PREFIX "cache"
    
    DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
    	"A percentage of time allocated for copying to and/or from cache");
    
    /*----------------------------------------------------------------*/
    
    /*
     * Glossary:
     *
     * oblock: index of an origin block
     * cblock: index of a cache block
     * promotion: movement of a block from origin to cache
     * demotion: movement of a block from cache to origin
     * migration: movement of a block between the origin and cache device,
     *	      either direction
     */
    
    /*----------------------------------------------------------------*/
    
    static size_t bitset_size_in_bytes(unsigned nr_entries)
    {
    	return sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG);
    }
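
/*
 * Sizing note added for illustration (not in the original source): on a
 * 64-bit machine (BITS_PER_LONG == 64) a bitset covering e.g. 1,000,000
 * entries needs dm_div_up(1000000, 64) == 15625 longs, i.e. 125,000 bytes,
 * which is one reason vzalloc() is a better fit than kmalloc() for bitsets
 * of this size.
 */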
    
    static unsigned long *alloc_bitset(unsigned nr_entries)
    {
    	size_t s = bitset_size_in_bytes(nr_entries);
    	return vzalloc(s);
    }
    
    static void clear_bitset(void *bitset, unsigned nr_entries)
    {
    	size_t s = bitset_size_in_bytes(nr_entries);
    	memset(bitset, 0, s);
    }
    
    static void free_bitset(unsigned long *bits)
    {
    	vfree(bits);
    }
    
    /*----------------------------------------------------------------*/
    
    #define PRISON_CELLS 1024
    #define MIGRATION_POOL_SIZE 128
    #define COMMIT_PERIOD HZ
    #define MIGRATION_COUNT_WINDOW 10
    
    /*
     * The block size of the device holding cache data must be >= 32KB
     */
    #define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
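
/*
 * Worked example (editorial note, not from the original source): with
 * 512-byte sectors (SECTOR_SHIFT == 9), 32 * 1024 >> SECTOR_SHIFT == 64,
 * so the minimum data block size is 64 sectors.
 */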
    
    /*
     * FIXME: the cache is read/write for the time being.
     */
    enum cache_mode {
    	CM_WRITE,		/* metadata may be changed */
    	CM_READ_ONLY,		/* metadata may not be changed */
    };
    
    struct cache_features {
    	enum cache_mode mode;
    	bool write_through:1;
    };
    
    struct cache_stats {
    	atomic_t read_hit;
    	atomic_t read_miss;
    	atomic_t write_hit;
    	atomic_t write_miss;
    	atomic_t demotion;
    	atomic_t promotion;
    	atomic_t copies_avoided;
    	atomic_t cache_cell_clash;
    	atomic_t commit_count;
    	atomic_t discard_count;
    };
    
    struct cache {
    	struct dm_target *ti;
    	struct dm_target_callbacks callbacks;
    
    	/*
    	 * Metadata is written to this device.
    	 */
    	struct dm_dev *metadata_dev;
    
    	/*
    	 * The slower of the two data devices.  Typically a spindle.
    	 */
    	struct dm_dev *origin_dev;
    
    	/*
    	 * The faster of the two data devices.  Typically an SSD.
    	 */
    	struct dm_dev *cache_dev;
    
    	/*
    	 * Cache features such as write-through.
    	 */
    	struct cache_features features;
    
    	/*
    	 * Size of the origin device in _complete_ blocks and native sectors.
    	 */
    	dm_oblock_t origin_blocks;
    	sector_t origin_sectors;
    
    	/*
    	 * Size of the cache device in blocks.
    	 */
    	dm_cblock_t cache_size;
    
    	/*
    	 * Fields for converting from sectors to blocks.
    	 */
    	uint32_t sectors_per_block;
    	int sectors_per_block_shift;
    
    	struct dm_cache_metadata *cmd;
    
    	spinlock_t lock;
    	struct bio_list deferred_bios;
    	struct bio_list deferred_flush_bios;
    
    	struct bio_list deferred_writethrough_bios;
    
    	struct list_head quiesced_migrations;
    	struct list_head completed_migrations;
    	struct list_head need_commit_migrations;
    	sector_t migration_threshold;
    	atomic_t nr_migrations;
    	wait_queue_head_t migration_wait;
    
    	/*
    	 * cache_size entries, dirty if set
    	 */
    	dm_cblock_t nr_dirty;
    	unsigned long *dirty_bitset;
    
    	/*
    	 * origin_blocks entries, discarded if set.
	 */
	uint32_t discard_block_size; /* a power of 2 times sectors per block */
	dm_dblock_t discard_nr_blocks;
	unsigned long *discard_bitset;
    
    	struct dm_kcopyd_client *copier;
    	struct workqueue_struct *wq;
    	struct work_struct worker;
    
    	struct delayed_work waker;
    	unsigned long last_commit_jiffies;
    
    	struct dm_bio_prison *prison;
    	struct dm_deferred_set *all_io_ds;
    
    	mempool_t *migration_pool;
    	struct dm_cache_migration *next_migration;
    
    	struct dm_cache_policy *policy;
    	unsigned policy_nr_args;
    
    	bool need_tick_bio:1;
    	bool sized:1;
    	bool quiescing:1;
    	bool commit_requested:1;
    	bool loaded_mappings:1;
    	bool loaded_discards:1;
    
    	struct cache_stats stats;
    
    	/*
    	 * Rather than reconstructing the table line for the status we just
    	 * save it and regurgitate.
    	 */
    	unsigned nr_ctr_args;
    	const char **ctr_args;
    };
    
    struct per_bio_data {
    	bool tick:1;
    	unsigned req_nr:2;
    	struct dm_deferred_entry *all_io_entry;
    
	/*
	 * writethrough fields.  These MUST remain at the end of this
	 * structure and the 'cache' member must be the first as it
	 * is used to determine the offset of the writethrough fields.
	 */
	struct cache *cache;
	dm_cblock_t cblock;
	bio_end_io_t *saved_bi_end_io;
	struct dm_bio_details bio_details;
};
    
    struct dm_cache_migration {
    	struct list_head list;
    	struct cache *cache;
    
    	unsigned long start_jiffies;
    	dm_oblock_t old_oblock;
    	dm_oblock_t new_oblock;
    	dm_cblock_t cblock;
    
    	bool err:1;
    	bool writeback:1;
    	bool demote:1;
    	bool promote:1;
    
    	struct dm_bio_prison_cell *old_ocell;
    	struct dm_bio_prison_cell *new_ocell;
    };
    
    /*
     * Processing a bio in the worker thread may require these memory
     * allocations.  We prealloc to avoid deadlocks (the same worker thread
     * frees them back to the mempool).
     */
    struct prealloc {
    	struct dm_cache_migration *mg;
    	struct dm_bio_prison_cell *cell1;
    	struct dm_bio_prison_cell *cell2;
    };
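
/*
 * Illustrative usage sketch (editorial addition; the pattern is implied by
 * the helpers below rather than spelled out in the original):
 *
 *	struct prealloc structs;
 *
 *	memset(&structs, 0, sizeof(structs));
 *	if (prealloc_data_structs(cache, &structs))
 *		defer the bio and retry later (no memory right now);
 *	... process the bio, taking what it needs via
 *	    prealloc_get_migration() / prealloc_get_cell() ...
 *	prealloc_free_structs(cache, &structs);	(returns anything unused)
 */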
    
    static void wake_worker(struct cache *cache)
    {
    	queue_work(cache->wq, &cache->worker);
    }
    
    /*----------------------------------------------------------------*/
    
    static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache)
    {
    	/* FIXME: change to use a local slab. */
    	return dm_bio_prison_alloc_cell(cache->prison, GFP_NOWAIT);
    }
    
    static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell)
    {
    	dm_bio_prison_free_cell(cache->prison, cell);
    }
    
    static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
    {
    	if (!p->mg) {
    		p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
    		if (!p->mg)
    			return -ENOMEM;
    	}
    
    	if (!p->cell1) {
    		p->cell1 = alloc_prison_cell(cache);
    		if (!p->cell1)
    			return -ENOMEM;
    	}
    
    	if (!p->cell2) {
    		p->cell2 = alloc_prison_cell(cache);
    		if (!p->cell2)
    			return -ENOMEM;
    	}
    
    	return 0;
    }
    
    static void prealloc_free_structs(struct cache *cache, struct prealloc *p)
    {
    	if (p->cell2)
    		free_prison_cell(cache, p->cell2);
    
    	if (p->cell1)
    		free_prison_cell(cache, p->cell1);
    
    	if (p->mg)
    		mempool_free(p->mg, cache->migration_pool);
    }
    
    static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
    {
    	struct dm_cache_migration *mg = p->mg;
    
    	BUG_ON(!mg);
    	p->mg = NULL;
    
    	return mg;
    }
    
    /*
     * You must have a cell within the prealloc struct to return.  If not this
     * function will BUG() rather than returning NULL.
     */
    static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p)
    {
    	struct dm_bio_prison_cell *r = NULL;
    
    	if (p->cell1) {
    		r = p->cell1;
    		p->cell1 = NULL;
    
    	} else if (p->cell2) {
    		r = p->cell2;
    		p->cell2 = NULL;
    	} else
    		BUG();
    
    	return r;
    }
    
    /*
     * You can't have more than two cells in a prealloc struct.  BUG() will be
     * called if you try and overfill.
     */
    static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell)
    {
    	if (!p->cell2)
    		p->cell2 = cell;
    
    	else if (!p->cell1)
    		p->cell1 = cell;
    
    	else
    		BUG();
    }
    
    /*----------------------------------------------------------------*/
    
    static void build_key(dm_oblock_t oblock, struct dm_cell_key *key)
    {
    	key->virtual = 0;
    	key->dev = 0;
    	key->block = from_oblock(oblock);
    }
    
    /*
     * The caller hands in a preallocated cell, and a free function for it.
     * The cell will be freed if there's an error, or if it wasn't used because
     * a cell with that key already exists.
     */
    typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);
    
    static int bio_detain(struct cache *cache, dm_oblock_t oblock,
    		      struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
    		      cell_free_fn free_fn, void *free_context,
    		      struct dm_bio_prison_cell **cell_result)
    {
    	int r;
    	struct dm_cell_key key;
    
    	build_key(oblock, &key);
    	r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
    	if (r)
    		free_fn(free_context, cell_prealloc);
    
    	return r;
    }
    
    static int get_cell(struct cache *cache,
    		    dm_oblock_t oblock,
    		    struct prealloc *structs,
    		    struct dm_bio_prison_cell **cell_result)
    {
    	int r;
    	struct dm_cell_key key;
    	struct dm_bio_prison_cell *cell_prealloc;
    
    	cell_prealloc = prealloc_get_cell(structs);
    
    	build_key(oblock, &key);
    	r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result);
    	if (r)
    		prealloc_put_cell(structs, cell_prealloc);
    
    	return r;
    }
    
    
    /*----------------------------------------------------------------*/
    
    static bool is_dirty(struct cache *cache, dm_cblock_t b)
    {
    	return test_bit(from_cblock(b), cache->dirty_bitset);
    }
    
    static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
    {
    	if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
    		cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) + 1);
    		policy_set_dirty(cache->policy, oblock);
    	}
    }
    
    static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
    {
    	if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
    		policy_clear_dirty(cache->policy, oblock);
    		cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) - 1);
    		if (!from_cblock(cache->nr_dirty))
    			dm_table_event(cache->ti->table);
    	}
    }
    
    /*----------------------------------------------------------------*/
    
    static bool block_size_is_power_of_two(struct cache *cache)
    {
    	return cache->sectors_per_block_shift >= 0;
    }
    
    
/* gcc on ARM generates spurious references to __udivdi3 and __umoddi3 */
#if defined(CONFIG_ARM) && __GNUC__ == 4 && __GNUC_MINOR__ <= 6
__always_inline
#endif
static dm_block_t block_div(dm_block_t b, uint32_t n)
{
	do_div(b, n);

	return b;
}

static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
{
	uint32_t discard_blocks = cache->discard_block_size;
	dm_block_t b = from_oblock(oblock);

	if (!block_size_is_power_of_two(cache))
		discard_blocks = discard_blocks / cache->sectors_per_block;
	else
		discard_blocks >>= cache->sectors_per_block_shift;

	b = block_div(b, discard_blocks);

	return to_dblock(b);
}
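
/*
 * Worked example (illustrative numbers, not from the original source): if
 * cache->sectors_per_block is 64 and cache->discard_block_size is 512
 * sectors, each discard block covers 512 / 64 == 8 origin blocks, so
 * oblock 20 maps to dblock 20 / 8 == 2.
 */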
    
    static void set_discard(struct cache *cache, dm_dblock_t b)
    {
    	unsigned long flags;
    
    	atomic_inc(&cache->stats.discard_count);
    
    	spin_lock_irqsave(&cache->lock, flags);
    	set_bit(from_dblock(b), cache->discard_bitset);
    	spin_unlock_irqrestore(&cache->lock, flags);
    }
    
    static void clear_discard(struct cache *cache, dm_dblock_t b)
    {
    	unsigned long flags;
    
    	spin_lock_irqsave(&cache->lock, flags);
    	clear_bit(from_dblock(b), cache->discard_bitset);
    	spin_unlock_irqrestore(&cache->lock, flags);
    }
    
    static bool is_discarded(struct cache *cache, dm_dblock_t b)
    {
    	int r;
    	unsigned long flags;
    
    	spin_lock_irqsave(&cache->lock, flags);
    	r = test_bit(from_dblock(b), cache->discard_bitset);
    	spin_unlock_irqrestore(&cache->lock, flags);
    
    	return r;
    }
    
    static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
    {
    	int r;
    	unsigned long flags;
    
    	spin_lock_irqsave(&cache->lock, flags);
    	r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
    		     cache->discard_bitset);
    	spin_unlock_irqrestore(&cache->lock, flags);
    
    	return r;
    }
    
    /*----------------------------------------------------------------*/
    
    static void load_stats(struct cache *cache)
    {
    	struct dm_cache_statistics stats;
    
    	dm_cache_metadata_get_stats(cache->cmd, &stats);
    	atomic_set(&cache->stats.read_hit, stats.read_hits);
    	atomic_set(&cache->stats.read_miss, stats.read_misses);
    	atomic_set(&cache->stats.write_hit, stats.write_hits);
    	atomic_set(&cache->stats.write_miss, stats.write_misses);
    }
    
    static void save_stats(struct cache *cache)
    {
    	struct dm_cache_statistics stats;
    
    	stats.read_hits = atomic_read(&cache->stats.read_hit);
    	stats.read_misses = atomic_read(&cache->stats.read_miss);
    	stats.write_hits = atomic_read(&cache->stats.write_hit);
    	stats.write_misses = atomic_read(&cache->stats.write_miss);
    
    	dm_cache_metadata_set_stats(cache->cmd, &stats);
    }
    
    /*----------------------------------------------------------------
     * Per bio data
     *--------------------------------------------------------------*/
    
    
    /*
     * If using writeback, leave out struct per_bio_data's writethrough fields.
     */
    #define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
    #define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))
    
    static size_t get_per_bio_data_size(struct cache *cache)
    {
    	return cache->features.write_through ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
    }
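
/*
 * Editorial illustration (not in the original): because 'cache' is the
 * first of the writethrough members, PB_DATA_SIZE_WB covers only tick,
 * req_nr and all_io_entry, whereas PB_DATA_SIZE_WT also reserves room for
 * cache, cblock, saved_bi_end_io and bio_details.  Writeback bios
 * therefore carry a smaller per-bio payload than writethrough bios.
 */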
    
static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
{
	struct per_bio_data *pb = dm_per_bio_data(bio, data_size);

	BUG_ON(!pb);
	return pb;
}

static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size)
{
	struct per_bio_data *pb = get_per_bio_data(bio, data_size);

	pb->tick = false;
	pb->req_nr = dm_bio_get_target_bio_nr(bio);
	pb->all_io_entry = NULL;

	return pb;
}
    
    /*----------------------------------------------------------------
     * Remapping
     *--------------------------------------------------------------*/
    static void remap_to_origin(struct cache *cache, struct bio *bio)
    {
    	bio->bi_bdev = cache->origin_dev->bdev;
    }
    
    static void remap_to_cache(struct cache *cache, struct bio *bio,
    			   dm_cblock_t cblock)
    {
    	sector_t bi_sector = bio->bi_sector;
    
    	bio->bi_bdev = cache->cache_dev->bdev;
    	if (!block_size_is_power_of_two(cache))
    		bio->bi_sector = (from_cblock(cblock) * cache->sectors_per_block) +
    				sector_div(bi_sector, cache->sectors_per_block);
    	else
    		bio->bi_sector = (from_cblock(cblock) << cache->sectors_per_block_shift) |
    				(bi_sector & (cache->sectors_per_block - 1));
    }
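
/*
 * Worked example (illustrative numbers): with sectors_per_block == 128
 * (sectors_per_block_shift == 7), a bio at sector 300 remapped to cblock 5
 * lands at (5 << 7) | (300 & 127) == 640 + 44 == 684 on the cache device.
 */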
    
static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
{
	unsigned long flags;
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	spin_lock_irqsave(&cache->lock, flags);
	if (cache->need_tick_bio &&
	    !(bio->bi_rw & (REQ_FUA | REQ_FLUSH | REQ_DISCARD))) {
		pb->tick = true;
		cache->need_tick_bio = false;
	}
	spin_unlock_irqrestore(&cache->lock, flags);
}
    
    static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
    				  dm_oblock_t oblock)
    {
    	check_if_tick_bio_needed(cache, bio);
    	remap_to_origin(cache, bio);
    	if (bio_data_dir(bio) == WRITE)
    		clear_discard(cache, oblock_to_dblock(cache, oblock));
    }
    
    static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
    				 dm_oblock_t oblock, dm_cblock_t cblock)
    {
    	remap_to_cache(cache, bio, cblock);
    	if (bio_data_dir(bio) == WRITE) {
    		set_dirty(cache, oblock, cblock);
    		clear_discard(cache, oblock_to_dblock(cache, oblock));
    	}
    }
    
    static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
    {
    	sector_t block_nr = bio->bi_sector;
    
    	if (!block_size_is_power_of_two(cache))
    		(void) sector_div(block_nr, cache->sectors_per_block);
    	else
    		block_nr >>= cache->sectors_per_block_shift;
    
    	return to_oblock(block_nr);
    }
    
    static int bio_triggers_commit(struct cache *cache, struct bio *bio)
    {
    	return bio->bi_rw & (REQ_FLUSH | REQ_FUA);
    }
    
    static void issue(struct cache *cache, struct bio *bio)
    {
    	unsigned long flags;
    
    	if (!bio_triggers_commit(cache, bio)) {
    		generic_make_request(bio);
    		return;
    	}
    
    	/*
    	 * Batch together any bios that trigger commits and then issue a
    	 * single commit for them in do_worker().
    	 */
    	spin_lock_irqsave(&cache->lock, flags);
    	cache->commit_requested = true;
    	bio_list_add(&cache->deferred_flush_bios, bio);
    	spin_unlock_irqrestore(&cache->lock, flags);
    }
    
    
    static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
    {
    	unsigned long flags;
    
    	spin_lock_irqsave(&cache->lock, flags);
    	bio_list_add(&cache->deferred_writethrough_bios, bio);
    	spin_unlock_irqrestore(&cache->lock, flags);
    
    	wake_worker(cache);
    }
    
static void writethrough_endio(struct bio *bio, int err)
{
	struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
	bio->bi_end_io = pb->saved_bi_end_io;

	if (err) {
		bio_endio(bio, err);
		return;
	}

	dm_bio_restore(&pb->bio_details, bio);
	remap_to_cache(pb->cache, bio, pb->cblock);

	/*
	 * We can't issue this bio directly, since we're in interrupt
	 * context.  So it gets put on a bio list for processing by the
	 * worker thread.
	 */
	defer_writethrough_bio(pb->cache, bio);
}
    
    /*
     * When running in writethrough mode we need to send writes to clean blocks
     * to both the cache and origin devices.  In future we'd like to clone the
     * bio and send them in parallel, but for now we're doing them in
     * series as this is easier.
     */
    static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
    				       dm_oblock_t oblock, dm_cblock_t cblock)
{
	struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);

	pb->cache = cache;
	pb->cblock = cblock;
	pb->saved_bi_end_io = bio->bi_end_io;
	dm_bio_record(&pb->bio_details, bio);
	bio->bi_end_io = writethrough_endio;

	remap_to_origin_clear_discard(pb->cache, bio, oblock);
}
    
    
    /*----------------------------------------------------------------
     * Migration processing
     *
     * Migration covers moving data from the origin device to the cache, or
     * vice versa.
     *--------------------------------------------------------------*/
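/*
 * Editorial summary of the lifecycle, pieced together from the code below:
 * a migration is allocated from migration_pool, waits on the all_io
 * deferred set (quiesced_migrations), has its block copied by kcopyd, is
 * moved to completed_migrations by copy_complete(), updates the on-disk
 * metadata in migration_success_pre_commit() and, when a commit is
 * required, is finished off via need_commit_migrations once that commit
 * has happened.
 */
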
    static void free_migration(struct dm_cache_migration *mg)
    {
    	mempool_free(mg, mg->cache->migration_pool);
    }
    
    static void inc_nr_migrations(struct cache *cache)
    {
    	atomic_inc(&cache->nr_migrations);
    }
    
    static void dec_nr_migrations(struct cache *cache)
    {
    	atomic_dec(&cache->nr_migrations);
    
    	/*
    	 * Wake the worker in case we're suspending the target.
    	 */
    	wake_up(&cache->migration_wait);
    }
    
    static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
    			 bool holder)
    {
    	(holder ? dm_cell_release : dm_cell_release_no_holder)
    		(cache->prison, cell, &cache->deferred_bios);
    	free_prison_cell(cache, cell);
    }
    
    static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
    		       bool holder)
    {
    	unsigned long flags;
    
    	spin_lock_irqsave(&cache->lock, flags);
    	__cell_defer(cache, cell, holder);
    	spin_unlock_irqrestore(&cache->lock, flags);
    
    	wake_worker(cache);
    }
    
    static void cleanup_migration(struct dm_cache_migration *mg)
    {
    	dec_nr_migrations(mg->cache);
    	free_migration(mg);
    }
    
    static void migration_failure(struct dm_cache_migration *mg)
    {
    	struct cache *cache = mg->cache;
    
    	if (mg->writeback) {
    		DMWARN_LIMIT("writeback failed; couldn't copy block");
    		set_dirty(cache, mg->old_oblock, mg->cblock);
    		cell_defer(cache, mg->old_ocell, false);
    
    	} else if (mg->demote) {
    		DMWARN_LIMIT("demotion failed; couldn't copy block");
    		policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock);
    
    		cell_defer(cache, mg->old_ocell, mg->promote ? 0 : 1);
    		if (mg->promote)
    			cell_defer(cache, mg->new_ocell, 1);
    	} else {
    		DMWARN_LIMIT("promotion failed; couldn't copy block");
    		policy_remove_mapping(cache->policy, mg->new_oblock);
    		cell_defer(cache, mg->new_ocell, 1);
    	}
    
    	cleanup_migration(mg);
    }
    
    static void migration_success_pre_commit(struct dm_cache_migration *mg)
    {
    	unsigned long flags;
    	struct cache *cache = mg->cache;
    
    	if (mg->writeback) {
    		cell_defer(cache, mg->old_ocell, false);
    		clear_dirty(cache, mg->old_oblock, mg->cblock);
    		cleanup_migration(mg);
    		return;
    
    	} else if (mg->demote) {
    		if (dm_cache_remove_mapping(cache->cmd, mg->cblock)) {
    			DMWARN_LIMIT("demotion failed; couldn't update on disk metadata");
    			policy_force_mapping(cache->policy, mg->new_oblock,
    					     mg->old_oblock);
    			if (mg->promote)
    				cell_defer(cache, mg->new_ocell, true);
    			cleanup_migration(mg);
    			return;
    		}
    	} else {
    		if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) {
    			DMWARN_LIMIT("promotion failed; couldn't update on disk metadata");
    			policy_remove_mapping(cache->policy, mg->new_oblock);
    			cleanup_migration(mg);
    			return;
    		}
    	}
    
    	spin_lock_irqsave(&cache->lock, flags);
    	list_add_tail(&mg->list, &cache->need_commit_migrations);
    	cache->commit_requested = true;
    	spin_unlock_irqrestore(&cache->lock, flags);
    }
    
    static void migration_success_post_commit(struct dm_cache_migration *mg)
    {
    	unsigned long flags;
    	struct cache *cache = mg->cache;
    
    	if (mg->writeback) {
    		DMWARN("writeback unexpectedly triggered commit");
    		return;
    
    	} else if (mg->demote) {
    		cell_defer(cache, mg->old_ocell, mg->promote ? 0 : 1);
    
    		if (mg->promote) {
    			mg->demote = false;
    
    			spin_lock_irqsave(&cache->lock, flags);
    			list_add_tail(&mg->list, &cache->quiesced_migrations);
    			spin_unlock_irqrestore(&cache->lock, flags);
    
    		} else
    			cleanup_migration(mg);
    
    	} else {
    		cell_defer(cache, mg->new_ocell, true);
    		clear_dirty(cache, mg->new_oblock, mg->cblock);
    		cleanup_migration(mg);
    	}
    }
    
    static void copy_complete(int read_err, unsigned long write_err, void *context)
    {
    	unsigned long flags;
    	struct dm_cache_migration *mg = (struct dm_cache_migration *) context;
    	struct cache *cache = mg->cache;
    
    	if (read_err || write_err)
    		mg->err = true;
    
    	spin_lock_irqsave(&cache->lock, flags);
    	list_add_tail(&mg->list, &cache->completed_migrations);
    	spin_unlock_irqrestore(&cache->lock, flags);
    
    	wake_worker(cache);
    }
    
    static void issue_copy_real(struct dm_cache_migration *mg)
    {
    	int r;
    	struct dm_io_region o_region, c_region;
    	struct cache *cache = mg->cache;
    
    	o_region.bdev = cache->origin_dev->bdev;
    	o_region.count = cache->sectors_per_block;
    
    	c_region.bdev = cache->cache_dev->bdev;
    	c_region.sector = from_cblock(mg->cblock) * cache->sectors_per_block;
    	c_region.count = cache->sectors_per_block;
    
    	if (mg->writeback || mg->demote) {
    		/* demote */
    		o_region.sector = from_oblock(mg->old_oblock) * cache->sectors_per_block;
    		r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, mg);
    	} else {
    		/* promote */
    		o_region.sector = from_oblock(mg->new_oblock) * cache->sectors_per_block;
    		r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg);
    	}
    
    	if (r < 0)
    		migration_failure(mg);
    }
    
    static void avoid_copy(struct dm_cache_migration *mg)
    {
    	atomic_inc(&mg->cache->stats.copies_avoided);
    	migration_success_pre_commit(mg);
    }
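
/*
 * Editorial note: issue_copy() below skips the actual data copy whenever it
 * cannot matter, i.e. a writeback/demotion of a block that is no longer
 * dirty, or any migration whose origin block has been discarded, and jumps
 * straight to the metadata update via avoid_copy().
 */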
    
    static void issue_copy(struct dm_cache_migration *mg)
    {
    	bool avoid;
    	struct cache *cache = mg->cache;
    
    	if (mg->writeback || mg->demote)
    		avoid = !is_dirty(cache, mg->cblock) ||
    			is_discarded_oblock(cache, mg->old_oblock);
    	else
    		avoid = is_discarded_oblock(cache, mg->new_oblock);
    
    	avoid ? avoid_copy(mg) : issue_copy_real(mg);
    }
    
    static void complete_migration(struct dm_cache_migration *mg)
    {
    	if (mg->err)
    		migration_failure(mg);
    	else
    		migration_success_pre_commit(mg);
    }
    
    static void process_migrations(struct cache *cache, struct list_head *head,
    			       void (*fn)(struct dm_cache_migration *))
    {
    	unsigned long flags;
    	struct list_head list;
    	struct dm_cache_migration *mg, *tmp;
    
    	INIT_LIST_HEAD(&list);
    	spin_lock_irqsave(&cache->lock, flags);
    	list_splice_init(head, &list);
    	spin_unlock_irqrestore(&cache->lock, flags);
    
    	list_for_each_entry_safe(mg, tmp, &list, list)
    		fn(mg);
    }
    
    static void __queue_quiesced_migration(struct dm_cache_migration *mg)
    {
    	list_add_tail(&mg->list, &mg->cache->quiesced_migrations);
    }
    
    static void queue_quiesced_migration(struct dm_cache_migration *mg)
    {
    	unsigned long flags;
    	struct cache *cache = mg->cache;
    
    	spin_lock_irqsave(&cache->lock, flags);
    	__queue_quiesced_migration(mg);
    	spin_unlock_irqrestore(&cache->lock, flags);
    
    	wake_worker(cache);
    }
    
    static void queue_quiesced_migrations(struct cache *cache, struct list_head *work)
    {
    	unsigned long flags;
    	struct dm_cache_migration *mg, *tmp;
    
    	spin_lock_irqsave(&cache->lock, flags);
    	list_for_each_entry_safe(mg, tmp, work, list)
    		__queue_quiesced_migration(mg);
    	spin_unlock_irqrestore(&cache->lock, flags);
    
    	wake_worker(cache);
    }
    
    static void check_for_quiesced_migrations(struct cache *cache,
    					  struct per_bio_data *pb)
    {
    	struct list_head work;
    
    	if (!pb->all_io_entry)
    		return;
    
    	INIT_LIST_HEAD(&work);
    	if (pb->all_io_entry)
    		dm_deferred_entry_dec(pb->all_io_entry, &work);
    
    	if (!list_empty(&work))
    		queue_quiesced_migrations(cache, &work);
    }
    
    static void quiesce_migration(struct dm_cache_migration *mg)
    {
    	if (!dm_deferred_set_add_work(mg->cache->all_io_ds, &mg->list))
    		queue_quiesced_migration(mg);
    }
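
/*
 * Editorial note: quiescing relies on the all_io deferred set.  A migration
 * parked by quiesce_migration() is only queued onto quiesced_migrations
 * once the bios that were in flight when it was added have dropped their
 * all_io_entry (see check_for_quiesced_migrations() above), so the copy
 * starts only after that earlier I/O has drained.
 */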
    
    static void promote(struct cache *cache, struct prealloc *structs,
    		    dm_oblock_t oblock, dm_cblock_t cblock,
    		    struct dm_bio_prison_cell *cell)
    {
    	struct dm_cache_migration *mg = prealloc_get_migration(structs);
    
    	mg->err = false;
    	mg->writeback = false;
    	mg->demote = false;
    	mg->promote = true;
    	mg->cache = cache;
    	mg->new_oblock = oblock;
    	mg->cblock = cblock;
    	mg->old_ocell = NULL;
    	mg->new_ocell = cell;
    	mg->start_jiffies = jiffies;
    
    	inc_nr_migrations(cache);
    	quiesce_migration(mg);