Skip to content
Snippets Groups Projects
dm-cache-target.c 75.1 KiB
Newer Older
	spin_lock_irqsave(&cache->lock, flags);
	list_add_tail(&mg->list, &cache->completed_migrations);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(mg->cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
	remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock);
	generic_make_request(bio);
}

static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
{
	return (bio_data_dir(bio) == WRITE) &&
		(bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
Joe Thornber's avatar
Joe Thornber committed
static void avoid_copy(struct dm_cache_migration *mg)
{
	atomic_inc(&mg->cache->stats.copies_avoided);
	migration_success_pre_commit(mg);
}

static void issue_copy(struct dm_cache_migration *mg)
{
	bool avoid;
	struct cache *cache = mg->cache;

	if (mg->writeback || mg->demote)
		avoid = !is_dirty(cache, mg->cblock) ||
			is_discarded_oblock(cache, mg->old_oblock);
	else {
		struct bio *bio = mg->new_ocell->holder;

Joe Thornber's avatar
Joe Thornber committed
		avoid = is_discarded_oblock(cache, mg->new_oblock);

		if (!avoid && bio_writes_complete_block(cache, bio)) {
			issue_overwrite(mg, bio);
			return;
		}
	}

Joe Thornber's avatar
Joe Thornber committed
	avoid ? avoid_copy(mg) : issue_copy_real(mg);
}

static void complete_migration(struct dm_cache_migration *mg)
{
	if (mg->err)
		migration_failure(mg);
	else
		migration_success_pre_commit(mg);
}

static void process_migrations(struct cache *cache, struct list_head *head,
			       void (*fn)(struct dm_cache_migration *))
{
	unsigned long flags;
	struct list_head list;
	struct dm_cache_migration *mg, *tmp;

	INIT_LIST_HEAD(&list);
	spin_lock_irqsave(&cache->lock, flags);
	list_splice_init(head, &list);
	spin_unlock_irqrestore(&cache->lock, flags);

	list_for_each_entry_safe(mg, tmp, &list, list)
		fn(mg);
}

static void __queue_quiesced_migration(struct dm_cache_migration *mg)
{
	list_add_tail(&mg->list, &mg->cache->quiesced_migrations);
}

static void queue_quiesced_migration(struct dm_cache_migration *mg)
{
	unsigned long flags;
	struct cache *cache = mg->cache;

	spin_lock_irqsave(&cache->lock, flags);
	__queue_quiesced_migration(mg);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void queue_quiesced_migrations(struct cache *cache, struct list_head *work)
{
	unsigned long flags;
	struct dm_cache_migration *mg, *tmp;

	spin_lock_irqsave(&cache->lock, flags);
	list_for_each_entry_safe(mg, tmp, work, list)
		__queue_quiesced_migration(mg);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void check_for_quiesced_migrations(struct cache *cache,
					  struct per_bio_data *pb)
{
	struct list_head work;

	if (!pb->all_io_entry)
		return;

	INIT_LIST_HEAD(&work);
	if (pb->all_io_entry)
		dm_deferred_entry_dec(pb->all_io_entry, &work);

	if (!list_empty(&work))
		queue_quiesced_migrations(cache, &work);
}

static void quiesce_migration(struct dm_cache_migration *mg)
{
	if (!dm_deferred_set_add_work(mg->cache->all_io_ds, &mg->list))
		queue_quiesced_migration(mg);
}

static void promote(struct cache *cache, struct prealloc *structs,
		    dm_oblock_t oblock, dm_cblock_t cblock,
		    struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = false;
	mg->demote = false;
	mg->promote = true;
	mg->requeue_holder = true;
	mg->invalidate = false;
Joe Thornber's avatar
Joe Thornber committed
	mg->cache = cache;
	mg->new_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = NULL;
	mg->new_ocell = cell;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

static void writeback(struct cache *cache, struct prealloc *structs,
		      dm_oblock_t oblock, dm_cblock_t cblock,
		      struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = true;
	mg->demote = false;
	mg->promote = false;
	mg->requeue_holder = true;
	mg->invalidate = false;
Joe Thornber's avatar
Joe Thornber committed
	mg->cache = cache;
	mg->old_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = cell;
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

static void demote_then_promote(struct cache *cache, struct prealloc *structs,
				dm_oblock_t old_oblock, dm_oblock_t new_oblock,
				dm_cblock_t cblock,
				struct dm_bio_prison_cell *old_ocell,
				struct dm_bio_prison_cell *new_ocell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = false;
	mg->demote = true;
	mg->promote = true;
	mg->requeue_holder = true;
	mg->invalidate = false;
Joe Thornber's avatar
Joe Thornber committed
	mg->cache = cache;
	mg->old_oblock = old_oblock;
	mg->new_oblock = new_oblock;
	mg->cblock = cblock;
	mg->old_ocell = old_ocell;
	mg->new_ocell = new_ocell;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

/*
 * Invalidate a cache entry.  No writeback occurs; any changes in the cache
 * block are thrown away.
 */
static void invalidate(struct cache *cache, struct prealloc *structs,
		       dm_oblock_t oblock, dm_cblock_t cblock,
		       struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = false;
	mg->demote = true;
	mg->promote = false;
	mg->requeue_holder = true;
	mg->invalidate = true;
	mg->cache = cache;
	mg->old_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = cell;
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

Joe Thornber's avatar
Joe Thornber committed
/*----------------------------------------------------------------
 * bio processing
 *--------------------------------------------------------------*/
static void defer_bio(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_add(&cache->deferred_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void process_flush_bio(struct cache *cache, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
	BUG_ON(bio->bi_iter.bi_size);
Joe Thornber's avatar
Joe Thornber committed
	if (!pb->req_nr)
		remap_to_origin(cache, bio);
	else
		remap_to_cache(cache, bio, 0);

	issue(cache, bio);
}

/*
 * People generally discard large parts of a device, eg, the whole device
 * when formatting.  Splitting these large discards up into cache block
 * sized ios and then quiescing (always neccessary for discard) takes too
 * long.
 *
 * We keep it simple, and allow any size of discard to come in, and just
 * mark off blocks on the discard bitset.  No passdown occurs!
 *
 * To implement passdown we need to change the bio_prison such that a cell
 * can have a key that spans many blocks.
 */
static void process_discard_bio(struct cache *cache, struct bio *bio)
{
	dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector,
	dm_block_t end_block = bio_end_sector(bio);
Joe Thornber's avatar
Joe Thornber committed
	dm_block_t b;

	end_block = block_div(end_block, cache->sectors_per_block);
Joe Thornber's avatar
Joe Thornber committed

	for (b = start_block; b < end_block; b++)
		set_discard(cache, to_oblock(b));
Joe Thornber's avatar
Joe Thornber committed

	bio_endio(bio, 0);
}

static bool spare_migration_bandwidth(struct cache *cache)
{
	sector_t current_volume = (atomic_read(&cache->nr_migrations) + 1) *
		cache->sectors_per_block;
	return current_volume < cache->migration_threshold;
}

static void inc_hit_counter(struct cache *cache, struct bio *bio)
{
	atomic_inc(bio_data_dir(bio) == READ ?
		   &cache->stats.read_hit : &cache->stats.write_hit);
}

static void inc_miss_counter(struct cache *cache, struct bio *bio)
{
	atomic_inc(bio_data_dir(bio) == READ ?
		   &cache->stats.read_miss : &cache->stats.write_miss);
}

static void issue_cache_bio(struct cache *cache, struct bio *bio,
			    struct per_bio_data *pb,
			    dm_oblock_t oblock, dm_cblock_t cblock)
{
	pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
	remap_to_cache_dirty(cache, bio, oblock, cblock);
	issue(cache, bio);
}

Joe Thornber's avatar
Joe Thornber committed
static void process_bio(struct cache *cache, struct prealloc *structs,
			struct bio *bio)
{
	int r;
	bool release_cell = true;
	dm_oblock_t block = get_bio_block(cache, bio);
	struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell;
	struct policy_result lookup_result;
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
Joe Thornber's avatar
Joe Thornber committed
	bool discarded_block = is_discarded_oblock(cache, block);
	bool passthrough = passthrough_mode(&cache->features);
	bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));
Joe Thornber's avatar
Joe Thornber committed

	/*
	 * Check to see if that block is currently migrating.
	 */
	cell_prealloc = prealloc_get_cell(structs);
	r = bio_detain(cache, block, bio, cell_prealloc,
		       (cell_free_fn) prealloc_put_cell,
		       structs, &new_ocell);
	if (r > 0)
		return;

	r = policy_map(cache->policy, block, true, can_migrate, discarded_block,
		       bio, &lookup_result);

	if (r == -EWOULDBLOCK)
		/* migration has been denied */
		lookup_result.op = POLICY_MISS;

	switch (lookup_result.op) {
	case POLICY_HIT:
		if (passthrough) {
			inc_miss_counter(cache, bio);
			/*
			 * Passthrough always maps to the origin,
			 * invalidating any cache blocks that are written
			 * to.
			 */

			if (bio_data_dir(bio) == WRITE) {
				atomic_inc(&cache->stats.demotion);
				invalidate(cache, structs, block, lookup_result.cblock, new_ocell);
				release_cell = false;

			} else {
				/* FIXME: factor out issue_origin() */
				pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
				remap_to_origin_clear_discard(cache, bio, block);
				issue(cache, bio);
			}
		} else {
			inc_hit_counter(cache, bio);

			if (bio_data_dir(bio) == WRITE &&
			    writethrough_mode(&cache->features) &&
			    !is_dirty(cache, lookup_result.cblock)) {
				pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
				remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
				issue(cache, bio);
			} else
				issue_cache_bio(cache, bio, pb, block, lookup_result.cblock);
		}
Joe Thornber's avatar
Joe Thornber committed

		break;

	case POLICY_MISS:
		inc_miss_counter(cache, bio);
		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
		remap_to_origin_clear_discard(cache, bio, block);
		issue(cache, bio);
Joe Thornber's avatar
Joe Thornber committed
		break;

	case POLICY_NEW:
		atomic_inc(&cache->stats.promotion);
		promote(cache, structs, block, lookup_result.cblock, new_ocell);
		release_cell = false;
		break;

	case POLICY_REPLACE:
		cell_prealloc = prealloc_get_cell(structs);
		r = bio_detain(cache, lookup_result.old_oblock, bio, cell_prealloc,
			       (cell_free_fn) prealloc_put_cell,
			       structs, &old_ocell);
		if (r > 0) {
			/*
			 * We have to be careful to avoid lock inversion of
			 * the cells.  So we back off, and wait for the
			 * old_ocell to become free.
			 */
			policy_force_mapping(cache->policy, block,
					     lookup_result.old_oblock);
			atomic_inc(&cache->stats.cache_cell_clash);
			break;
		}
		atomic_inc(&cache->stats.demotion);
		atomic_inc(&cache->stats.promotion);

		demote_then_promote(cache, structs, lookup_result.old_oblock,
				    block, lookup_result.cblock,
				    old_ocell, new_ocell);
		release_cell = false;
		break;

	default:
		DMERR_LIMIT("%s: erroring bio, unknown policy op: %u", __func__,
			    (unsigned) lookup_result.op);
		bio_io_error(bio);
	}

	if (release_cell)
		cell_defer(cache, new_ocell, false);
}

static int need_commit_due_to_time(struct cache *cache)
{
	return jiffies < cache->last_commit_jiffies ||
	       jiffies > cache->last_commit_jiffies + COMMIT_PERIOD;
}

static int commit_if_needed(struct cache *cache)
{
	int r = 0;

	if ((cache->commit_requested || need_commit_due_to_time(cache)) &&
	    dm_cache_changed_this_transaction(cache->cmd)) {
Joe Thornber's avatar
Joe Thornber committed
		atomic_inc(&cache->stats.commit_count);
		cache->commit_requested = false;
		r = dm_cache_commit(cache->cmd, false);
		cache->last_commit_jiffies = jiffies;
Joe Thornber's avatar
Joe Thornber committed
}

static void process_deferred_bios(struct cache *cache)
{
	unsigned long flags;
	struct bio_list bios;
	struct bio *bio;
	struct prealloc structs;

	memset(&structs, 0, sizeof(structs));
	bio_list_init(&bios);

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_merge(&bios, &cache->deferred_bios);
	bio_list_init(&cache->deferred_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	while (!bio_list_empty(&bios)) {
		/*
		 * If we've got no free migration structs, and processing
		 * this bio might require one, we pause until there are some
		 * prepared mappings to process.
		 */
		if (prealloc_data_structs(cache, &structs)) {
			spin_lock_irqsave(&cache->lock, flags);
			bio_list_merge(&cache->deferred_bios, &bios);
			spin_unlock_irqrestore(&cache->lock, flags);
			break;
		}

		bio = bio_list_pop(&bios);

		if (bio->bi_rw & REQ_FLUSH)
			process_flush_bio(cache, bio);
		else if (bio->bi_rw & REQ_DISCARD)
			process_discard_bio(cache, bio);
		else
			process_bio(cache, &structs, bio);
	}

	prealloc_free_structs(cache, &structs);
}

static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
{
	unsigned long flags;
	struct bio_list bios;
	struct bio *bio;

	bio_list_init(&bios);

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_merge(&bios, &cache->deferred_flush_bios);
	bio_list_init(&cache->deferred_flush_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	while ((bio = bio_list_pop(&bios)))
		submit_bios ? generic_make_request(bio) : bio_io_error(bio);
}

static void process_deferred_writethrough_bios(struct cache *cache)
{
	unsigned long flags;
	struct bio_list bios;
	struct bio *bio;

	bio_list_init(&bios);

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_merge(&bios, &cache->deferred_writethrough_bios);
	bio_list_init(&cache->deferred_writethrough_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	while ((bio = bio_list_pop(&bios)))
		generic_make_request(bio);
}

Joe Thornber's avatar
Joe Thornber committed
static void writeback_some_dirty_blocks(struct cache *cache)
{
	int r = 0;
	dm_oblock_t oblock;
	dm_cblock_t cblock;
	struct prealloc structs;
	struct dm_bio_prison_cell *old_ocell;

	memset(&structs, 0, sizeof(structs));

	while (spare_migration_bandwidth(cache)) {
		if (prealloc_data_structs(cache, &structs))
			break;

		r = policy_writeback_work(cache->policy, &oblock, &cblock);
		if (r)
			break;

		r = get_cell(cache, oblock, &structs, &old_ocell);
		if (r) {
			policy_set_dirty(cache->policy, oblock);
			break;
		}

		writeback(cache, &structs, oblock, cblock, old_ocell);
	}

	prealloc_free_structs(cache, &structs);
}

/*----------------------------------------------------------------
 * Invalidations.
 * Dropping something from the cache *without* writing back.
 *--------------------------------------------------------------*/

static void process_invalidation_request(struct cache *cache, struct invalidation_request *req)
{
	int r = 0;
	uint64_t begin = from_cblock(req->cblocks->begin);
	uint64_t end = from_cblock(req->cblocks->end);

	while (begin != end) {
		r = policy_remove_cblock(cache->policy, to_cblock(begin));
		if (!r) {
			r = dm_cache_remove_mapping(cache->cmd, to_cblock(begin));
			if (r)
				break;

		} else if (r == -ENODATA) {
			/* harmless, already unmapped */
			r = 0;

		} else {
			DMERR("policy_remove_cblock failed");
			break;
		}

		begin++;
        }

	cache->commit_requested = true;

	req->err = r;
	atomic_set(&req->complete, 1);

	wake_up(&req->result_wait);
}

static void process_invalidation_requests(struct cache *cache)
{
	struct list_head list;
	struct invalidation_request *req, *tmp;

	INIT_LIST_HEAD(&list);
	spin_lock(&cache->invalidation_lock);
	list_splice_init(&cache->invalidation_requests, &list);
	spin_unlock(&cache->invalidation_lock);

	list_for_each_entry_safe (req, tmp, &list, list)
		process_invalidation_request(cache, req);
}

Joe Thornber's avatar
Joe Thornber committed
/*----------------------------------------------------------------
 * Main worker loop
 *--------------------------------------------------------------*/
Joe Thornber's avatar
Joe Thornber committed
{
	return atomic_read(&cache->quiescing);
static void ack_quiescing(struct cache *cache)
{
	if (is_quiescing(cache)) {
		atomic_inc(&cache->quiescing_ack);
		wake_up(&cache->quiescing_wait);
	}
}

static void wait_for_quiescing_ack(struct cache *cache)
{
	wait_event(cache->quiescing_wait, atomic_read(&cache->quiescing_ack));
}

static void start_quiescing(struct cache *cache)
Joe Thornber's avatar
Joe Thornber committed
{
	atomic_inc(&cache->quiescing);
static void stop_quiescing(struct cache *cache)
Joe Thornber's avatar
Joe Thornber committed
{
	atomic_set(&cache->quiescing, 0);
Joe Thornber's avatar
Joe Thornber committed
}

static void wait_for_migrations(struct cache *cache)
{
	wait_event(cache->migration_wait, !atomic_read(&cache->nr_migrations));
}

static void stop_worker(struct cache *cache)
{
	cancel_delayed_work(&cache->waker);
	flush_workqueue(cache->wq);
}

static void requeue_deferred_io(struct cache *cache)
{
	struct bio *bio;
	struct bio_list bios;

	bio_list_init(&bios);
	bio_list_merge(&bios, &cache->deferred_bios);
	bio_list_init(&cache->deferred_bios);

	while ((bio = bio_list_pop(&bios)))
		bio_endio(bio, DM_ENDIO_REQUEUE);
}

static int more_work(struct cache *cache)
{
	if (is_quiescing(cache))
		return !list_empty(&cache->quiesced_migrations) ||
			!list_empty(&cache->completed_migrations) ||
			!list_empty(&cache->need_commit_migrations);
	else
		return !bio_list_empty(&cache->deferred_bios) ||
			!bio_list_empty(&cache->deferred_flush_bios) ||
			!bio_list_empty(&cache->deferred_writethrough_bios) ||
Joe Thornber's avatar
Joe Thornber committed
			!list_empty(&cache->quiesced_migrations) ||
			!list_empty(&cache->completed_migrations) ||
			!list_empty(&cache->need_commit_migrations) ||
			cache->invalidate;
Joe Thornber's avatar
Joe Thornber committed
}

static void do_worker(struct work_struct *ws)
{
	struct cache *cache = container_of(ws, struct cache, worker);

	do {
		if (!is_quiescing(cache)) {
			writeback_some_dirty_blocks(cache);
			process_deferred_writethrough_bios(cache);
Joe Thornber's avatar
Joe Thornber committed
			process_deferred_bios(cache);
			process_invalidation_requests(cache);
Joe Thornber's avatar
Joe Thornber committed

		process_migrations(cache, &cache->quiesced_migrations, issue_copy);
		process_migrations(cache, &cache->completed_migrations, complete_migration);

		if (commit_if_needed(cache)) {
			process_deferred_flush_bios(cache, false);

			/*
			 * FIXME: rollback metadata or just go into a
			 * failure mode and error everything
			 */
		} else {
			process_deferred_flush_bios(cache, true);
			process_migrations(cache, &cache->need_commit_migrations,
					   migration_success_post_commit);
		}
Joe Thornber's avatar
Joe Thornber committed
	} while (more_work(cache));
}

/*
 * We want to commit periodically so that not too much
 * unwritten metadata builds up.
 */
static void do_waker(struct work_struct *ws)
{
	struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);
	policy_tick(cache->policy);
Joe Thornber's avatar
Joe Thornber committed
	wake_worker(cache);
	queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);
}

/*----------------------------------------------------------------*/

static int is_congested(struct dm_dev *dev, int bdi_bits)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);
	return bdi_congested(&q->backing_dev_info, bdi_bits);
}

static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
{
	struct cache *cache = container_of(cb, struct cache, callbacks);

	return is_congested(cache->origin_dev, bdi_bits) ||
		is_congested(cache->cache_dev, bdi_bits);
}

/*----------------------------------------------------------------
 * Target methods
 *--------------------------------------------------------------*/

/*
 * This function gets called on the error paths of the constructor, so we
 * have to cope with a partially initialised struct.
 */
static void destroy(struct cache *cache)
{
	unsigned i;

	if (cache->next_migration)
		mempool_free(cache->next_migration, cache->migration_pool);

	if (cache->migration_pool)
		mempool_destroy(cache->migration_pool);

	if (cache->all_io_ds)
		dm_deferred_set_destroy(cache->all_io_ds);

	if (cache->prison)
		dm_bio_prison_destroy(cache->prison);

	if (cache->wq)
		destroy_workqueue(cache->wq);

	if (cache->dirty_bitset)
		free_bitset(cache->dirty_bitset);

	if (cache->discard_bitset)
		free_bitset(cache->discard_bitset);

	if (cache->copier)
		dm_kcopyd_client_destroy(cache->copier);

	if (cache->cmd)
		dm_cache_metadata_close(cache->cmd);

	if (cache->metadata_dev)
		dm_put_device(cache->ti, cache->metadata_dev);

	if (cache->origin_dev)
		dm_put_device(cache->ti, cache->origin_dev);

	if (cache->cache_dev)
		dm_put_device(cache->ti, cache->cache_dev);

	if (cache->policy)
		dm_cache_policy_destroy(cache->policy);

	for (i = 0; i < cache->nr_ctr_args ; i++)
		kfree(cache->ctr_args[i]);
	kfree(cache->ctr_args);

	kfree(cache);
}

static void cache_dtr(struct dm_target *ti)
{
	struct cache *cache = ti->private;

	destroy(cache);
}

static sector_t get_dev_size(struct dm_dev *dev)
{
	return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
}

/*----------------------------------------------------------------*/

/*
 * Construct a cache device mapping.
 *
 * cache <metadata dev> <cache dev> <origin dev> <block size>
 *       <#feature args> [<feature arg>]*
 *       <policy> <#policy args> [<policy arg>]*
 *
 * metadata dev    : fast device holding the persistent metadata
 * cache dev	   : fast device holding cached data blocks
 * origin dev	   : slow device holding original data blocks
 * block size	   : cache unit size in sectors
 *
 * #feature args   : number of feature arguments passed
 * feature args    : writethrough.  (The default is writeback.)
 *
 * policy	   : the replacement policy to use
 * #policy args    : an even number of policy arguments corresponding
 *		     to key/value pairs passed to the policy
 * policy args	   : key/value pairs passed to the policy
 *		     E.g. 'sequential_threshold 1024'
 *		     See cache-policies.txt for details.
 *
 * Optional feature arguments are:
 *   writethrough  : write through caching that prohibits cache block
 *		     content from being different from origin block content.
 *		     Without this argument, the default behaviour is to write
 *		     back cache block contents later for performance reasons,
 *		     so they may differ from the corresponding origin blocks.
 */
struct cache_args {
	struct dm_target *ti;

	struct dm_dev *metadata_dev;

	struct dm_dev *cache_dev;
	sector_t cache_sectors;

	struct dm_dev *origin_dev;
	sector_t origin_sectors;

	uint32_t block_size;

	const char *policy_name;
	int policy_argc;
	const char **policy_argv;

	struct cache_features features;
};

static void destroy_cache_args(struct cache_args *ca)
{
	if (ca->metadata_dev)
		dm_put_device(ca->ti, ca->metadata_dev);

	if (ca->cache_dev)
		dm_put_device(ca->ti, ca->cache_dev);

	if (ca->origin_dev)
		dm_put_device(ca->ti, ca->origin_dev);

	kfree(ca);
}

static bool at_least_one_arg(struct dm_arg_set *as, char **error)
{
	if (!as->argc) {
		*error = "Insufficient args";
		return false;
	}

	return true;
}

static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
			      char **error)
{
	int r;
	sector_t metadata_dev_size;
	char b[BDEVNAME_SIZE];

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->metadata_dev);
	if (r) {
		*error = "Error opening metadata device";
		return r;
	}

	metadata_dev_size = get_dev_size(ca->metadata_dev);
	if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
		DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
		       bdevname(ca->metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS);

	return 0;
}

static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
			   char **error)
{
	int r;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->cache_dev);
	if (r) {
		*error = "Error opening cache device";
		return r;
	}
	ca->cache_sectors = get_dev_size(ca->cache_dev);

	return 0;
}

static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
			    char **error)
{
	int r;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->origin_dev);
	if (r) {
		*error = "Error opening origin device";
		return r;
	}

	ca->origin_sectors = get_dev_size(ca->origin_dev);
	if (ca->ti->len > ca->origin_sectors) {
		*error = "Device size larger than cached device";
		return -EINVAL;
	}

	return 0;
}

static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
			    char **error)
{
Joe Thornber's avatar
Joe Thornber committed

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size ||
	    block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
	    block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
	    block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
Joe Thornber's avatar
Joe Thornber committed
		*error = "Invalid data block size";
		return -EINVAL;
	}

	if (block_size > ca->cache_sectors) {
Joe Thornber's avatar
Joe Thornber committed
		*error = "Data block size is larger than the cache device";
		return -EINVAL;
	}

	ca->block_size = block_size;
Joe Thornber's avatar
Joe Thornber committed

	return 0;
}

static void init_features(struct cache_features *cf)
{
	cf->mode = CM_WRITE;
	cf->io_mode = CM_IO_WRITEBACK;
Joe Thornber's avatar
Joe Thornber committed
}

static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
			  char **error)
{
	static struct dm_arg _args[] = {
		{0, 1, "Invalid number of cache feature arguments"},
	};

	int r;
	unsigned argc;
	const char *arg;
	struct cache_features *cf = &ca->features;

	init_features(cf);