dm-cache-target.c

    	cache->discard_block_size =
    		calculate_discard_block_size(cache->sectors_per_block,
    					     cache->origin_sectors);
    	cache->discard_nr_blocks = oblock_to_dblock(cache, cache->origin_blocks);
    	cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
    	if (!cache->discard_bitset) {
    		*error = "could not allocate discard bitset";
    		goto bad;
    	}
    	clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
    
    	cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
    	if (IS_ERR(cache->copier)) {
    		*error = "could not create kcopyd client";
    		r = PTR_ERR(cache->copier);
    		goto bad;
    	}
    
    	cache->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
    	if (!cache->wq) {
    		*error = "could not create workqueue for metadata object";
    		goto bad;
    	}
    	INIT_WORK(&cache->worker, do_worker);
    	INIT_DELAYED_WORK(&cache->waker, do_waker);
    	cache->last_commit_jiffies = jiffies;
    
    	cache->prison = dm_bio_prison_create(PRISON_CELLS);
    	if (!cache->prison) {
    		*error = "could not create bio prison";
    		goto bad;
    	}
    
    	cache->all_io_ds = dm_deferred_set_create();
    	if (!cache->all_io_ds) {
    		*error = "could not create all_io deferred set";
    		goto bad;
    	}
    
    	cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE,
    							 migration_cache);
    	if (!cache->migration_pool) {
    		*error = "Error creating cache's migration mempool";
    		goto bad;
    	}
    
    	cache->next_migration = NULL;
    
    	cache->need_tick_bio = true;
    	cache->sized = false;
    	cache->quiescing = false;
    	cache->commit_requested = false;
    	cache->loaded_mappings = false;
    	cache->loaded_discards = false;
    
    	load_stats(cache);
    
    	atomic_set(&cache->stats.demotion, 0);
    	atomic_set(&cache->stats.promotion, 0);
    	atomic_set(&cache->stats.copies_avoided, 0);
    	atomic_set(&cache->stats.cache_cell_clash, 0);
    	atomic_set(&cache->stats.commit_count, 0);
    	atomic_set(&cache->stats.discard_count, 0);
    
    	*result = cache;
    	return 0;
    
    bad:
    	destroy(cache);
    	return r;
    }
    
    static int copy_ctr_args(struct cache *cache, int argc, const char **argv)
    {
    	unsigned i;
    	const char **copy;
    
    	copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL);
    	if (!copy)
    		return -ENOMEM;
    	for (i = 0; i < argc; i++) {
    		copy[i] = kstrdup(argv[i], GFP_KERNEL);
    		if (!copy[i]) {
    			while (i--)
    				kfree(copy[i]);
    			kfree(copy);
    			return -ENOMEM;
    		}
    	}
    
    	cache->nr_ctr_args = argc;
    	cache->ctr_args = copy;
    
    	return 0;
    }
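
/*
 * The cache target table line takes roughly the following form (see
 * Documentation/device-mapper/cache.txt for the authoritative syntax;
 * the device names and numbers below are illustrative only):
 *
 *   cache <metadata dev> <cache dev> <origin dev> <block size>
 *         <#feature args> [<feature arg>]*
 *         <policy> <#policy args> [<policy arg>]*
 *
 * e.g. loaded with dmsetup:
 *
 *   dmsetup create my-cache --table '0 41943040 cache /dev/mapper/meta \
 *       /dev/mapper/ssd /dev/mapper/origin 512 1 writethrough default 0'
 */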
    
    static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv)
    {
    	int r = -EINVAL;
    	struct cache_args *ca;
    	struct cache *cache = NULL;
    
    	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
    	if (!ca) {
    		ti->error = "Error allocating memory for cache";
    		return -ENOMEM;
    	}
    	ca->ti = ti;
    
    	r = parse_cache_args(ca, argc, argv, &ti->error);
    	if (r)
    		goto out;
    
	r = cache_create(ca, &cache);
	if (r)
		goto out;

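	/*
	 * The first three args are the metadata, cache and origin device
	 * paths.  cache_status() regenerates those itself, so only the
	 * remaining args need to be retained for the table line.
	 */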
    	r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
    	if (r) {
    		destroy(cache);
    		goto out;
    	}
    
    	ti->private = cache;
    
    out:
    	destroy_cache_args(ca);
    	return r;
    }
    
    static int cache_map(struct dm_target *ti, struct bio *bio)
    {
    	struct cache *cache = ti->private;
    
    	int r;
	dm_oblock_t block = get_bio_block(cache, bio);
	size_t pb_data_size = get_per_bio_data_size(cache);
    	bool can_migrate = false;
    	bool discarded_block;
    	struct dm_bio_prison_cell *cell;
    	struct policy_result lookup_result;
    	struct per_bio_data *pb;
    
    	if (from_oblock(block) > from_oblock(cache->origin_blocks)) {
    		/*
    		 * This can only occur if the io goes to a partial block at
    		 * the end of the origin device.  We don't cache these.
    		 * Just remap to the origin and carry on.
    		 */
    		remap_to_origin_clear_discard(cache, bio, block);
    		return DM_MAPIO_REMAPPED;
	}

	pb = init_per_bio_data(bio, pb_data_size);

    	if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD)) {
    		defer_bio(cache, bio);
    		return DM_MAPIO_SUBMITTED;
    	}
    
    	/*
    	 * Check to see if that block is currently migrating.
    	 */
    	cell = alloc_prison_cell(cache);
    	if (!cell) {
    		defer_bio(cache, bio);
    		return DM_MAPIO_SUBMITTED;
    	}
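	/*
	 * bio_detain() returns > 0 if another bio already holds a cell for
	 * this block (the bio has been queued in that cell and will be
	 * handled when the holder releases it), 0 if we became the holder,
	 * and < 0 on error.  (Summary of the assumed dm_bio_prison
	 * semantics.)
	 */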
    
    	r = bio_detain(cache, block, bio, cell,
    		       (cell_free_fn) free_prison_cell,
    		       cache, &cell);
    	if (r) {
    		if (r < 0)
    			defer_bio(cache, bio);
    
    		return DM_MAPIO_SUBMITTED;
    	}
    
    	discarded_block = is_discarded_oblock(cache, block);
    
    	r = policy_map(cache->policy, block, false, can_migrate, discarded_block,
    		       bio, &lookup_result);
    	if (r == -EWOULDBLOCK) {
    		cell_defer(cache, cell, true);
    		return DM_MAPIO_SUBMITTED;
    
    	} else if (r) {
    		DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r);
    		bio_io_error(bio);
    		return DM_MAPIO_SUBMITTED;
    	}
    
    	switch (lookup_result.op) {
    	case POLICY_HIT:
    		inc_hit_counter(cache, bio);
		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);

		if (is_writethrough_io(cache, bio, lookup_result.cblock))
			remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
		else
			remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);

		cell_defer(cache, cell, false);
		break;
    
    	case POLICY_MISS:
    		inc_miss_counter(cache, bio);
    		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
    
    		if (pb->req_nr != 0) {
    			/*
    			 * This is a duplicate writethrough io that is no
    			 * longer needed because the block has been demoted.
    			 */
    			bio_endio(bio, 0);
    			cell_defer(cache, cell, false);
    			return DM_MAPIO_SUBMITTED;
    		} else {
    			remap_to_origin_clear_discard(cache, bio, block);
    			cell_defer(cache, cell, false);
    		}
    		break;
    
    	default:
    		DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__,
    			    (unsigned) lookup_result.op);
    		bio_io_error(bio);
    		return DM_MAPIO_SUBMITTED;
    	}
    
    	return DM_MAPIO_REMAPPED;
    }
    
    static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
    {
    	struct cache *cache = ti->private;
	unsigned long flags;
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

    	if (pb->tick) {
    		policy_tick(cache->policy);
    
    		spin_lock_irqsave(&cache->lock, flags);
    		cache->need_tick_bio = true;
    		spin_unlock_irqrestore(&cache->lock, flags);
    	}
    
    	check_for_quiesced_migrations(cache, pb);
    
    	return 0;
    }
    
    static int write_dirty_bitset(struct cache *cache)
    {
    	unsigned i, r;
    
    	for (i = 0; i < from_cblock(cache->cache_size); i++) {
    		r = dm_cache_set_dirty(cache->cmd, to_cblock(i),
    				       is_dirty(cache, to_cblock(i)));
    		if (r)
    			return r;
    	}
    
    	return 0;
    }
    
    static int write_discard_bitset(struct cache *cache)
    {
    	unsigned i, r;
    
    	r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
    					   cache->discard_nr_blocks);
    	if (r) {
    		DMERR("could not resize on-disk discard bitset");
    		return r;
    	}
    
    	for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
    		r = dm_cache_set_discard(cache->cmd, to_dblock(i),
    					 is_discarded(cache, to_dblock(i)));
    		if (r)
    			return r;
    	}
    
    	return 0;
    }
    
    static int save_hint(void *context, dm_cblock_t cblock, dm_oblock_t oblock,
    		     uint32_t hint)
    {
    	struct cache *cache = context;
    	return dm_cache_save_hint(cache->cmd, cblock, hint);
    }
    
    static int write_hints(struct cache *cache)
    {
    	int r;
    
    	r = dm_cache_begin_hints(cache->cmd, cache->policy);
    	if (r) {
    		DMERR("dm_cache_begin_hints failed");
    		return r;
    	}
    
    	r = policy_walk_mappings(cache->policy, save_hint, cache);
    	if (r)
    		DMERR("policy_walk_mappings failed");
    
    	return r;
    }
    
    /*
     * returns true on success
     */
    static bool sync_metadata(struct cache *cache)
    {
    	int r1, r2, r3, r4;
    
    	r1 = write_dirty_bitset(cache);
    	if (r1)
    		DMERR("could not write dirty bitset");
    
    	r2 = write_discard_bitset(cache);
    	if (r2)
    		DMERR("could not write discard bitset");
    
    	save_stats(cache);
    
    	r3 = write_hints(cache);
    	if (r3)
    		DMERR("could not write hints");
    
    	/*
    	 * If writing the above metadata failed, we still commit, but don't
    	 * set the clean shutdown flag.  This will effectively force every
    	 * dirty bit to be set on reload.
    	 */
    	r4 = dm_cache_commit(cache->cmd, !r1 && !r2 && !r3);
    	if (r4)
    		DMERR("could not write cache metadata.  Data loss may occur.");
    
    	return !r1 && !r2 && !r3 && !r4;
    }
    
    static void cache_postsuspend(struct dm_target *ti)
    {
    	struct cache *cache = ti->private;
    
    	start_quiescing(cache);
    	wait_for_migrations(cache);
    	stop_worker(cache);
    	requeue_deferred_io(cache);
    	stop_quiescing(cache);
    
    	(void) sync_metadata(cache);
    }
    
    static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
    			bool dirty, uint32_t hint, bool hint_valid)
    {
    	int r;
    	struct cache *cache = context;
    
    	r = policy_load_mapping(cache->policy, oblock, cblock, hint, hint_valid);
    	if (r)
    		return r;
    
    	if (dirty)
    		set_dirty(cache, oblock, cblock);
    	else
    		clear_dirty(cache, oblock, cblock);
    
    	return 0;
    }
    
    static int load_discard(void *context, sector_t discard_block_size,
    			dm_dblock_t dblock, bool discard)
    {
    	struct cache *cache = context;
    
    	/* FIXME: handle mis-matched block size */
    
    	if (discard)
    		set_discard(cache, dblock);
    	else
    		clear_discard(cache, dblock);
    
    	return 0;
    }
    
    static int cache_preresume(struct dm_target *ti)
    {
    	int r = 0;
    	struct cache *cache = ti->private;
    	sector_t actual_cache_size = get_dev_size(cache->cache_dev);
    	(void) sector_div(actual_cache_size, cache->sectors_per_block);
    
    	/*
    	 * Check to see if the cache has resized.
    	 */
    	if (from_cblock(cache->cache_size) != actual_cache_size || !cache->sized) {
    		cache->cache_size = to_cblock(actual_cache_size);
    
    		r = dm_cache_resize(cache->cmd, cache->cache_size);
    		if (r) {
    			DMERR("could not resize cache metadata");
    			return r;
    		}
    
    		cache->sized = true;
    	}
    
	if (!cache->loaded_mappings) {
		r = dm_cache_load_mappings(cache->cmd, cache->policy,
					   load_mapping, cache);
    		if (r) {
    			DMERR("could not load cache mappings");
    			return r;
    		}
    
    		cache->loaded_mappings = true;
    	}
    
    	if (!cache->loaded_discards) {
    		r = dm_cache_load_discards(cache->cmd, load_discard, cache);
    		if (r) {
    			DMERR("could not load origin discards");
    			return r;
    		}
    
    		cache->loaded_discards = true;
    	}
    
    	return r;
    }
    
    static void cache_resume(struct dm_target *ti)
    {
    	struct cache *cache = ti->private;
    
    	cache->need_tick_bio = true;
    	do_waker(&cache->waker.work);
    }
    
    /*
     * Status format:
     *
     * <#used metadata blocks>/<#total metadata blocks>
     * <#read hits> <#read misses> <#write hits> <#write misses>
     * <#demotions> <#promotions> <#blocks in cache> <#dirty>
     * <#features> <features>*
     * <#core args> <core args>
     * <#policy args> <policy args>*
     */
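/*
 * For example (values illustrative only), a STATUSTYPE_INFO result might
 * look like:
 *
 *   23/1024 9001 412 1818 297 84 91 7680 26 1 writethrough 2 migration_threshold 2048
 *
 * i.e. metadata block usage, the read/write hit and miss counters,
 * demotions, promotions, residency, the dirty count, then the feature,
 * core and policy argument groups.
 */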
    static void cache_status(struct dm_target *ti, status_type_t type,
    			 unsigned status_flags, char *result, unsigned maxlen)
    {
    	int r = 0;
    	unsigned i;
    	ssize_t sz = 0;
    	dm_block_t nr_free_blocks_metadata = 0;
    	dm_block_t nr_blocks_metadata = 0;
    	char buf[BDEVNAME_SIZE];
    	struct cache *cache = ti->private;
    	dm_cblock_t residency;
    
    	switch (type) {
    	case STATUSTYPE_INFO:
    		/* Commit to ensure statistics aren't out-of-date */
    		if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) {
    			r = dm_cache_commit(cache->cmd, false);
    			if (r)
    				DMERR("could not commit metadata for accurate status");
    		}
    
    		r = dm_cache_get_free_metadata_block_count(cache->cmd,
    							   &nr_free_blocks_metadata);
    		if (r) {
    			DMERR("could not get metadata free block count");
    			goto err;
    		}
    
    		r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata);
    		if (r) {
    			DMERR("could not get metadata device size");
    			goto err;
    		}
    
    		residency = policy_residency(cache->policy);
    
    		DMEMIT("%llu/%llu %u %u %u %u %u %u %llu %u ",
    		       (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
    		       (unsigned long long)nr_blocks_metadata,
    		       (unsigned) atomic_read(&cache->stats.read_hit),
    		       (unsigned) atomic_read(&cache->stats.read_miss),
    		       (unsigned) atomic_read(&cache->stats.write_hit),
    		       (unsigned) atomic_read(&cache->stats.write_miss),
    		       (unsigned) atomic_read(&cache->stats.demotion),
    		       (unsigned) atomic_read(&cache->stats.promotion),
    		       (unsigned long long) from_cblock(residency),
    		       cache->nr_dirty);
    
    		if (cache->features.write_through)
    			DMEMIT("1 writethrough ");
    		else
    			DMEMIT("0 ");
    
    		DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
    		if (sz < maxlen) {
    			r = policy_emit_config_values(cache->policy, result + sz, maxlen - sz);
    			if (r)
    				DMERR("policy_emit_config_values returned %d", r);
    		}
    
    		break;
    
    	case STATUSTYPE_TABLE:
    		format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
    		DMEMIT("%s ", buf);
    		format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
    		DMEMIT("%s ", buf);
    		format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
    		DMEMIT("%s", buf);
    
    		for (i = 0; i < cache->nr_ctr_args - 1; i++)
    			DMEMIT(" %s", cache->ctr_args[i]);
    		if (cache->nr_ctr_args)
    			DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]);
    	}
    
    	return;
    
    err:
    	DMEMIT("Error");
    }
    
    /*
     * Supports <key> <value>.
     *
     * The key migration_threshold is supported by the cache target core.
     */
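/*
 * For example, to raise the migration threshold on a live device (the
 * device name is illustrative):
 *
 *   dmsetup message my-cache 0 migration_threshold 2048
 */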
    static int cache_message(struct dm_target *ti, unsigned argc, char **argv)
    {
    	int r;
    	struct cache *cache = ti->private;
    
    	if (argc != 2)
    		return -EINVAL;
    
    	r = process_config_option(cache, argv);
    	if (r == NOT_CORE_OPTION)
    		return policy_set_config_value(cache->policy, argv[0], argv[1]);
    
    	return r;
    }
    
    static int cache_iterate_devices(struct dm_target *ti,
    				 iterate_devices_callout_fn fn, void *data)
    {
    	int r = 0;
    	struct cache *cache = ti->private;
    
    	r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data);
    	if (!r)
    		r = fn(ti, cache->origin_dev, 0, ti->len, data);
    
    	return r;
    }
    
    /*
     * We assume I/O is going to the origin (which is the volume
     * more likely to have restrictions e.g. by being striped).
     * (Looking up the exact location of the data would be expensive
     * and could always be out of date by the time the bio is submitted.)
     */
    static int cache_bvec_merge(struct dm_target *ti,
    			    struct bvec_merge_data *bvm,
    			    struct bio_vec *biovec, int max_size)
    {
    	struct cache *cache = ti->private;
    	struct request_queue *q = bdev_get_queue(cache->origin_dev->bdev);
    
    	if (!q->merge_bvec_fn)
    		return max_size;
    
    	bvm->bi_bdev = cache->origin_dev->bdev;
    	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
    }
    
    static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
    {
    	/*
    	 * FIXME: these limits may be incompatible with the cache device
    	 */
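	/*
	 * Note: discard_block_size is in sectors, so max_discard_sectors
	 * covers 1024 discard blocks, while discard_granularity (which the
	 * block layer expects in bytes) is converted via SECTOR_SHIFT.
	 */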
    	limits->max_discard_sectors = cache->discard_block_size * 1024;
    	limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
    }
    
    static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
    {
    	struct cache *cache = ti->private;
    
    	blk_limits_io_min(limits, 0);
    	blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
    	set_discard_limits(cache, limits);
    }
    
    /*----------------------------------------------------------------*/
    
    static struct target_type cache_target = {
	.name = "cache",
	.version = {1, 1, 0},
	.module = THIS_MODULE,
    	.ctr = cache_ctr,
    	.dtr = cache_dtr,
    	.map = cache_map,
    	.end_io = cache_end_io,
    	.postsuspend = cache_postsuspend,
    	.preresume = cache_preresume,
    	.resume = cache_resume,
    	.status = cache_status,
    	.message = cache_message,
    	.iterate_devices = cache_iterate_devices,
    	.merge = cache_bvec_merge,
    	.io_hints = cache_io_hints,
    };
    
    static int __init dm_cache_init(void)
    {
    	int r;
    
    	r = dm_register_target(&cache_target);
    	if (r) {
    		DMERR("cache target registration failed: %d", r);
    		return r;
    	}
    
    	migration_cache = KMEM_CACHE(dm_cache_migration, 0);
    	if (!migration_cache) {
    		dm_unregister_target(&cache_target);
    		return -ENOMEM;
    	}
    
    	return 0;
    }
    
    static void __exit dm_cache_exit(void)
    {
    	dm_unregister_target(&cache_target);
    	kmem_cache_destroy(migration_cache);
    }
    
    module_init(dm_cache_init);
    module_exit(dm_cache_exit);
    
    MODULE_DESCRIPTION(DM_NAME " cache target");
    MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>");
    MODULE_LICENSE("GPL");