    /*
     * Copyright (C) 2012 Red Hat. All rights reserved.
     *
     * This file is released under the GPL.
     */
    
    #include "dm.h"
    #include "dm-bio-prison.h"
    #include "dm-cache-metadata.h"
    
    #include <linux/dm-io.h>
    #include <linux/dm-kcopyd.h>
    #include <linux/init.h>
    #include <linux/mempool.h>
    #include <linux/module.h>
    #include <linux/slab.h>
    #include <linux/vmalloc.h>
    
    #define DM_MSG_PREFIX "cache"
    
    DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
    	"A percentage of time allocated for copying to and/or from cache");
    
    /*----------------------------------------------------------------*/
    
    /*
     * Glossary:
     *
     * oblock: index of an origin block
     * cblock: index of a cache block
     * promotion: movement of a block from origin to cache
     * demotion: movement of a block from cache to origin
     * migration: movement of a block between the origin and cache device,
     *	      either direction
     */
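
    /*
     * Example (illustrative numbers only): with 512 byte sectors and a
     * 256KiB cache block, a 1TiB origin spans 4M oblocks while a 16GiB
     * cache device provides 64K cblocks; a promotion copies one such
     * block from the origin into a free cblock and records the mapping.
     */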
    
    /*----------------------------------------------------------------*/
    
    static size_t bitset_size_in_bytes(unsigned nr_entries)
    {
    	return sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG);
    }
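
    /*
     * For example, on a 64 bit host (BITS_PER_LONG == 64) a bitset of
     * 1000 entries rounds up to 16 longs, i.e. 128 bytes.
     */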
    
    static unsigned long *alloc_bitset(unsigned nr_entries)
    {
    	size_t s = bitset_size_in_bytes(nr_entries);
    	return vzalloc(s);
    }
    
    static void clear_bitset(void *bitset, unsigned nr_entries)
    {
    	size_t s = bitset_size_in_bytes(nr_entries);
    	memset(bitset, 0, s);
    }
    
    static void free_bitset(unsigned long *bits)
    {
    	vfree(bits);
    }
    
    /*----------------------------------------------------------------*/
    
    #define PRISON_CELLS 1024
    #define MIGRATION_POOL_SIZE 128
    #define COMMIT_PERIOD HZ
    #define MIGRATION_COUNT_WINDOW 10
    
    /*
     * The block size of the device holding cache data must be >= 32KB
     */
    #define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
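
    /*
     * With 512 byte sectors (SECTOR_SHIFT == 9) this is 32768 >> 9 = 64
     * sectors.
     */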
    
    /*
     * FIXME: the cache is read/write for the time being.
     */
    enum cache_mode {
    	CM_WRITE,		/* metadata may be changed */
    	CM_READ_ONLY,		/* metadata may not be changed */
    };
    
    struct cache_features {
    	enum cache_mode mode;
    	bool write_through:1;
    };
    
    struct cache_stats {
    	atomic_t read_hit;
    	atomic_t read_miss;
    	atomic_t write_hit;
    	atomic_t write_miss;
    	atomic_t demotion;
    	atomic_t promotion;
    	atomic_t copies_avoided;
    	atomic_t cache_cell_clash;
    	atomic_t commit_count;
    	atomic_t discard_count;
    };
    
    struct cache {
    	struct dm_target *ti;
    	struct dm_target_callbacks callbacks;
    
    	/*
    	 * Metadata is written to this device.
    	 */
    	struct dm_dev *metadata_dev;
    
    	/*
    	 * The slower of the two data devices.  Typically a spindle.
    	 */
    	struct dm_dev *origin_dev;
    
    	/*
    	 * The faster of the two data devices.  Typically an SSD.
    	 */
    	struct dm_dev *cache_dev;
    
    	/*
    	 * Cache features such as write-through.
    	 */
    	struct cache_features features;
    
    	/*
    	 * Size of the origin device in _complete_ blocks and native sectors.
    	 */
    	dm_oblock_t origin_blocks;
    	sector_t origin_sectors;
    
    	/*
    	 * Size of the cache device in blocks.
    	 */
    	dm_cblock_t cache_size;
    
    	/*
    	 * Fields for converting from sectors to blocks.
    	 */
    	uint32_t sectors_per_block;
    	int sectors_per_block_shift;
    
    	struct dm_cache_metadata *cmd;
    
    	spinlock_t lock;
    	struct bio_list deferred_bios;
    	struct bio_list deferred_flush_bios;
    	struct list_head quiesced_migrations;
    	struct list_head completed_migrations;
    	struct list_head need_commit_migrations;
    	sector_t migration_threshold;
    	atomic_t nr_migrations;
    	wait_queue_head_t migration_wait;
    
    	/*
    	 * cache_size entries, dirty if set
    	 */
    	dm_cblock_t nr_dirty;
    	unsigned long *dirty_bitset;
    
    	/*
    	 * origin_blocks entries, discarded if set.
    	 */
    	uint32_t discard_block_size; /* a power of 2 times sectors per block */
    	dm_dblock_t discard_nr_blocks;
    	unsigned long *discard_bitset;
    
    	struct dm_kcopyd_client *copier;
    	struct workqueue_struct *wq;
    	struct work_struct worker;
    
    	struct delayed_work waker;
    	unsigned long last_commit_jiffies;
    
    	struct dm_bio_prison *prison;
    	struct dm_deferred_set *all_io_ds;
    
    	mempool_t *migration_pool;
    	struct dm_cache_migration *next_migration;
    
    	struct dm_cache_policy *policy;
    	unsigned policy_nr_args;
    
    	bool need_tick_bio:1;
    	bool sized:1;
    	bool quiescing:1;
    	bool commit_requested:1;
    	bool loaded_mappings:1;
    	bool loaded_discards:1;
    
    	struct cache_stats stats;
    
    	/*
    	 * Rather than reconstructing the table line for the status we just
    	 * save it and regurgitate.
    	 */
    	unsigned nr_ctr_args;
    	const char **ctr_args;
    };
    
    struct per_bio_data {
    	bool tick:1;
    	unsigned req_nr:2;
    	struct dm_deferred_entry *all_io_entry;
    };
    
    struct dm_cache_migration {
    	struct list_head list;
    	struct cache *cache;
    
    	unsigned long start_jiffies;
    	dm_oblock_t old_oblock;
    	dm_oblock_t new_oblock;
    	dm_cblock_t cblock;
    
    	bool err:1;
    	bool writeback:1;
    	bool demote:1;
    	bool promote:1;
    
    	struct dm_bio_prison_cell *old_ocell;
    	struct dm_bio_prison_cell *new_ocell;
    };
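
    /*
     * The writeback/demote/promote flags select the kind of migration:
     * writeback alone copies a dirty cache block back to the origin,
     * demote alone drops a mapping, demote + promote replaces an existing
     * mapping with a new one, and promote alone pulls a block into a free
     * cache block.
     */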
    
    /*
     * Processing a bio in the worker thread may require these memory
     * allocations.  We prealloc to avoid deadlocks (the same worker thread
     * frees them back to the mempool).
     */
    struct prealloc {
    	struct dm_cache_migration *mg;
    	struct dm_bio_prison_cell *cell1;
    	struct dm_bio_prison_cell *cell2;
    };
    
    static void wake_worker(struct cache *cache)
    {
    	queue_work(cache->wq, &cache->worker);
    }
    
    /*----------------------------------------------------------------*/
    
    static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache)
    {
    	/* FIXME: change to use a local slab. */
    	return dm_bio_prison_alloc_cell(cache->prison, GFP_NOWAIT);
    }
    
    static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell)
    {
    	dm_bio_prison_free_cell(cache->prison, cell);
    }
    
    static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
    {
    	if (!p->mg) {
    		p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
    		if (!p->mg)
    			return -ENOMEM;
    	}
    
    	if (!p->cell1) {
    		p->cell1 = alloc_prison_cell(cache);
    		if (!p->cell1)
    			return -ENOMEM;
    	}
    
    	if (!p->cell2) {
    		p->cell2 = alloc_prison_cell(cache);
    		if (!p->cell2)
    			return -ENOMEM;
    	}
    
    	return 0;
    }
    
    static void prealloc_free_structs(struct cache *cache, struct prealloc *p)
    {
    	if (p->cell2)
    		free_prison_cell(cache, p->cell2);
    
    	if (p->cell1)
    		free_prison_cell(cache, p->cell1);
    
    	if (p->mg)
    		mempool_free(p->mg, cache->migration_pool);
    }
    
    static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
    {
    	struct dm_cache_migration *mg = p->mg;
    
    	BUG_ON(!mg);
    	p->mg = NULL;
    
    	return mg;
    }
    
    /*
     * You must have a cell within the prealloc struct to return.  If not this
     * function will BUG() rather than returning NULL.
     */
    static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p)
    {
    	struct dm_bio_prison_cell *r = NULL;
    
    	if (p->cell1) {
    		r = p->cell1;
    		p->cell1 = NULL;
    
    	} else if (p->cell2) {
    		r = p->cell2;
    		p->cell2 = NULL;
    	} else
    		BUG();
    
    	return r;
    }
    
    /*
     * You can't have more than two cells in a prealloc struct.  BUG() will be
     * called if you try and overfill.
     */
    static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell)
    {
    	if (!p->cell2)
    		p->cell2 = cell;
    
    	else if (!p->cell1)
    		p->cell1 = cell;
    
    	else
    		BUG();
    }
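
    /*
     * Typical usage (sketch): the worker fills a struct prealloc via
     * prealloc_data_structs() for each bio it processes, pulls structures
     * out with the getters above as needed, and hands anything unused
     * back with prealloc_free_structs().
     */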
    
    /*----------------------------------------------------------------*/
    
    static void build_key(dm_oblock_t oblock, struct dm_cell_key *key)
    {
    	key->virtual = 0;
    	key->dev = 0;
    	key->block = from_oblock(oblock);
    }
    
    /*
     * The caller hands in a preallocated cell, and a free function for it.
     * The cell will be freed if there's an error, or if it wasn't used because
     * a cell with that key already exists.
     */
    typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);
    
    static int bio_detain(struct cache *cache, dm_oblock_t oblock,
    		      struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
    		      cell_free_fn free_fn, void *free_context,
    		      struct dm_bio_prison_cell **cell_result)
    {
    	int r;
    	struct dm_cell_key key;
    
    	build_key(oblock, &key);
    	r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
    	if (r)
    		free_fn(free_context, cell_prealloc);
    
    	return r;
    }
    
    static int get_cell(struct cache *cache,
    		    dm_oblock_t oblock,
    		    struct prealloc *structs,
    		    struct dm_bio_prison_cell **cell_result)
    {
    	int r;
    	struct dm_cell_key key;
    	struct dm_bio_prison_cell *cell_prealloc;
    
    	cell_prealloc = prealloc_get_cell(structs);
    
    	build_key(oblock, &key);
    	r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result);
    	if (r)
    		prealloc_put_cell(structs, cell_prealloc);
    
    	return r;
    }
    
    /*----------------------------------------------------------------*/
    
    static bool is_dirty(struct cache *cache, dm_cblock_t b)
    {
    	return test_bit(from_cblock(b), cache->dirty_bitset);
    }
    
    static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
    {
    	if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
    		cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) + 1);
    		policy_set_dirty(cache->policy, oblock);
    	}
    }
    
    static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
    {
    	if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
    		policy_clear_dirty(cache->policy, oblock);
    		cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) - 1);
    		if (!from_cblock(cache->nr_dirty))
    			dm_table_event(cache->ti->table);
    	}
    }
    
    /*----------------------------------------------------------------*/

    static bool block_size_is_power_of_two(struct cache *cache)
    {
    	return cache->sectors_per_block_shift >= 0;
    }

    static dm_block_t block_div(dm_block_t b, uint32_t n)
    {
    	do_div(b, n);

    	return b;
    }

    static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
    {
    	uint32_t discard_blocks = cache->discard_block_size;
    	dm_block_t b = from_oblock(oblock);

    	if (!block_size_is_power_of_two(cache))
    		discard_blocks = discard_blocks / cache->sectors_per_block;
    	else
    		discard_blocks >>= cache->sectors_per_block_shift;

    	b = block_div(b, discard_blocks);

    	return to_dblock(b);
    }
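
    /*
     * e.g. with sectors_per_block = 128 and a 1024 sector discard block,
     * each discard block covers 8 origin blocks, so oblock 20 maps to
     * dblock 2 (illustrative numbers only).
     */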
    
    static void set_discard(struct cache *cache, dm_dblock_t b)
    {
    	unsigned long flags;
    
    	atomic_inc(&cache->stats.discard_count);
    
    	spin_lock_irqsave(&cache->lock, flags);
    	set_bit(from_dblock(b), cache->discard_bitset);
    	spin_unlock_irqrestore(&cache->lock, flags);
    }
    
    static void clear_discard(struct cache *cache, dm_dblock_t b)
    {
    	unsigned long flags;
    
    	spin_lock_irqsave(&cache->lock, flags);
    	clear_bit(from_dblock(b), cache->discard_bitset);
    	spin_unlock_irqrestore(&cache->lock, flags);
    }
    
    static bool is_discarded(struct cache *cache, dm_dblock_t b)
    {
    	int r;
    	unsigned long flags;
    
    	spin_lock_irqsave(&cache->lock, flags);
    	r = test_bit(from_dblock(b), cache->discard_bitset);
    	spin_unlock_irqrestore(&cache->lock, flags);
    
    	return r;
    }
    
    static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
    {
    	int r;
    	unsigned long flags;
    
    	spin_lock_irqsave(&cache->lock, flags);
    	r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
    		     cache->discard_bitset);
    	spin_unlock_irqrestore(&cache->lock, flags);
    
    	return r;
    }
    
    /*----------------------------------------------------------------*/
    
    static void load_stats(struct cache *cache)
    {
    	struct dm_cache_statistics stats;
    
    	dm_cache_metadata_get_stats(cache->cmd, &stats);
    	atomic_set(&cache->stats.read_hit, stats.read_hits);
    	atomic_set(&cache->stats.read_miss, stats.read_misses);
    	atomic_set(&cache->stats.write_hit, stats.write_hits);
    	atomic_set(&cache->stats.write_miss, stats.write_misses);
    }
    
    static void save_stats(struct cache *cache)
    {
    	struct dm_cache_statistics stats;
    
    	stats.read_hits = atomic_read(&cache->stats.read_hit);
    	stats.read_misses = atomic_read(&cache->stats.read_miss);
    	stats.write_hits = atomic_read(&cache->stats.write_hit);
    	stats.write_misses = atomic_read(&cache->stats.write_miss);
    
    	dm_cache_metadata_set_stats(cache->cmd, &stats);
    }
    
    /*----------------------------------------------------------------
     * Per bio data
     *--------------------------------------------------------------*/
    static struct per_bio_data *get_per_bio_data(struct bio *bio)
    {
    	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
    	BUG_ON(!pb);
    	return pb;
    }
    
    static struct per_bio_data *init_per_bio_data(struct bio *bio)
    {
    	struct per_bio_data *pb = get_per_bio_data(bio);
    
    	pb->tick = false;
    	pb->req_nr = dm_bio_get_target_bio_nr(bio);
    	pb->all_io_entry = NULL;
    
    	return pb;
    }
    
    /*----------------------------------------------------------------
     * Remapping
     *--------------------------------------------------------------*/
    static void remap_to_origin(struct cache *cache, struct bio *bio)
    {
    	bio->bi_bdev = cache->origin_dev->bdev;
    }
    
    static void remap_to_cache(struct cache *cache, struct bio *bio,
    			   dm_cblock_t cblock)
    {
    	sector_t bi_sector = bio->bi_sector;
    
    	bio->bi_bdev = cache->cache_dev->bdev;
    	if (!block_size_is_power_of_two(cache))
    		bio->bi_sector = (from_cblock(cblock) * cache->sectors_per_block) +
    				sector_div(bi_sector, cache->sectors_per_block);
    	else
    		bio->bi_sector = (from_cblock(cblock) << cache->sectors_per_block_shift) |
    				(bi_sector & (cache->sectors_per_block - 1));
    }
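
    /*
     * e.g. with 128 sector cache blocks, a bio at sector 300 remapped to
     * cblock 5 lands at (5 << 7) | (300 & 127) = 684 on the cache device
     * (illustrative numbers only).
     */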
    
    static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
    {
    	unsigned long flags;
    	struct per_bio_data *pb = get_per_bio_data(bio);
    
    	spin_lock_irqsave(&cache->lock, flags);
    	if (cache->need_tick_bio &&
    	    !(bio->bi_rw & (REQ_FUA | REQ_FLUSH | REQ_DISCARD))) {
    		pb->tick = true;
    		cache->need_tick_bio = false;
    	}
    	spin_unlock_irqrestore(&cache->lock, flags);
    }
    
    static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
    				  dm_oblock_t oblock)
    {
    	check_if_tick_bio_needed(cache, bio);
    	remap_to_origin(cache, bio);
    	if (bio_data_dir(bio) == WRITE)
    		clear_discard(cache, oblock_to_dblock(cache, oblock));
    }
    
    static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
    				 dm_oblock_t oblock, dm_cblock_t cblock)
    {
    	remap_to_cache(cache, bio, cblock);
    	if (bio_data_dir(bio) == WRITE) {
    		set_dirty(cache, oblock, cblock);
    		clear_discard(cache, oblock_to_dblock(cache, oblock));
    	}
    }
    
    static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
    {
    	sector_t block_nr = bio->bi_sector;
    
    	if (!block_size_is_power_of_two(cache))
    		(void) sector_div(block_nr, cache->sectors_per_block);
    	else
    		block_nr >>= cache->sectors_per_block_shift;
    
    	return to_oblock(block_nr);
    }
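
    /*
     * e.g. with 128 sector cache blocks, a bio starting at sector 300
     * belongs to oblock 300 >> 7 = 2 (illustrative numbers only).
     */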
    
    static int bio_triggers_commit(struct cache *cache, struct bio *bio)
    {
    	return bio->bi_rw & (REQ_FLUSH | REQ_FUA);
    }
    
    static void issue(struct cache *cache, struct bio *bio)
    {
    	unsigned long flags;
    
    	if (!bio_triggers_commit(cache, bio)) {
    		generic_make_request(bio);
    		return;
    	}
    
    	/*
    	 * Batch together any bios that trigger commits and then issue a
    	 * single commit for them in do_worker().
    	 */
    	spin_lock_irqsave(&cache->lock, flags);
    	cache->commit_requested = true;
    	bio_list_add(&cache->deferred_flush_bios, bio);
    	spin_unlock_irqrestore(&cache->lock, flags);
    }
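
    /*
     * The worker then performs one metadata commit and reissues everything
     * on deferred_flush_bios, so a burst of REQ_FLUSH/REQ_FUA bios costs a
     * single commit rather than one each.
     */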
    
    /*----------------------------------------------------------------
     * Migration processing
     *
     * Migration covers moving data from the origin device to the cache, or
     * vice versa.
     *--------------------------------------------------------------*/
    static void free_migration(struct dm_cache_migration *mg)
    {
    	mempool_free(mg, mg->cache->migration_pool);
    }
    
    static void inc_nr_migrations(struct cache *cache)
    {
    	atomic_inc(&cache->nr_migrations);
    }
    
    static void dec_nr_migrations(struct cache *cache)
    {
    	atomic_dec(&cache->nr_migrations);
    
    	/*
    	 * Wake the worker in case we're suspending the target.
    	 */
    	wake_up(&cache->migration_wait);
    }
    
    static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
    			 bool holder)
    {
    	(holder ? dm_cell_release : dm_cell_release_no_holder)
    		(cache->prison, cell, &cache->deferred_bios);
    	free_prison_cell(cache, cell);
    }
    
    static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
    		       bool holder)
    {
    	unsigned long flags;
    
    	spin_lock_irqsave(&cache->lock, flags);
    	__cell_defer(cache, cell, holder);
    	spin_unlock_irqrestore(&cache->lock, flags);
    
    	wake_worker(cache);
    }
    
    static void cleanup_migration(struct dm_cache_migration *mg)
    {
    	dec_nr_migrations(mg->cache);
    	free_migration(mg);
    }
    
    static void migration_failure(struct dm_cache_migration *mg)
    {
    	struct cache *cache = mg->cache;
    
    	if (mg->writeback) {
    		DMWARN_LIMIT("writeback failed; couldn't copy block");
    		set_dirty(cache, mg->old_oblock, mg->cblock);
    		cell_defer(cache, mg->old_ocell, false);
    
    	} else if (mg->demote) {
    		DMWARN_LIMIT("demotion failed; couldn't copy block");
    		policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock);
    
    		cell_defer(cache, mg->old_ocell, mg->promote ? 0 : 1);
    		if (mg->promote)
    			cell_defer(cache, mg->new_ocell, 1);
    	} else {
    		DMWARN_LIMIT("promotion failed; couldn't copy block");
    		policy_remove_mapping(cache->policy, mg->new_oblock);
    		cell_defer(cache, mg->new_ocell, 1);
    	}
    
    	cleanup_migration(mg);
    }
    
    static void migration_success_pre_commit(struct dm_cache_migration *mg)
    {
    	unsigned long flags;
    	struct cache *cache = mg->cache;
    
    	if (mg->writeback) {
    		cell_defer(cache, mg->old_ocell, false);
    		clear_dirty(cache, mg->old_oblock, mg->cblock);
    		cleanup_migration(mg);
    		return;
    
    	} else if (mg->demote) {
    		if (dm_cache_remove_mapping(cache->cmd, mg->cblock)) {
    			DMWARN_LIMIT("demotion failed; couldn't update on disk metadata");
    			policy_force_mapping(cache->policy, mg->new_oblock,
    					     mg->old_oblock);
    			if (mg->promote)
    				cell_defer(cache, mg->new_ocell, true);
    			cleanup_migration(mg);
    			return;
    		}
    	} else {
    		if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) {
    			DMWARN_LIMIT("promotion failed; couldn't update on disk metadata");
    			policy_remove_mapping(cache->policy, mg->new_oblock);
    			cleanup_migration(mg);
    			return;
    		}
    	}
    
    	spin_lock_irqsave(&cache->lock, flags);
    	list_add_tail(&mg->list, &cache->need_commit_migrations);
    	cache->commit_requested = true;
    	spin_unlock_irqrestore(&cache->lock, flags);
    }
    
    static void migration_success_post_commit(struct dm_cache_migration *mg)
    {
    	unsigned long flags;
    	struct cache *cache = mg->cache;
    
    	if (mg->writeback) {
    		DMWARN("writeback unexpectedly triggered commit");
    		return;
    
    	} else if (mg->demote) {
    		cell_defer(cache, mg->old_ocell, mg->promote ? 0 : 1);
    
    		if (mg->promote) {
    			mg->demote = false;
    
    			spin_lock_irqsave(&cache->lock, flags);
    			list_add_tail(&mg->list, &cache->quiesced_migrations);
    			spin_unlock_irqrestore(&cache->lock, flags);
    
    		} else
    			cleanup_migration(mg);
    
    	} else {
    		cell_defer(cache, mg->new_ocell, true);
    		clear_dirty(cache, mg->new_oblock, mg->cblock);
    		cleanup_migration(mg);
    	}
    }
    
    static void copy_complete(int read_err, unsigned long write_err, void *context)
    {
    	unsigned long flags;
    	struct dm_cache_migration *mg = (struct dm_cache_migration *) context;
    	struct cache *cache = mg->cache;
    
    	if (read_err || write_err)
    		mg->err = true;
    
    	spin_lock_irqsave(&cache->lock, flags);
    	list_add_tail(&mg->list, &cache->completed_migrations);
    	spin_unlock_irqrestore(&cache->lock, flags);
    
    	wake_worker(cache);
    }
    
    static void issue_copy_real(struct dm_cache_migration *mg)
    {
    	int r;
    	struct dm_io_region o_region, c_region;
    	struct cache *cache = mg->cache;
    
    	o_region.bdev = cache->origin_dev->bdev;
    	o_region.count = cache->sectors_per_block;
    
    	c_region.bdev = cache->cache_dev->bdev;
    	c_region.sector = from_cblock(mg->cblock) * cache->sectors_per_block;
    	c_region.count = cache->sectors_per_block;
    
    	if (mg->writeback || mg->demote) {
    		/* demote */
    		o_region.sector = from_oblock(mg->old_oblock) * cache->sectors_per_block;
    		r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, mg);
    	} else {
    		/* promote */
    		o_region.sector = from_oblock(mg->new_oblock) * cache->sectors_per_block;
    		r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg);
    	}
    
    	if (r < 0)
    		migration_failure(mg);
    }
    
    static void avoid_copy(struct dm_cache_migration *mg)
    {
    	atomic_inc(&mg->cache->stats.copies_avoided);
    	migration_success_pre_commit(mg);
    }
    
    static void issue_copy(struct dm_cache_migration *mg)
    {
    	bool avoid;
    	struct cache *cache = mg->cache;
    
    	if (mg->writeback || mg->demote)
    		avoid = !is_dirty(cache, mg->cblock) ||
    			is_discarded_oblock(cache, mg->old_oblock);
    	else
    		avoid = is_discarded_oblock(cache, mg->new_oblock);
    
    	avoid ? avoid_copy(mg) : issue_copy_real(mg);
    }
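
    /*
     * Rationale: writing back or demoting a block that is clean or whose
     * origin block is marked discarded has nothing worth copying, and
     * promoting a discarded block has no valid data to pull in, so the
     * kcopyd copy is skipped in those cases.
     */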
    
    static void complete_migration(struct dm_cache_migration *mg)
    {
    	if (mg->err)
    		migration_failure(mg);
    	else
    		migration_success_pre_commit(mg);
    }
    
    static void process_migrations(struct cache *cache, struct list_head *head,
    			       void (*fn)(struct dm_cache_migration *))
    {
    	unsigned long flags;
    	struct list_head list;
    	struct dm_cache_migration *mg, *tmp;
    
    	INIT_LIST_HEAD(&list);
    	spin_lock_irqsave(&cache->lock, flags);
    	list_splice_init(head, &list);
    	spin_unlock_irqrestore(&cache->lock, flags);
    
    	list_for_each_entry_safe(mg, tmp, &list, list)
    		fn(mg);
    }
    
    static void __queue_quiesced_migration(struct dm_cache_migration *mg)
    {
    	list_add_tail(&mg->list, &mg->cache->quiesced_migrations);
    }
    
    static void queue_quiesced_migration(struct dm_cache_migration *mg)
    {
    	unsigned long flags;
    	struct cache *cache = mg->cache;
    
    	spin_lock_irqsave(&cache->lock, flags);
    	__queue_quiesced_migration(mg);
    	spin_unlock_irqrestore(&cache->lock, flags);
    
    	wake_worker(cache);
    }
    
    static void queue_quiesced_migrations(struct cache *cache, struct list_head *work)
    {
    	unsigned long flags;
    	struct dm_cache_migration *mg, *tmp;
    
    	spin_lock_irqsave(&cache->lock, flags);
    	list_for_each_entry_safe(mg, tmp, work, list)
    		__queue_quiesced_migration(mg);
    	spin_unlock_irqrestore(&cache->lock, flags);
    
    	wake_worker(cache);
    }
    
    static void check_for_quiesced_migrations(struct cache *cache,
    					  struct per_bio_data *pb)
    {
    	struct list_head work;
    
    	if (!pb->all_io_entry)
    		return;
    
    	INIT_LIST_HEAD(&work);
    	if (pb->all_io_entry)
    		dm_deferred_entry_dec(pb->all_io_entry, &work);
    
    	if (!list_empty(&work))
    		queue_quiesced_migrations(cache, &work);
    }
    
    static void quiesce_migration(struct dm_cache_migration *mg)
    {
    	if (!dm_deferred_set_add_work(mg->cache->all_io_ds, &mg->list))
    		queue_quiesced_migration(mg);
    }
    
    static void promote(struct cache *cache, struct prealloc *structs,
    		    dm_oblock_t oblock, dm_cblock_t cblock,
    		    struct dm_bio_prison_cell *cell)
    {
    	struct dm_cache_migration *mg = prealloc_get_migration(structs);
    
    	mg->err = false;
    	mg->writeback = false;
    	mg->demote = false;
    	mg->promote = true;
    	mg->cache = cache;
    	mg->new_oblock = oblock;
    	mg->cblock = cblock;
    	mg->old_ocell = NULL;
    	mg->new_ocell = cell;
    	mg->start_jiffies = jiffies;
    
    	inc_nr_migrations(cache);
    	quiesce_migration(mg);
    }
    
    static void writeback(struct cache *cache, struct prealloc *structs,
    		      dm_oblock_t oblock, dm_cblock_t cblock,
    		      struct dm_bio_prison_cell *cell)
    {
    	struct dm_cache_migration *mg = prealloc_get_migration(structs);
    
    	mg->err = false;
    	mg->writeback = true;
    	mg->demote = false;
    	mg->promote = false;
    	mg->cache = cache;
    	mg->old_oblock = oblock;
    	mg->cblock = cblock;
    	mg->old_ocell = cell;
    	mg->new_ocell = NULL;
    	mg->start_jiffies = jiffies;
    
    	inc_nr_migrations(cache);
    	quiesce_migration(mg);
    }
    
    static void demote_then_promote(struct cache *cache, struct prealloc *structs,
    				dm_oblock_t old_oblock, dm_oblock_t new_oblock,
    				dm_cblock_t cblock,
    				struct dm_bio_prison_cell *old_ocell,
    				struct dm_bio_prison_cell *new_ocell)
    {
    	struct dm_cache_migration *mg = prealloc_get_migration(structs);
    
    	mg->err = false;
    	mg->writeback = false;
    	mg->demote = true;
    	mg->promote = true;
    	mg->cache = cache;
    	mg->old_oblock = old_oblock;
    	mg->new_oblock = new_oblock;
    	mg->cblock = cblock;
    	mg->old_ocell = old_ocell;
    	mg->new_ocell = new_ocell;
    	mg->start_jiffies = jiffies;
    
    	inc_nr_migrations(cache);
    	quiesce_migration(mg);
    }
    
    /*----------------------------------------------------------------
     * bio processing
     *--------------------------------------------------------------*/
    static void defer_bio(struct cache *cache, struct bio *bio)
    {
    	unsigned long flags;
    
    	spin_lock_irqsave(&cache->lock, flags);
    	bio_list_add(&cache->deferred_bios, bio);
    	spin_unlock_irqrestore(&cache->lock, flags);
    
    	wake_worker(cache);
    }
    
    static void process_flush_bio(struct cache *cache, struct bio *bio)
    {
    	struct per_bio_data *pb = get_per_bio_data(bio);
    
    	BUG_ON(bio->bi_size);
    	if (!pb->req_nr)
    		remap_to_origin(cache, bio);
    	else
    		remap_to_cache(cache, bio, 0);
    
    	issue(cache, bio);
    }
    
    /*
     * People generally discard large parts of a device, e.g. the whole device
     * when formatting.  Splitting these large discards up into cache block
     * sized ios and then quiescing (always necessary for discard) takes too
     * long.
     *
     * We keep it simple, and allow any size of discard to come in, and just
     * mark off blocks on the discard bitset.  No passdown occurs!