Newer
Older
/*
* Copyright (C) 2012 Red Hat. All rights reserved.
*
* This file is released under the GPL.
*/
#include "dm.h"
#include "dm-bio-prison.h"
#include "dm-bio-record.h"
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#include "dm-cache-metadata.h"
#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#define DM_MSG_PREFIX "cache"
DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
"A percentage of time allocated for copying to and/or from cache");
/*----------------------------------------------------------------*/
/*
* Glossary:
*
* oblock: index of an origin block
* cblock: index of a cache block
* promotion: movement of a block from origin to cache
* demotion: movement of a block from cache to origin
* migration: movement of a block between the origin and cache device,
* either direction
*/
/*----------------------------------------------------------------*/
/*
 * Storage, in bytes, needed for a bitset of nr_entries bits.  The bit
 * count is rounded up to a whole number of unsigned longs.
 */
static size_t bitset_size_in_bytes(unsigned nr_entries)
{
	size_t nr_words = dm_div_up(nr_entries, BITS_PER_LONG);

	return nr_words * sizeof(unsigned long);
}
/*
 * Allocate a zero-filled bitset large enough for nr_entries bits.
 * Returns whatever vzalloc() returns; pair with free_bitset().
 */
static unsigned long *alloc_bitset(unsigned nr_entries)
{
	return vzalloc(bitset_size_in_bytes(nr_entries));
}
/*
 * Zero every word of a bitset that was sized for nr_entries bits.
 */
static void clear_bitset(void *bitset, unsigned nr_entries)
{
	memset(bitset, 0, bitset_size_in_bytes(nr_entries));
}
/*
 * Release a bitset obtained from alloc_bitset() (vzalloc memory, so it
 * goes back through vfree()).
 */
static void free_bitset(unsigned long *bits)
{
	vfree(bits);
}
/*----------------------------------------------------------------*/
/*
* There are a couple of places where we let a bio run, but want to do some
* work before calling its endio function. We do this by temporarily
* changing the endio fn.
*/
/*
* Saved completion state of a bio whose endio has been temporarily
* replaced: dm_hook_bio() fills this in, dm_unhook_bio() restores from it.
*/
struct dm_hook_info {
/* the bio's original completion callback */
bio_end_io_t *bi_end_io;
/* the bio's original private pointer */
void *bi_private;
};
/*
 * Install a temporary completion handler on @bio.
 *
 * The bio's current bi_end_io / bi_private pair is stashed in @h so that
 * dm_unhook_bio() can put it back later; the bio is then pointed at the
 * supplied @bi_end_io and @bi_private.
 */
static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
			bio_end_io_t *bi_end_io, void *bi_private)
{
	/* completion callback: save, then override */
	h->bi_end_io = bio->bi_end_io;
	bio->bi_end_io = bi_end_io;

	/* private cookie: save, then override */
	h->bi_private = bio->bi_private;
	bio->bi_private = bi_private;
}
/*
 * Undo dm_hook_bio(): restore the completion callback and private
 * pointer that were saved in @h.
 */
static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
{
	bio->bi_private = h->bi_private;
	bio->bi_end_io = h->bi_end_io;
}
/*----------------------------------------------------------------*/
#define PRISON_CELLS 1024
#define MIGRATION_POOL_SIZE 128
#define COMMIT_PERIOD HZ
#define MIGRATION_COUNT_WINDOW 10
/*
* The block size of the device holding cache data must be
* between 32KB and 1GB.
*/
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)
/*
* FIXME: the cache is read/write for the time being.
*/
CM_WRITE, /* metadata may be changed */
CM_READ_ONLY, /* metadata may not be changed */
};
/*
* How writes are distributed between the cache and origin devices.
* Tested by writethrough_mode(), writeback_mode() and passthrough_mode().
*/
enum cache_io_mode {
/*
* Data is written to cached blocks only. These blocks are marked
* dirty. If you lose the cache device you will lose data.
* Potential performance increase for both reads and writes.
*/
CM_IO_WRITEBACK,
/*
* Data is written to both cache and origin. Blocks are never
* dirty. Potential performance benefit for reads only.
*/
CM_IO_WRITETHROUGH,
/*
* A degraded mode useful for various cache coherency situations
* (eg, rolling back snapshots). Reads and writes always go to the
* origin. If a write goes to a cached oblock, then the cache
* block is invalidated.
*/
CM_IO_PASSTHROUGH
};
enum cache_metadata_mode mode;
enum cache_io_mode io_mode;
};
/*
* Running counters kept per cache.  Only the four hit/miss counters are
* exchanged with the metadata by load_stats() / save_stats(); the
* remaining counters are not persisted by any code visible here.
*/
struct cache_stats {
/* hit/miss counters: loaded by load_stats(), stored by save_stats() */
atomic_t read_hit;
atomic_t read_miss;
atomic_t write_hit;
atomic_t write_miss;
/* NOTE(review): presumably counts of completed demotions/promotions - confirm at the update sites */
atomic_t demotion;
atomic_t promotion;
atomic_t copies_avoided;
atomic_t cache_cell_clash;
atomic_t commit_count;
/* incremented by set_discard() each time a discard block is marked */
atomic_t discard_count;
};
struct cache {
struct dm_target *ti;
struct dm_target_callbacks callbacks;
struct dm_cache_metadata *cmd;
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
/*
* Metadata is written to this device.
*/
struct dm_dev *metadata_dev;
/*
* The slower of the two data devices. Typically a spindle.
*/
struct dm_dev *origin_dev;
/*
* The faster of the two data devices. Typically an SSD.
*/
struct dm_dev *cache_dev;
/*
* Size of the origin device in _complete_ blocks and native sectors.
*/
dm_oblock_t origin_blocks;
sector_t origin_sectors;
/*
* Size of the cache device in blocks.
*/
dm_cblock_t cache_size;
/*
* Fields for converting from sectors to blocks.
*/
uint32_t sectors_per_block;
int sectors_per_block_shift;
spinlock_t lock;
struct bio_list deferred_bios;
struct bio_list deferred_flush_bios;
struct bio_list deferred_writethrough_bios;
struct list_head quiesced_migrations;
struct list_head completed_migrations;
struct list_head need_commit_migrations;
sector_t migration_threshold;
wait_queue_head_t migration_wait;

Joe Thornber
committed
wait_queue_head_t quiescing_wait;
atomic_t quiescing;

Joe Thornber
committed
atomic_t quiescing_ack;
/*
* cache_size entries, dirty if set
*/
dm_cblock_t nr_dirty;
unsigned long *dirty_bitset;
/*
* origin_blocks entries, discarded if set.
*/
dm_dblock_t discard_nr_blocks;
unsigned long *discard_bitset;
uint32_t discard_block_size; /* a power of 2 times sectors per block */
/*
* Rather than reconstructing the table line for the status we just
* save it and regurgitate.
*/
unsigned nr_ctr_args;
const char **ctr_args;
struct dm_kcopyd_client *copier;
struct workqueue_struct *wq;
struct work_struct worker;
struct delayed_work waker;
unsigned long last_commit_jiffies;
struct dm_bio_prison *prison;
struct dm_deferred_set *all_io_ds;
mempool_t *migration_pool;
struct dm_cache_migration *next_migration;
struct dm_cache_policy *policy;
unsigned policy_nr_args;
bool need_tick_bio:1;
bool sized:1;
bool commit_requested:1;
bool loaded_mappings:1;
bool loaded_discards:1;
/*
* Cache features such as write-through.
struct cache_features features;
struct cache_stats stats;
};
struct per_bio_data {
bool tick:1;
unsigned req_nr:2;
struct dm_deferred_entry *all_io_entry;
/*
* writethrough fields. These MUST remain at the end of this
* structure and the 'cache' member must be the first as it
* is used to determine the offset of the writethrough fields.
struct cache *cache;
dm_cblock_t cblock;
struct dm_hook_info hook_info;
struct dm_bio_details bio_details;
};
struct dm_cache_migration {
struct list_head list;
struct cache *cache;
unsigned long start_jiffies;
dm_oblock_t old_oblock;
dm_oblock_t new_oblock;
dm_cblock_t cblock;
bool err:1;
bool writeback:1;
bool demote:1;
bool promote:1;
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
struct dm_bio_prison_cell *old_ocell;
struct dm_bio_prison_cell *new_ocell;
};
/*
* Processing a bio in the worker thread may require these memory
* allocations. We prealloc to avoid deadlocks (the same worker thread
* frees them back to the mempool).
*/
/*
* Bundle of preallocated objects: one migration and up to two prison
* cells.  Filled by prealloc_data_structs(), drained by
* prealloc_get_migration() / prealloc_get_cell(), and any leftovers are
* released by prealloc_free_structs().  A NULL member means "slot empty".
*/
struct prealloc {
/* migration taken from cache->migration_pool, or NULL */
struct dm_cache_migration *mg;
/* first spare prison cell, or NULL */
struct dm_bio_prison_cell *cell1;
/* second spare prison cell, or NULL */
struct dm_bio_prison_cell *cell2;
};
/*
 * Schedule the cache's worker item on the cache's own workqueue.
 */
static void wake_worker(struct cache *cache)
{
	struct workqueue_struct *wq = cache->wq;

	queue_work(wq, &cache->worker);
}
/*----------------------------------------------------------------*/
/*
 * Grab a prison cell from the cache's bio prison without sleeping
 * (GFP_NOWAIT); the result of dm_bio_prison_alloc_cell() is returned
 * unchanged, so callers must cope with failure.
 */
static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache)
{
	/* FIXME: change to use a local slab. */
	struct dm_bio_prison *prison = cache->prison;

	return dm_bio_prison_alloc_cell(prison, GFP_NOWAIT);
}
/*
 * Hand a prison cell back to the cache's bio prison.
 */
static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell)
{
	struct dm_bio_prison *prison = cache->prison;

	dm_bio_prison_free_cell(prison, cell);
}
/*
 * Top up every empty slot of @p (migration, cell1, cell2) using
 * non-sleeping allocations.
 *
 * Slots that are already populated are left alone, so the function can
 * be called repeatedly.  Returns 0 once all three slots are filled, or
 * -ENOMEM at the first allocation that fails (slots filled so far stay
 * filled).
 */
static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
{
	if (!p->mg)
		p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
	if (!p->mg)
		return -ENOMEM;

	if (!p->cell1)
		p->cell1 = alloc_prison_cell(cache);
	if (!p->cell1)
		return -ENOMEM;

	if (!p->cell2)
		p->cell2 = alloc_prison_cell(cache);
	if (!p->cell2)
		return -ENOMEM;

	return 0;
}
/*
 * Release whatever is still held in @p: the prison cells go back to the
 * prison and the migration goes back to the migration mempool.  Empty
 * slots are skipped.
 */
static void prealloc_free_structs(struct cache *cache, struct prealloc *p)
{
	struct dm_bio_prison_cell *second = p->cell2;
	struct dm_bio_prison_cell *first = p->cell1;
	struct dm_cache_migration *mg = p->mg;

	if (second)
		free_prison_cell(cache, second);

	if (first)
		free_prison_cell(cache, first);

	if (mg)
		mempool_free(mg, cache->migration_pool);
}
/*
 * Take ownership of the preallocated migration, leaving the slot empty.
 * BUG()s if no migration has been preallocated.
 */
static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
{
	struct dm_cache_migration *taken;

	BUG_ON(!p->mg);

	taken = p->mg;
	p->mg = NULL;

	return taken;
}
/*
 * Take one preallocated cell out of @p, preferring cell1 over cell2, and
 * leave that slot empty.  The caller must know that at least one cell is
 * available: with both slots empty this function BUG()s instead of
 * returning NULL.
 */
static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p)
{
	struct dm_bio_prison_cell *cell = NULL;

	if (p->cell1) {
		cell = p->cell1;
		p->cell1 = NULL;
		return cell;
	}

	if (p->cell2) {
		cell = p->cell2;
		p->cell2 = NULL;
		return cell;
	}

	BUG();
	return cell;
}
/*
 * Return an unused cell to @p.  cell2 is refilled first, then cell1.
 * A prealloc struct holds at most two cells; trying to put a third one
 * in triggers BUG().
 */
static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell)
{
	if (!p->cell2) {
		p->cell2 = cell;
		return;
	}

	if (!p->cell1) {
		p->cell1 = cell;
		return;
	}

	BUG();
}
/*----------------------------------------------------------------*/
/*
 * Fill in the prison key for an origin block: the block field carries
 * the origin block number, virtual and dev are both zero.
 */
static void build_key(dm_oblock_t oblock, struct dm_cell_key *key)
{
	key->block = from_oblock(oblock);
	key->dev = 0;
	key->virtual = 0;
}
/*
* The caller hands in a preallocated cell, and a free function for it.
* The cell will be freed if there's an error, or if it wasn't used because
* a cell with that key already exists.
*/
typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);
/*
 * Detain @bio in the prison cell keyed by @oblock.
 *
 * @cell_prealloc is offered to dm_bio_detain(); whenever that call
 * returns non-zero the preallocated cell is handed to
 * @free_fn(@free_context, ...) here, so the caller need not release it.
 * The cell in use is reported through @cell_result.
 *
 * Returns dm_bio_detain()'s result unchanged.
 */
static int bio_detain(struct cache *cache, dm_oblock_t oblock,
		      struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
		      cell_free_fn free_fn, void *free_context,
		      struct dm_bio_prison_cell **cell_result)
{
	struct dm_cell_key key;
	int detained;

	build_key(oblock, &key);

	detained = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
	if (detained != 0)
		free_fn(free_context, cell_prealloc);

	return detained;
}
/*
 * Look up / claim the prison cell for @oblock using a cell drawn from
 * @structs.
 *
 * A preallocated cell is always taken from @structs first (which BUG()s
 * if none is left).  If dm_get_cell() returns non-zero the cell is put
 * back into @structs.  The cell in use is reported via @cell_result and
 * dm_get_cell()'s return value is passed through.
 */
static int get_cell(struct cache *cache,
		    dm_oblock_t oblock,
		    struct prealloc *structs,
		    struct dm_bio_prison_cell **cell_result)
{
	struct dm_cell_key key;
	struct dm_bio_prison_cell *spare = prealloc_get_cell(structs);
	int rc;

	build_key(oblock, &key);

	rc = dm_get_cell(cache->prison, &key, spare, cell_result);
	if (rc != 0)
		prealloc_put_cell(structs, spare);

	return rc;
}
/*----------------------------------------------------------------*/
/*
 * Report whether cache block @b has its bit set in the dirty bitset.
 */
static bool is_dirty(struct cache *cache, dm_cblock_t b)
{
	unsigned long *dirty = cache->dirty_bitset;

	return test_bit(from_cblock(b), dirty);
}
/*
 * Mark cache block @cblock dirty.  Only a clean -> dirty transition has
 * any further effect: nr_dirty is bumped and the policy is told that
 * @oblock is now dirty.
 */
static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
{
	/* Bit was already set: nothing else to account for. */
	if (test_and_set_bit(from_cblock(cblock), cache->dirty_bitset))
		return;

	cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) + 1);
	policy_set_dirty(cache->policy, oblock);
}
/*
 * Mark cache block @cblock clean.  Only a dirty -> clean transition has
 * any further effect: the policy is informed, nr_dirty is decremented
 * and, when the count reaches zero, a table event is raised.
 */
static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
{
	/* Bit was already clear: nothing else to account for. */
	if (!test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset))
		return;

	policy_clear_dirty(cache->policy, oblock);
	cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) - 1);

	if (from_cblock(cache->nr_dirty) == 0)
		dm_table_event(cache->ti->table);
}
/*----------------------------------------------------------------*/
/*
 * True when a shift is available for sector <-> block conversion, i.e.
 * sectors_per_block_shift is non-negative.  Callers fall back to
 * division otherwise.
 */
static bool block_size_is_power_of_two(struct cache *cache)
{
	return !(cache->sectors_per_block_shift < 0);
}
/*
 * Divide block number @b by @n and return the quotient.
 *
 * do_div() is used rather than '/'.  On ARM with gcc <= 4.6 the function
 * is additionally forced inline because that compiler generates spurious
 * references to __udivdi3 and __umoddi3.
 */
#if defined(CONFIG_ARM) && __GNUC__ == 4 && __GNUC_MINOR__ <= 6
__always_inline
#endif
static dm_block_t block_div(dm_block_t b, uint32_t n)
{
	/* do_div() leaves the quotient in b; the remainder is not needed. */
	(void) do_div(b, n);

	return b;
}
/*
 * Convert an origin block number into the number of the discard block
 * that contains it.
 *
 * discard_block_size is expressed in sectors, so it is first reduced to
 * a count of origin blocks per discard block (by shift when the block
 * size is a power of two, by division otherwise); the origin block
 * number is then divided by that count.
 */
static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
{
	uint32_t oblocks_per_dblock;

	if (block_size_is_power_of_two(cache))
		oblocks_per_dblock = cache->discard_block_size >> cache->sectors_per_block_shift;
	else
		oblocks_per_dblock = cache->discard_block_size / cache->sectors_per_block;

	return to_dblock(block_div(from_oblock(oblock), oblocks_per_dblock));
}
/*
 * Record discard block @b as discarded and count the event in
 * stats.discard_count.  The bitset update is done under cache->lock
 * with interrupts saved/restored.
 */
static void set_discard(struct cache *cache, dm_dblock_t b)
{
	unsigned long irq_state;

	atomic_inc(&cache->stats.discard_count);

	spin_lock_irqsave(&cache->lock, irq_state);
	set_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, irq_state);
}
/*
 * Clear the discarded mark of discard block @b, under cache->lock.
 */
static void clear_discard(struct cache *cache, dm_dblock_t b)
{
	unsigned long irq_state;

	spin_lock_irqsave(&cache->lock, irq_state);
	clear_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, irq_state);
}
/*
 * Report whether discard block @b is currently marked discarded.  The
 * bit is sampled under cache->lock.
 */
static bool is_discarded(struct cache *cache, dm_dblock_t b)
{
	unsigned long irq_state;
	int marked;

	spin_lock_irqsave(&cache->lock, irq_state);
	marked = test_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, irq_state);

	return marked;
}
/*
 * Report whether the discard block containing origin block @b is
 * currently marked discarded.  The bit is sampled under cache->lock.
 */
static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
{
	unsigned long irq_state;
	int marked;

	spin_lock_irqsave(&cache->lock, irq_state);
	marked = test_bit(from_dblock(oblock_to_dblock(cache, b)),
			  cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, irq_state);

	return marked;
}
/*----------------------------------------------------------------*/
static void load_stats(struct cache *cache)
{
struct dm_cache_statistics stats;
dm_cache_metadata_get_stats(cache->cmd, &stats);
atomic_set(&cache->stats.read_hit, stats.read_hits);
atomic_set(&cache->stats.read_miss, stats.read_misses);
atomic_set(&cache->stats.write_hit, stats.write_hits);
atomic_set(&cache->stats.write_miss, stats.write_misses);
}
static void save_stats(struct cache *cache)
{
struct dm_cache_statistics stats;
stats.read_hits = atomic_read(&cache->stats.read_hit);
stats.read_misses = atomic_read(&cache->stats.read_miss);
stats.write_hits = atomic_read(&cache->stats.write_hit);
stats.write_misses = atomic_read(&cache->stats.write_miss);
dm_cache_metadata_set_stats(cache->cmd, &stats);
}
/*----------------------------------------------------------------
* Per bio data
*--------------------------------------------------------------*/
/*
* If using writeback, leave out struct per_bio_data's writethrough fields.
*/
#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))
/* True when the feature set selects CM_IO_WRITETHROUGH. */
static bool writethrough_mode(struct cache_features *f)
{
	return CM_IO_WRITETHROUGH == f->io_mode;
}
/* True when the feature set selects CM_IO_WRITEBACK. */
static bool writeback_mode(struct cache_features *f)
{
	return CM_IO_WRITEBACK == f->io_mode;
}
/* True when the feature set selects CM_IO_PASSTHROUGH. */
static bool passthrough_mode(struct cache_features *f)
{
	return CM_IO_PASSTHROUGH == f->io_mode;
}
/*
 * Size of the per-bio data for this cache: the full structure in
 * writethrough mode, otherwise only the part that precedes the
 * writethrough fields.
 */
static size_t get_per_bio_data_size(struct cache *cache)
{
	if (writethrough_mode(&cache->features))
		return PB_DATA_SIZE_WT;

	return PB_DATA_SIZE_WB;
}
static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
struct per_bio_data *pb = dm_per_bio_data(bio, data_size);
static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size)
struct per_bio_data *pb = get_per_bio_data(bio, data_size);
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
pb->tick = false;
pb->req_nr = dm_bio_get_target_bio_nr(bio);
pb->all_io_entry = NULL;
return pb;
}
/*----------------------------------------------------------------
* Remapping
*--------------------------------------------------------------*/
static void remap_to_origin(struct cache *cache, struct bio *bio)
{
bio->bi_bdev = cache->origin_dev->bdev;
}
/*
 * Point @bio at the cache device and translate its sector so that it
 * addresses the same offset inside cache block @cblock.
 *
 * The new sector is (cblock * sectors_per_block) + offset-within-block;
 * the shift/mask form is used when the block size is a power of two.
 */
static void remap_to_cache(struct cache *cache, struct bio *bio,
			   dm_cblock_t cblock)
{
	sector_t bi_sector = bio->bi_sector;
	/*
	 * Widen the block index to sector_t *before* scaling it.
	 * sectors_per_block is a uint32_t, so if from_cblock() also yields
	 * a 32-bit value (NOTE(review): it does in dm-cache-block-types.h -
	 * confirm) the multiply/shift would otherwise be done in 32 bits
	 * and wrap for large cache devices, sending I/O to the wrong
	 * sector.
	 */
	sector_t block = from_cblock(cblock);

	bio->bi_bdev = cache->cache_dev->bdev;
	if (!block_size_is_power_of_two(cache))
		/* sector_div() returns the remainder: the offset in the block */
		bio->bi_sector = (block * cache->sectors_per_block) +
				 sector_div(bi_sector, cache->sectors_per_block);
	else
		bio->bi_sector = (block << cache->sectors_per_block_shift) |
				 (bi_sector & (cache->sectors_per_block - 1));
}
/*
 * If the cache is waiting for a "tick" bio, nominate @bio for the job.
 *
 * Bios carrying REQ_FUA, REQ_FLUSH or REQ_DISCARD are never chosen.
 * When @bio is chosen its per-bio 'tick' flag is set and the cache's
 * need_tick_bio flag is cleared; both happen under cache->lock.
 */
static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
{
	struct per_bio_data *pb = get_per_bio_data(bio, get_per_bio_data_size(cache));
	unsigned long irq_state;

	spin_lock_irqsave(&cache->lock, irq_state);
	if (cache->need_tick_bio &&
	    !(bio->bi_rw & (REQ_FUA | REQ_FLUSH | REQ_DISCARD))) {
		cache->need_tick_bio = false;
		pb->tick = true;
	}
	spin_unlock_irqrestore(&cache->lock, irq_state);
}
/*
 * Send @bio to the origin device.  The bio is first considered as a
 * tick bio; if it is a write, the discard mark of the discard block
 * covering @oblock is cleared.
 */
static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
					  dm_oblock_t oblock)
{
	check_if_tick_bio_needed(cache, bio);
	remap_to_origin(cache, bio);

	if (bio_data_dir(bio) != WRITE)
		return;

	clear_discard(cache, oblock_to_dblock(cache, oblock));
}
/*
 * Send @bio to cache block @cblock.  The bio is first considered as a
 * tick bio; if it is a write, the cache block is marked dirty and the
 * discard mark of the discard block covering @oblock is cleared.
 */
static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
				 dm_oblock_t oblock, dm_cblock_t cblock)
{
	check_if_tick_bio_needed(cache, bio);
	remap_to_cache(cache, bio, cblock);

	if (bio_data_dir(bio) != WRITE)
		return;

	set_dirty(cache, oblock, cblock);
	clear_discard(cache, oblock_to_dblock(cache, oblock));
}
/*
 * Origin block number addressed by @bio: its starting sector divided by
 * the block size (shift when the size is a power of two, sector_div()
 * otherwise).
 */
static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
{
	sector_t nr = bio->bi_sector;

	if (block_size_is_power_of_two(cache))
		nr >>= cache->sectors_per_block_shift;
	else
		/* sector_div() leaves the quotient in nr */
		(void) sector_div(nr, cache->sectors_per_block);

	return to_oblock(nr);
}
/*
 * Non-zero when @bio carries REQ_FLUSH or REQ_FUA, i.e. when issue()
 * must queue it for a commit rather than submit it directly.  @cache is
 * unused.
 */
static int bio_triggers_commit(struct cache *cache, struct bio *bio)
{
	return bio->bi_rw & (REQ_FUA | REQ_FLUSH);
}
/*
 * Submit @bio, or park it if it requires a metadata commit.
 *
 * Bios that trigger a commit (see bio_triggers_commit()) are batched on
 * deferred_flush_bios and commit_requested is raised so that a single
 * commit can be issued for them in do_worker().  Everything else goes
 * straight to generic_make_request().
 */
static void issue(struct cache *cache, struct bio *bio)
{
	unsigned long irq_state;

	if (bio_triggers_commit(cache, bio)) {
		spin_lock_irqsave(&cache->lock, irq_state);
		cache->commit_requested = true;
		bio_list_add(&cache->deferred_flush_bios, bio);
		spin_unlock_irqrestore(&cache->lock, irq_state);
		return;
	}

	generic_make_request(bio);
}
/*
 * Queue @bio on deferred_writethrough_bios (under cache->lock) and kick
 * the worker.
 */
static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
{
	unsigned long irq_state;

	spin_lock_irqsave(&cache->lock, irq_state);
	bio_list_add(&cache->deferred_writethrough_bios, bio);
	spin_unlock_irqrestore(&cache->lock, irq_state);

	wake_worker(cache);
}
/*
 * Completion handler installed by remap_to_origin_then_cache(); runs
 * when the origin half of a writethrough write has finished.
 *
 * The bio's original completion state is restored first.  On error the
 * bio is completed with that error.  Otherwise the bio is rewound to
 * its recorded state, remapped to the cache block and handed to the
 * worker.
 */
static void writethrough_endio(struct bio *bio, int err)
{
	struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
	struct cache *cache;

	dm_unhook_bio(&pb->hook_info, bio);

	if (err) {
		bio_endio(bio, err);
		return;
	}

	cache = pb->cache;
	dm_bio_restore(&pb->bio_details, bio);
	remap_to_cache(cache, bio, pb->cblock);

	/*
	 * We're in interrupt context here, so the bio cannot be issued
	 * directly; it is queued for the worker thread instead.
	 */
	defer_writethrough_bio(cache, bio);
}
/*
* When running in writethrough mode we need to send writes to clean blocks
* to both the cache and origin devices. In future we'd like to clone the
* bio and send them in parallel, but for now we're doing them in
* series as this is easier.
*/
/*
 * Prepare a writethrough write: the bio is sent to the origin now and
 * writethrough_endio() will redirect it to @cblock afterwards.
 *
 * The cache and cblock are stored in the per-bio data, the completion
 * handler is hooked, the bio's details are recorded for later restore,
 * and finally the bio is remapped to the origin (clearing the discard
 * mark for @oblock if it is a write).
 */
static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
				       dm_oblock_t oblock, dm_cblock_t cblock)
{
	struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);

	pb->cblock = cblock;
	pb->cache = cache;

	dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL);
	dm_bio_record(&pb->bio_details, bio);

	remap_to_origin_clear_discard(cache, bio, oblock);
}
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
/*----------------------------------------------------------------
* Migration processing
*
* Migration covers moving data from the origin device to the cache, or
* vice versa.
*--------------------------------------------------------------*/
static void free_migration(struct dm_cache_migration *mg)
{
mempool_free(mg, mg->cache->migration_pool);
}
static void inc_nr_migrations(struct cache *cache)
{
atomic_inc(&cache->nr_migrations);
}
static void dec_nr_migrations(struct cache *cache)
{
atomic_dec(&cache->nr_migrations);
/*
* Wake the worker in case we're suspending the target.
*/
wake_up(&cache->migration_wait);
}
/*
 * Release @cell, moving its bios onto deferred_bios, then free the
 * cell.  With @holder true dm_cell_release() is used, otherwise
 * dm_cell_release_no_holder().
 *
 * cell_defer() calls this with cache->lock held.
 */
static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
			 bool holder)
{
	if (holder)
		dm_cell_release(cache->prison, cell, &cache->deferred_bios);
	else
		dm_cell_release_no_holder(cache->prison, cell, &cache->deferred_bios);

	free_prison_cell(cache, cell);
}
/*
 * Locked wrapper around __cell_defer(): releases @cell onto the
 * deferred bio list under cache->lock and then kicks the worker.
 */
static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
		       bool holder)
{
	unsigned long irq_state;

	spin_lock_irqsave(&cache->lock, irq_state);
	__cell_defer(cache, cell, holder);
	spin_unlock_irqrestore(&cache->lock, irq_state);

	wake_worker(cache);
}
static void cleanup_migration(struct dm_cache_migration *mg)
{

Joe Thornber
committed
struct cache *cache = mg->cache;

Joe Thornber
committed
dec_nr_migrations(cache);
}
/*
 * Handle a migration whose data copy failed.
 *
 *  - writeback: the block is marked dirty again and the bios waiting in
 *    old_ocell are re-queued.
 *  - demotion:  the policy mapping is forced back and both cells are
 *    released (old_ocell's holder is re-queued only when no promotion
 *    was attached).
 *  - promotion: the new mapping is removed from the policy and
 *    new_ocell is released, holder included.
 *
 * In every case the migration is cleaned up at the end.
 */
static void migration_failure(struct dm_cache_migration *mg)
{
	struct cache *cache = mg->cache;

	if (mg->writeback) {
		DMWARN_LIMIT("writeback failed; couldn't copy block");
		set_dirty(cache, mg->old_oblock, mg->cblock);
		cell_defer(cache, mg->old_ocell, false);

	} else if (mg->demote) {
		DMWARN_LIMIT("demotion failed; couldn't copy block");
		policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock);

		cell_defer(cache, mg->old_ocell, !mg->promote);
		cell_defer(cache, mg->new_ocell, true);

	} else {
		DMWARN_LIMIT("promotion failed; couldn't copy block");
		policy_remove_mapping(cache->policy, mg->new_oblock);
		cell_defer(cache, mg->new_ocell, true);
	}

	cleanup_migration(mg);
}
/*
 * First stage of completing a migration whose data copy succeeded; runs
 * before the metadata commit.
 *
 *  - writeback: nothing needs committing.  The waiting bios are
 *    re-queued, the block is marked clean and the migration is finished.
 *  - demotion / promotion: the on-disk mapping is removed / inserted.
 *    If that fails the policy is rolled back and the migration is
 *    finished here.  Otherwise the migration is parked on
 *    need_commit_migrations and a commit is requested.
 */
static void migration_success_pre_commit(struct dm_cache_migration *mg)
{
	struct cache *cache = mg->cache;
	unsigned long irq_state;

	if (mg->writeback) {
		cell_defer(cache, mg->old_ocell, false);
		clear_dirty(cache, mg->old_oblock, mg->cblock);
		cleanup_migration(mg);
		return;
	}

	if (mg->demote) {
		if (dm_cache_remove_mapping(cache->cmd, mg->cblock)) {
			DMWARN_LIMIT("demotion failed; couldn't update on disk metadata");
			policy_force_mapping(cache->policy, mg->new_oblock,
					     mg->old_oblock);
			if (mg->promote)
				cell_defer(cache, mg->new_ocell, true);
			cleanup_migration(mg);
			return;
		}
	} else if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) {
		DMWARN_LIMIT("promotion failed; couldn't update on disk metadata");
		policy_remove_mapping(cache->policy, mg->new_oblock);
		cleanup_migration(mg);
		return;
	}

	/* Metadata changed: wait for the commit before finishing up. */
	spin_lock_irqsave(&cache->lock, irq_state);
	list_add_tail(&mg->list, &cache->need_commit_migrations);
	cache->commit_requested = true;
	spin_unlock_irqrestore(&cache->lock, irq_state);
}
/*
 * Second stage of completing a successful migration; runs once the
 * metadata commit has happened.
 *
 *  - writeback: should never get here; a warning is logged.
 *  - demotion:  old_ocell is released.  A pure demotion is finished
 *    here; a demote-then-promote has its demote flag cleared and is put
 *    back on quiesced_migrations so the promotion half can run.
 *  - promotion: new_ocell is released - re-queueing the holder when
 *    requeue_holder is set, otherwise completing the holder bio with
 *    success - then the block is marked clean and the migration is
 *    finished.
 */
static void migration_success_post_commit(struct dm_cache_migration *mg)
{
	struct cache *cache = mg->cache;
	unsigned long irq_state;

	if (mg->writeback) {
		DMWARN("writeback unexpectedly triggered commit");
		return;
	}

	if (mg->demote) {
		cell_defer(cache, mg->old_ocell, !mg->promote);

		if (!mg->promote) {
			cleanup_migration(mg);
			return;
		}

		/* Demotion done; requeue for the promotion half. */
		mg->demote = false;

		spin_lock_irqsave(&cache->lock, irq_state);
		list_add_tail(&mg->list, &cache->quiesced_migrations);
		spin_unlock_irqrestore(&cache->lock, irq_state);
		return;
	}

	if (mg->requeue_holder) {
		cell_defer(cache, mg->new_ocell, true);
	} else {
		bio_endio(mg->new_ocell->holder, 0);
		cell_defer(cache, mg->new_ocell, false);
	}

	clear_dirty(cache, mg->new_oblock, mg->cblock);
	cleanup_migration(mg);
}
/*
 * Completion callback passed to dm_kcopyd_copy(); @context is the
 * migration.
 *
 * Any read or write error is recorded in mg->err.  The migration is
 * then appended to completed_migrations under cache->lock and the
 * worker is kicked.
 */
static void copy_complete(int read_err, unsigned long write_err, void *context)
{
	struct dm_cache_migration *mg = context;
	struct cache *cache = mg->cache;
	unsigned long irq_state;

	if (read_err || write_err)
		mg->err = true;

	spin_lock_irqsave(&cache->lock, irq_state);
	list_add_tail(&mg->list, &cache->completed_migrations);
	spin_unlock_irqrestore(&cache->lock, irq_state);

	wake_worker(cache);
}
static void issue_copy_real(struct dm_cache_migration *mg)
{
int r;
struct dm_io_region o_region, c_region;
struct cache *cache = mg->cache;
o_region.bdev = cache->origin_dev->bdev;
o_region.count = cache->sectors_per_block;
c_region.bdev = cache->cache_dev->bdev;
c_region.sector = from_cblock(mg->cblock) * cache->sectors_per_block;
c_region.count = cache->sectors_per_block;
if (mg->writeback || mg->demote) {
/* demote */
o_region.sector = from_oblock(mg->old_oblock) * cache->sectors_per_block;
r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, mg);
} else {
/* promote */
o_region.sector = from_oblock(mg->new_oblock) * cache->sectors_per_block;
r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg);
}
if (r < 0) {
DMERR_LIMIT("issuing migration failed");
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
 * Completion handler installed by issue_overwrite(); bi_private holds
 * the migration.
 *
 * An I/O error is recorded in mg->err.  Under cache->lock the migration
 * is appended to completed_migrations, the bio's original completion
 * state is restored and requeue_holder is cleared.  The worker is then
 * kicked.
 */
static void overwrite_endio(struct bio *bio, int err)
{
	struct dm_cache_migration *mg = bio->bi_private;
	struct cache *cache = mg->cache;
	struct per_bio_data *pb = get_per_bio_data(bio, get_per_bio_data_size(cache));
	unsigned long irq_state;

	if (err)
		mg->err = true;

	spin_lock_irqsave(&cache->lock, irq_state);
	list_add_tail(&mg->list, &cache->completed_migrations);
	dm_unhook_bio(&pb->hook_info, bio);
	mg->requeue_holder = false;
	spin_unlock_irqrestore(&cache->lock, irq_state);

	wake_worker(cache);
}
/*
 * Use @bio itself to populate the cache block of migration @mg.
 *
 * The bio's completion is hooked to overwrite_endio() with @mg as
 * private data, the bio is remapped to mg->cblock (marking it dirty if
 * it is a write) and then submitted.
 */
static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)
{
	struct cache *cache = mg->cache;
	struct per_bio_data *pb = get_per_bio_data(bio, get_per_bio_data_size(cache));

	dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
	remap_to_cache_dirty(cache, bio, mg->new_oblock, mg->cblock);

	generic_make_request(bio);
}
static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
{
return (bio_data_dir(bio) == WRITE) &&
(bio->bi_size == (cache->sectors_per_block << SECTOR_SHIFT));