Skip to content
Snippets Groups Projects
Commit f412f2c6 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull second round of block driver updates from Jens Axboe:
 "As mentioned in the original pull request, the bcache bits were pulled
  because of their dependency on the immutable bio vecs.  Kent re-did
  this part and resubmitted it, so here's the 2nd round of (mostly)
  driver updates for 3.13.  It contains:

 - The bcache work from Kent.

 - Conversion of virtio-blk to blk-mq.  This removes the bio and request
   path, and substitutes with the blk-mq path instead.  The end result is
   almost 200 deleted lines.  Patch is acked by Asias and Christoph, who
   both did a bunch of testing.

 - A removal of bootmem.h include from Grygorii Strashko, part of a
   larger series of his killing the dependency on that header file.

 - Removal of __cpuinit from blk-mq from Paul Gortmaker"

* 'for-linus' of git://git.kernel.dk/linux-block: (56 commits)
  virtio_blk: blk-mq support
  blk-mq: remove newly added instances of __cpuinit
  bcache: defensively handle format strings
  bcache: Bypass torture test
  bcache: Delete some slower inline asm
  bcache: Use ida for bcache block dev minor
  bcache: Fix sysfs splat on shutdown with flash only devs
  bcache: Better full stripe scanning
  bcache: Have btree_split() insert into parent directly
  bcache: Move spinlock into struct time_stats
  bcache: Kill sequential_merge option
  bcache: Kill bch_next_recurse_key()
  bcache: Avoid deadlocking in garbage collection
  bcache: Incremental gc
  bcache: Add make_btree_freeing_key()
  bcache: Add btree_node_write_sync()
  bcache: PRECEDING_KEY()
  bcache: bch_(btree|extent)_ptr_invalid()
  bcache: Don't bother with bucket refcount for btree node allocations
  bcache: Debug code improvements
  ...
parents cd1177f2 1cf7e9c6
No related merge requests found
Showing
with 2229 additions and 2900 deletions
...@@ -6,7 +6,6 @@ ...@@ -6,7 +6,6 @@
#include <linux/init.h> #include <linux/init.h>
#include <linux/bio.h> #include <linux/bio.h>
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/bootmem.h> /* for max_pfn/max_low_pfn */
#include <linux/slab.h> #include <linux/slab.h>
#include "blk.h" #include "blk.h"
......
...@@ -13,8 +13,8 @@ ...@@ -13,8 +13,8 @@
static LIST_HEAD(blk_mq_cpu_notify_list); static LIST_HEAD(blk_mq_cpu_notify_list);
static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock); static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock);
static int __cpuinit blk_mq_main_cpu_notify(struct notifier_block *self, static int blk_mq_main_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu) unsigned long action, void *hcpu)
{ {
unsigned int cpu = (unsigned long) hcpu; unsigned int cpu = (unsigned long) hcpu;
struct blk_mq_cpu_notifier *notify; struct blk_mq_cpu_notifier *notify;
...@@ -28,8 +28,8 @@ static int __cpuinit blk_mq_main_cpu_notify(struct notifier_block *self, ...@@ -28,8 +28,8 @@ static int __cpuinit blk_mq_main_cpu_notify(struct notifier_block *self,
return NOTIFY_OK; return NOTIFY_OK;
} }
static void __cpuinit blk_mq_cpu_notify(void *data, unsigned long action, static void blk_mq_cpu_notify(void *data, unsigned long action,
unsigned int cpu) unsigned int cpu)
{ {
if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
/* /*
......
...@@ -1444,7 +1444,7 @@ void blk_mq_free_queue(struct request_queue *q) ...@@ -1444,7 +1444,7 @@ void blk_mq_free_queue(struct request_queue *q)
EXPORT_SYMBOL(blk_mq_free_queue); EXPORT_SYMBOL(blk_mq_free_queue);
/* Basically redo blk_mq_init_queue with queue frozen */ /* Basically redo blk_mq_init_queue with queue frozen */
static void __cpuinit blk_mq_queue_reinit(struct request_queue *q) static void blk_mq_queue_reinit(struct request_queue *q)
{ {
blk_mq_freeze_queue(q); blk_mq_freeze_queue(q);
...@@ -1461,8 +1461,8 @@ static void __cpuinit blk_mq_queue_reinit(struct request_queue *q) ...@@ -1461,8 +1461,8 @@ static void __cpuinit blk_mq_queue_reinit(struct request_queue *q)
blk_mq_unfreeze_queue(q); blk_mq_unfreeze_queue(q);
} }
static int __cpuinit blk_mq_queue_reinit_notify(struct notifier_block *nb, static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
unsigned long action, void *hcpu) unsigned long action, void *hcpu)
{ {
struct request_queue *q; struct request_queue *q;
......
...@@ -11,12 +11,11 @@ ...@@ -11,12 +11,11 @@
#include <linux/string_helpers.h> #include <linux/string_helpers.h>
#include <scsi/scsi_cmnd.h> #include <scsi/scsi_cmnd.h>
#include <linux/idr.h> #include <linux/idr.h>
#include <linux/blk-mq.h>
#include <linux/numa.h>
#define PART_BITS 4 #define PART_BITS 4
static bool use_bio;
module_param(use_bio, bool, S_IRUGO);
static int major; static int major;
static DEFINE_IDA(vd_index_ida); static DEFINE_IDA(vd_index_ida);
...@@ -26,13 +25,11 @@ struct virtio_blk ...@@ -26,13 +25,11 @@ struct virtio_blk
{ {
struct virtio_device *vdev; struct virtio_device *vdev;
struct virtqueue *vq; struct virtqueue *vq;
wait_queue_head_t queue_wait; spinlock_t vq_lock;
/* The disk structure for the kernel. */ /* The disk structure for the kernel. */
struct gendisk *disk; struct gendisk *disk;
mempool_t *pool;
/* Process context for config space updates */ /* Process context for config space updates */
struct work_struct config_work; struct work_struct config_work;
...@@ -47,31 +44,17 @@ struct virtio_blk ...@@ -47,31 +44,17 @@ struct virtio_blk
/* Ida index - used to track minor number allocations. */ /* Ida index - used to track minor number allocations. */
int index; int index;
/* Scatterlist: can be too big for stack. */
struct scatterlist sg[/*sg_elems*/];
}; };
struct virtblk_req struct virtblk_req
{ {
struct request *req; struct request *req;
struct bio *bio;
struct virtio_blk_outhdr out_hdr; struct virtio_blk_outhdr out_hdr;
struct virtio_scsi_inhdr in_hdr; struct virtio_scsi_inhdr in_hdr;
struct work_struct work;
struct virtio_blk *vblk;
int flags;
u8 status; u8 status;
struct scatterlist sg[]; struct scatterlist sg[];
}; };
enum {
VBLK_IS_FLUSH = 1,
VBLK_REQ_FLUSH = 2,
VBLK_REQ_DATA = 4,
VBLK_REQ_FUA = 8,
};
static inline int virtblk_result(struct virtblk_req *vbr) static inline int virtblk_result(struct virtblk_req *vbr)
{ {
switch (vbr->status) { switch (vbr->status) {
...@@ -84,22 +67,6 @@ static inline int virtblk_result(struct virtblk_req *vbr) ...@@ -84,22 +67,6 @@ static inline int virtblk_result(struct virtblk_req *vbr)
} }
} }
static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
gfp_t gfp_mask)
{
struct virtblk_req *vbr;
vbr = mempool_alloc(vblk->pool, gfp_mask);
if (!vbr)
return NULL;
vbr->vblk = vblk;
if (use_bio)
sg_init_table(vbr->sg, vblk->sg_elems);
return vbr;
}
static int __virtblk_add_req(struct virtqueue *vq, static int __virtblk_add_req(struct virtqueue *vq,
struct virtblk_req *vbr, struct virtblk_req *vbr,
struct scatterlist *data_sg, struct scatterlist *data_sg,
...@@ -143,83 +110,8 @@ static int __virtblk_add_req(struct virtqueue *vq, ...@@ -143,83 +110,8 @@ static int __virtblk_add_req(struct virtqueue *vq,
return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
} }
static void virtblk_add_req(struct virtblk_req *vbr, bool have_data)
{
struct virtio_blk *vblk = vbr->vblk;
DEFINE_WAIT(wait);
int ret;
spin_lock_irq(vblk->disk->queue->queue_lock);
while (unlikely((ret = __virtblk_add_req(vblk->vq, vbr, vbr->sg,
have_data)) < 0)) {
prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
TASK_UNINTERRUPTIBLE);
spin_unlock_irq(vblk->disk->queue->queue_lock);
io_schedule();
spin_lock_irq(vblk->disk->queue->queue_lock);
finish_wait(&vblk->queue_wait, &wait);
}
virtqueue_kick(vblk->vq);
spin_unlock_irq(vblk->disk->queue->queue_lock);
}
static void virtblk_bio_send_flush(struct virtblk_req *vbr)
{
vbr->flags |= VBLK_IS_FLUSH;
vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
vbr->out_hdr.sector = 0;
vbr->out_hdr.ioprio = 0;
virtblk_add_req(vbr, false);
}
static void virtblk_bio_send_data(struct virtblk_req *vbr)
{
struct virtio_blk *vblk = vbr->vblk;
struct bio *bio = vbr->bio;
bool have_data;
vbr->flags &= ~VBLK_IS_FLUSH;
vbr->out_hdr.type = 0;
vbr->out_hdr.sector = bio->bi_sector;
vbr->out_hdr.ioprio = bio_prio(bio);
if (blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg)) {
have_data = true;
if (bio->bi_rw & REQ_WRITE)
vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
else
vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
} else
have_data = false;
virtblk_add_req(vbr, have_data);
}
static void virtblk_bio_send_data_work(struct work_struct *work)
{
struct virtblk_req *vbr;
vbr = container_of(work, struct virtblk_req, work);
virtblk_bio_send_data(vbr);
}
static void virtblk_bio_send_flush_work(struct work_struct *work)
{
struct virtblk_req *vbr;
vbr = container_of(work, struct virtblk_req, work);
virtblk_bio_send_flush(vbr);
}
static inline void virtblk_request_done(struct virtblk_req *vbr) static inline void virtblk_request_done(struct virtblk_req *vbr)
{ {
struct virtio_blk *vblk = vbr->vblk;
struct request *req = vbr->req; struct request *req = vbr->req;
int error = virtblk_result(vbr); int error = virtblk_result(vbr);
...@@ -231,92 +123,45 @@ static inline void virtblk_request_done(struct virtblk_req *vbr) ...@@ -231,92 +123,45 @@ static inline void virtblk_request_done(struct virtblk_req *vbr)
req->errors = (error != 0); req->errors = (error != 0);
} }
__blk_end_request_all(req, error); blk_mq_end_io(req, error);
mempool_free(vbr, vblk->pool);
}
static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
{
struct virtio_blk *vblk = vbr->vblk;
if (vbr->flags & VBLK_REQ_DATA) {
/* Send out the actual write data */
INIT_WORK(&vbr->work, virtblk_bio_send_data_work);
queue_work(virtblk_wq, &vbr->work);
} else {
bio_endio(vbr->bio, virtblk_result(vbr));
mempool_free(vbr, vblk->pool);
}
}
static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
{
struct virtio_blk *vblk = vbr->vblk;
if (unlikely(vbr->flags & VBLK_REQ_FUA)) {
/* Send out a flush before end the bio */
vbr->flags &= ~VBLK_REQ_DATA;
INIT_WORK(&vbr->work, virtblk_bio_send_flush_work);
queue_work(virtblk_wq, &vbr->work);
} else {
bio_endio(vbr->bio, virtblk_result(vbr));
mempool_free(vbr, vblk->pool);
}
}
static inline void virtblk_bio_done(struct virtblk_req *vbr)
{
if (unlikely(vbr->flags & VBLK_IS_FLUSH))
virtblk_bio_flush_done(vbr);
else
virtblk_bio_data_done(vbr);
} }
static void virtblk_done(struct virtqueue *vq) static void virtblk_done(struct virtqueue *vq)
{ {
struct virtio_blk *vblk = vq->vdev->priv; struct virtio_blk *vblk = vq->vdev->priv;
bool bio_done = false, req_done = false; bool req_done = false;
struct virtblk_req *vbr; struct virtblk_req *vbr;
unsigned long flags; unsigned long flags;
unsigned int len; unsigned int len;
spin_lock_irqsave(vblk->disk->queue->queue_lock, flags); spin_lock_irqsave(&vblk->vq_lock, flags);
do { do {
virtqueue_disable_cb(vq); virtqueue_disable_cb(vq);
while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
if (vbr->bio) { virtblk_request_done(vbr);
virtblk_bio_done(vbr); req_done = true;
bio_done = true;
} else {
virtblk_request_done(vbr);
req_done = true;
}
} }
if (unlikely(virtqueue_is_broken(vq))) if (unlikely(virtqueue_is_broken(vq)))
break; break;
} while (!virtqueue_enable_cb(vq)); } while (!virtqueue_enable_cb(vq));
spin_unlock_irqrestore(&vblk->vq_lock, flags);
/* In case queue is stopped waiting for more buffers. */ /* In case queue is stopped waiting for more buffers. */
if (req_done) if (req_done)
blk_start_queue(vblk->disk->queue); blk_mq_start_stopped_hw_queues(vblk->disk->queue);
spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
if (bio_done)
wake_up(&vblk->queue_wait);
} }
static bool do_req(struct request_queue *q, struct virtio_blk *vblk, static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
struct request *req)
{ {
struct virtio_blk *vblk = hctx->queue->queuedata;
struct virtblk_req *vbr = req->special;
unsigned long flags;
unsigned int num; unsigned int num;
struct virtblk_req *vbr; const bool last = (req->cmd_flags & REQ_END) != 0;
vbr = virtblk_alloc_req(vblk, GFP_ATOMIC); BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
if (!vbr)
/* When another request finishes we'll try again. */
return false;
vbr->req = req; vbr->req = req;
vbr->bio = NULL;
if (req->cmd_flags & REQ_FLUSH) { if (req->cmd_flags & REQ_FLUSH) {
vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH; vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
vbr->out_hdr.sector = 0; vbr->out_hdr.sector = 0;
...@@ -344,7 +189,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, ...@@ -344,7 +189,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
} }
} }
num = blk_rq_map_sg(q, vbr->req, vblk->sg); num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg);
if (num) { if (num) {
if (rq_data_dir(vbr->req) == WRITE) if (rq_data_dir(vbr->req) == WRITE)
vbr->out_hdr.type |= VIRTIO_BLK_T_OUT; vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
...@@ -352,63 +197,18 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, ...@@ -352,63 +197,18 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
vbr->out_hdr.type |= VIRTIO_BLK_T_IN; vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
} }
if (__virtblk_add_req(vblk->vq, vbr, vblk->sg, num) < 0) { spin_lock_irqsave(&vblk->vq_lock, flags);
mempool_free(vbr, vblk->pool); if (__virtblk_add_req(vblk->vq, vbr, vbr->sg, num) < 0) {
return false; spin_unlock_irqrestore(&vblk->vq_lock, flags);
} blk_mq_stop_hw_queue(hctx);
return true;
}
static void virtblk_request(struct request_queue *q)
{
struct virtio_blk *vblk = q->queuedata;
struct request *req;
unsigned int issued = 0;
while ((req = blk_peek_request(q)) != NULL) {
BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
/* If this request fails, stop queue and wait for something to
finish to restart it. */
if (!do_req(q, vblk, req)) {
blk_stop_queue(q);
break;
}
blk_start_request(req);
issued++;
}
if (issued)
virtqueue_kick(vblk->vq); virtqueue_kick(vblk->vq);
} return BLK_MQ_RQ_QUEUE_BUSY;
static void virtblk_make_request(struct request_queue *q, struct bio *bio)
{
struct virtio_blk *vblk = q->queuedata;
struct virtblk_req *vbr;
BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems);
vbr = virtblk_alloc_req(vblk, GFP_NOIO);
if (!vbr) {
bio_endio(bio, -ENOMEM);
return;
} }
spin_unlock_irqrestore(&vblk->vq_lock, flags);
vbr->bio = bio; if (last)
vbr->flags = 0; virtqueue_kick(vblk->vq);
if (bio->bi_rw & REQ_FLUSH) return BLK_MQ_RQ_QUEUE_OK;
vbr->flags |= VBLK_REQ_FLUSH;
if (bio->bi_rw & REQ_FUA)
vbr->flags |= VBLK_REQ_FUA;
if (bio->bi_size)
vbr->flags |= VBLK_REQ_DATA;
if (unlikely(vbr->flags & VBLK_REQ_FLUSH))
virtblk_bio_send_flush(vbr);
else
virtblk_bio_send_data(vbr);
} }
/* return id (s/n) string for *disk to *id_str /* return id (s/n) string for *disk to *id_str
...@@ -673,12 +473,35 @@ static const struct device_attribute dev_attr_cache_type_rw = ...@@ -673,12 +473,35 @@ static const struct device_attribute dev_attr_cache_type_rw =
__ATTR(cache_type, S_IRUGO|S_IWUSR, __ATTR(cache_type, S_IRUGO|S_IWUSR,
virtblk_cache_type_show, virtblk_cache_type_store); virtblk_cache_type_show, virtblk_cache_type_store);
static struct blk_mq_ops virtio_mq_ops = {
.queue_rq = virtio_queue_rq,
.map_queue = blk_mq_map_queue,
.alloc_hctx = blk_mq_alloc_single_hw_queue,
.free_hctx = blk_mq_free_single_hw_queue,
};
static struct blk_mq_reg virtio_mq_reg = {
.ops = &virtio_mq_ops,
.nr_hw_queues = 1,
.queue_depth = 64,
.numa_node = NUMA_NO_NODE,
.flags = BLK_MQ_F_SHOULD_MERGE,
};
static void virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx,
struct request *rq, unsigned int nr)
{
struct virtio_blk *vblk = data;
struct virtblk_req *vbr = rq->special;
sg_init_table(vbr->sg, vblk->sg_elems);
}
static int virtblk_probe(struct virtio_device *vdev) static int virtblk_probe(struct virtio_device *vdev)
{ {
struct virtio_blk *vblk; struct virtio_blk *vblk;
struct request_queue *q; struct request_queue *q;
int err, index; int err, index;
int pool_size;
u64 cap; u64 cap;
u32 v, blk_size, sg_elems, opt_io_size; u32 v, blk_size, sg_elems, opt_io_size;
...@@ -702,17 +525,14 @@ static int virtblk_probe(struct virtio_device *vdev) ...@@ -702,17 +525,14 @@ static int virtblk_probe(struct virtio_device *vdev)
/* We need an extra sg elements at head and tail. */ /* We need an extra sg elements at head and tail. */
sg_elems += 2; sg_elems += 2;
vdev->priv = vblk = kmalloc(sizeof(*vblk) + vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
if (!vblk) { if (!vblk) {
err = -ENOMEM; err = -ENOMEM;
goto out_free_index; goto out_free_index;
} }
init_waitqueue_head(&vblk->queue_wait);
vblk->vdev = vdev; vblk->vdev = vdev;
vblk->sg_elems = sg_elems; vblk->sg_elems = sg_elems;
sg_init_table(vblk->sg, vblk->sg_elems);
mutex_init(&vblk->config_lock); mutex_init(&vblk->config_lock);
INIT_WORK(&vblk->config_work, virtblk_config_changed_work); INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
...@@ -721,31 +541,27 @@ static int virtblk_probe(struct virtio_device *vdev) ...@@ -721,31 +541,27 @@ static int virtblk_probe(struct virtio_device *vdev)
err = init_vq(vblk); err = init_vq(vblk);
if (err) if (err)
goto out_free_vblk; goto out_free_vblk;
spin_lock_init(&vblk->vq_lock);
pool_size = sizeof(struct virtblk_req);
if (use_bio)
pool_size += sizeof(struct scatterlist) * sg_elems;
vblk->pool = mempool_create_kmalloc_pool(1, pool_size);
if (!vblk->pool) {
err = -ENOMEM;
goto out_free_vq;
}
/* FIXME: How many partitions? How long is a piece of string? */ /* FIXME: How many partitions? How long is a piece of string? */
vblk->disk = alloc_disk(1 << PART_BITS); vblk->disk = alloc_disk(1 << PART_BITS);
if (!vblk->disk) { if (!vblk->disk) {
err = -ENOMEM; err = -ENOMEM;
goto out_mempool; goto out_free_vq;
} }
q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL); virtio_mq_reg.cmd_size =
sizeof(struct virtblk_req) +
sizeof(struct scatterlist) * sg_elems;
q = vblk->disk->queue = blk_mq_init_queue(&virtio_mq_reg, vblk);
if (!q) { if (!q) {
err = -ENOMEM; err = -ENOMEM;
goto out_put_disk; goto out_put_disk;
} }
if (use_bio) blk_mq_init_commands(q, virtblk_init_vbr, vblk);
blk_queue_make_request(q, virtblk_make_request);
q->queuedata = vblk; q->queuedata = vblk;
virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
...@@ -848,8 +664,6 @@ static int virtblk_probe(struct virtio_device *vdev) ...@@ -848,8 +664,6 @@ static int virtblk_probe(struct virtio_device *vdev)
blk_cleanup_queue(vblk->disk->queue); blk_cleanup_queue(vblk->disk->queue);
out_put_disk: out_put_disk:
put_disk(vblk->disk); put_disk(vblk->disk);
out_mempool:
mempool_destroy(vblk->pool);
out_free_vq: out_free_vq:
vdev->config->del_vqs(vdev); vdev->config->del_vqs(vdev);
out_free_vblk: out_free_vblk:
...@@ -881,7 +695,6 @@ static void virtblk_remove(struct virtio_device *vdev) ...@@ -881,7 +695,6 @@ static void virtblk_remove(struct virtio_device *vdev)
refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount); refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
put_disk(vblk->disk); put_disk(vblk->disk);
mempool_destroy(vblk->pool);
vdev->config->del_vqs(vdev); vdev->config->del_vqs(vdev);
kfree(vblk); kfree(vblk);
...@@ -905,10 +718,7 @@ static int virtblk_freeze(struct virtio_device *vdev) ...@@ -905,10 +718,7 @@ static int virtblk_freeze(struct virtio_device *vdev)
flush_work(&vblk->config_work); flush_work(&vblk->config_work);
spin_lock_irq(vblk->disk->queue->queue_lock); blk_mq_stop_hw_queues(vblk->disk->queue);
blk_stop_queue(vblk->disk->queue);
spin_unlock_irq(vblk->disk->queue->queue_lock);
blk_sync_queue(vblk->disk->queue);
vdev->config->del_vqs(vdev); vdev->config->del_vqs(vdev);
return 0; return 0;
...@@ -921,11 +731,9 @@ static int virtblk_restore(struct virtio_device *vdev) ...@@ -921,11 +731,9 @@ static int virtblk_restore(struct virtio_device *vdev)
vblk->config_enable = true; vblk->config_enable = true;
ret = init_vq(vdev->priv); ret = init_vq(vdev->priv);
if (!ret) { if (!ret)
spin_lock_irq(vblk->disk->queue->queue_lock); blk_mq_start_stopped_hw_queues(vblk->disk->queue);
blk_start_queue(vblk->disk->queue);
spin_unlock_irq(vblk->disk->queue->queue_lock);
}
return ret; return ret;
} }
#endif #endif
......
...@@ -13,15 +13,8 @@ config BCACHE_DEBUG ...@@ -13,15 +13,8 @@ config BCACHE_DEBUG
---help--- ---help---
Don't select this option unless you're a developer Don't select this option unless you're a developer
Enables extra debugging tools (primarily a fuzz tester) Enables extra debugging tools, allows expensive runtime checks to be
turned on.
config BCACHE_EDEBUG
bool "Extended runtime checks"
depends on BCACHE
---help---
Don't select this option unless you're a developer
Enables extra runtime checks which significantly affect performance
config BCACHE_CLOSURES_DEBUG config BCACHE_CLOSURES_DEBUG
bool "Debug closures" bool "Debug closures"
......
...@@ -63,13 +63,12 @@ ...@@ -63,13 +63,12 @@
#include "bcache.h" #include "bcache.h"
#include "btree.h" #include "btree.h"
#include <linux/blkdev.h>
#include <linux/freezer.h> #include <linux/freezer.h>
#include <linux/kthread.h> #include <linux/kthread.h>
#include <linux/random.h> #include <linux/random.h>
#include <trace/events/bcache.h> #include <trace/events/bcache.h>
#define MAX_IN_FLIGHT_DISCARDS 8U
/* Bucket heap / gen */ /* Bucket heap / gen */
uint8_t bch_inc_gen(struct cache *ca, struct bucket *b) uint8_t bch_inc_gen(struct cache *ca, struct bucket *b)
...@@ -121,75 +120,6 @@ void bch_rescale_priorities(struct cache_set *c, int sectors) ...@@ -121,75 +120,6 @@ void bch_rescale_priorities(struct cache_set *c, int sectors)
mutex_unlock(&c->bucket_lock); mutex_unlock(&c->bucket_lock);
} }
/* Discard/TRIM */
struct discard {
struct list_head list;
struct work_struct work;
struct cache *ca;
long bucket;
struct bio bio;
struct bio_vec bv;
};
static void discard_finish(struct work_struct *w)
{
struct discard *d = container_of(w, struct discard, work);
struct cache *ca = d->ca;
char buf[BDEVNAME_SIZE];
if (!test_bit(BIO_UPTODATE, &d->bio.bi_flags)) {
pr_notice("discard error on %s, disabling",
bdevname(ca->bdev, buf));
d->ca->discard = 0;
}
mutex_lock(&ca->set->bucket_lock);
fifo_push(&ca->free, d->bucket);
list_add(&d->list, &ca->discards);
atomic_dec(&ca->discards_in_flight);
mutex_unlock(&ca->set->bucket_lock);
closure_wake_up(&ca->set->bucket_wait);
wake_up_process(ca->alloc_thread);
closure_put(&ca->set->cl);
}
static void discard_endio(struct bio *bio, int error)
{
struct discard *d = container_of(bio, struct discard, bio);
schedule_work(&d->work);
}
static void do_discard(struct cache *ca, long bucket)
{
struct discard *d = list_first_entry(&ca->discards,
struct discard, list);
list_del(&d->list);
d->bucket = bucket;
atomic_inc(&ca->discards_in_flight);
closure_get(&ca->set->cl);
bio_init(&d->bio);
d->bio.bi_sector = bucket_to_sector(ca->set, d->bucket);
d->bio.bi_bdev = ca->bdev;
d->bio.bi_rw = REQ_WRITE|REQ_DISCARD;
d->bio.bi_max_vecs = 1;
d->bio.bi_io_vec = d->bio.bi_inline_vecs;
d->bio.bi_size = bucket_bytes(ca);
d->bio.bi_end_io = discard_endio;
bio_set_prio(&d->bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
submit_bio(0, &d->bio);
}
/* Allocation */ /* Allocation */
static inline bool can_inc_bucket_gen(struct bucket *b) static inline bool can_inc_bucket_gen(struct bucket *b)
...@@ -280,7 +210,7 @@ static void invalidate_buckets_lru(struct cache *ca) ...@@ -280,7 +210,7 @@ static void invalidate_buckets_lru(struct cache *ca)
* multiple times when it can't do anything * multiple times when it can't do anything
*/ */
ca->invalidate_needs_gc = 1; ca->invalidate_needs_gc = 1;
bch_queue_gc(ca->set); wake_up_gc(ca->set);
return; return;
} }
...@@ -305,7 +235,7 @@ static void invalidate_buckets_fifo(struct cache *ca) ...@@ -305,7 +235,7 @@ static void invalidate_buckets_fifo(struct cache *ca)
if (++checked >= ca->sb.nbuckets) { if (++checked >= ca->sb.nbuckets) {
ca->invalidate_needs_gc = 1; ca->invalidate_needs_gc = 1;
bch_queue_gc(ca->set); wake_up_gc(ca->set);
return; return;
} }
} }
...@@ -330,7 +260,7 @@ static void invalidate_buckets_random(struct cache *ca) ...@@ -330,7 +260,7 @@ static void invalidate_buckets_random(struct cache *ca)
if (++checked >= ca->sb.nbuckets / 2) { if (++checked >= ca->sb.nbuckets / 2) {
ca->invalidate_needs_gc = 1; ca->invalidate_needs_gc = 1;
bch_queue_gc(ca->set); wake_up_gc(ca->set);
return; return;
} }
} }
...@@ -398,16 +328,18 @@ static int bch_allocator_thread(void *arg) ...@@ -398,16 +328,18 @@ static int bch_allocator_thread(void *arg)
else else
break; break;
allocator_wait(ca, (int) fifo_free(&ca->free) >
atomic_read(&ca->discards_in_flight));
if (ca->discard) { if (ca->discard) {
allocator_wait(ca, !list_empty(&ca->discards)); mutex_unlock(&ca->set->bucket_lock);
do_discard(ca, bucket); blkdev_issue_discard(ca->bdev,
} else { bucket_to_sector(ca->set, bucket),
fifo_push(&ca->free, bucket); ca->sb.block_size, GFP_KERNEL, 0);
closure_wake_up(&ca->set->bucket_wait); mutex_lock(&ca->set->bucket_lock);
} }
allocator_wait(ca, !fifo_full(&ca->free));
fifo_push(&ca->free, bucket);
wake_up(&ca->set->bucket_wait);
} }
/* /*
...@@ -433,16 +365,40 @@ static int bch_allocator_thread(void *arg) ...@@ -433,16 +365,40 @@ static int bch_allocator_thread(void *arg)
} }
} }
long bch_bucket_alloc(struct cache *ca, unsigned watermark, struct closure *cl) long bch_bucket_alloc(struct cache *ca, unsigned watermark, bool wait)
{ {
long r = -1; DEFINE_WAIT(w);
again: struct bucket *b;
long r;
/* fastpath */
if (fifo_used(&ca->free) > ca->watermark[watermark]) {
fifo_pop(&ca->free, r);
goto out;
}
if (!wait)
return -1;
while (1) {
if (fifo_used(&ca->free) > ca->watermark[watermark]) {
fifo_pop(&ca->free, r);
break;
}
prepare_to_wait(&ca->set->bucket_wait, &w,
TASK_UNINTERRUPTIBLE);
mutex_unlock(&ca->set->bucket_lock);
schedule();
mutex_lock(&ca->set->bucket_lock);
}
finish_wait(&ca->set->bucket_wait, &w);
out:
wake_up_process(ca->alloc_thread); wake_up_process(ca->alloc_thread);
if (fifo_used(&ca->free) > ca->watermark[watermark] && if (expensive_debug_checks(ca->set)) {
fifo_pop(&ca->free, r)) {
struct bucket *b = ca->buckets + r;
#ifdef CONFIG_BCACHE_EDEBUG
size_t iter; size_t iter;
long i; long i;
...@@ -455,36 +411,23 @@ long bch_bucket_alloc(struct cache *ca, unsigned watermark, struct closure *cl) ...@@ -455,36 +411,23 @@ long bch_bucket_alloc(struct cache *ca, unsigned watermark, struct closure *cl)
BUG_ON(i == r); BUG_ON(i == r);
fifo_for_each(i, &ca->unused, iter) fifo_for_each(i, &ca->unused, iter)
BUG_ON(i == r); BUG_ON(i == r);
#endif
BUG_ON(atomic_read(&b->pin) != 1);
SET_GC_SECTORS_USED(b, ca->sb.bucket_size);
if (watermark <= WATERMARK_METADATA) {
SET_GC_MARK(b, GC_MARK_METADATA);
b->prio = BTREE_PRIO;
} else {
SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
b->prio = INITIAL_PRIO;
}
return r;
} }
trace_bcache_alloc_fail(ca); b = ca->buckets + r;
if (cl) { BUG_ON(atomic_read(&b->pin) != 1);
closure_wait(&ca->set->bucket_wait, cl);
if (closure_blocking(cl)) { SET_GC_SECTORS_USED(b, ca->sb.bucket_size);
mutex_unlock(&ca->set->bucket_lock);
closure_sync(cl); if (watermark <= WATERMARK_METADATA) {
mutex_lock(&ca->set->bucket_lock); SET_GC_MARK(b, GC_MARK_METADATA);
goto again; b->prio = BTREE_PRIO;
} } else {
SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
b->prio = INITIAL_PRIO;
} }
return -1; return r;
} }
void bch_bucket_free(struct cache_set *c, struct bkey *k) void bch_bucket_free(struct cache_set *c, struct bkey *k)
...@@ -501,7 +444,7 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k) ...@@ -501,7 +444,7 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k)
} }
int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark, int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
struct bkey *k, int n, struct closure *cl) struct bkey *k, int n, bool wait)
{ {
int i; int i;
...@@ -514,7 +457,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark, ...@@ -514,7 +457,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
struct cache *ca = c->cache_by_alloc[i]; struct cache *ca = c->cache_by_alloc[i];
long b = bch_bucket_alloc(ca, watermark, cl); long b = bch_bucket_alloc(ca, watermark, wait);
if (b == -1) if (b == -1)
goto err; goto err;
...@@ -529,22 +472,202 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark, ...@@ -529,22 +472,202 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
return 0; return 0;
err: err:
bch_bucket_free(c, k); bch_bucket_free(c, k);
__bkey_put(c, k); bkey_put(c, k);
return -1; return -1;
} }
int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark, int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
struct bkey *k, int n, struct closure *cl) struct bkey *k, int n, bool wait)
{ {
int ret; int ret;
mutex_lock(&c->bucket_lock); mutex_lock(&c->bucket_lock);
ret = __bch_bucket_alloc_set(c, watermark, k, n, cl); ret = __bch_bucket_alloc_set(c, watermark, k, n, wait);
mutex_unlock(&c->bucket_lock); mutex_unlock(&c->bucket_lock);
return ret; return ret;
} }
/* Sector allocator */
struct open_bucket {
struct list_head list;
unsigned last_write_point;
unsigned sectors_free;
BKEY_PADDED(key);
};
/*
* We keep multiple buckets open for writes, and try to segregate different
* write streams for better cache utilization: first we look for a bucket where
* the last write to it was sequential with the current write, and failing that
* we look for a bucket that was last used by the same task.
*
 * The idea is that if you've got multiple tasks pulling data into the cache at
* same time, you'll get better cache utilization if you try to segregate their
* data and preserve locality.
*
 * For example, say you've started Firefox at the same time you're copying a
* bunch of files. Firefox will likely end up being fairly hot and stay in the
* cache awhile, but the data you copied might not be; if you wrote all that
* data to the same buckets it'd get invalidated at the same time.
*
* Both of those tasks will be doing fairly random IO so we can't rely on
* detecting sequential IO to segregate their data, but going off of the task
* should be a sane heuristic.
*/
static struct open_bucket *pick_data_bucket(struct cache_set *c,
const struct bkey *search,
unsigned write_point,
struct bkey *alloc)
{
struct open_bucket *ret, *ret_task = NULL;
list_for_each_entry_reverse(ret, &c->data_buckets, list)
if (!bkey_cmp(&ret->key, search))
goto found;
else if (ret->last_write_point == write_point)
ret_task = ret;
ret = ret_task ?: list_first_entry(&c->data_buckets,
struct open_bucket, list);
found:
if (!ret->sectors_free && KEY_PTRS(alloc)) {
ret->sectors_free = c->sb.bucket_size;
bkey_copy(&ret->key, alloc);
bkey_init(alloc);
}
if (!ret->sectors_free)
ret = NULL;
return ret;
}
/*
* Allocates some space in the cache to write to, and k to point to the newly
* allocated space, and updates KEY_SIZE(k) and KEY_OFFSET(k) (to point to the
* end of the newly allocated space).
*
* May allocate fewer sectors than @sectors, KEY_SIZE(k) indicates how many
* sectors were actually allocated.
*
 * If wait is true, will not fail.
*/
bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors,
unsigned write_point, unsigned write_prio, bool wait)
{
struct open_bucket *b;
BKEY_PADDED(key) alloc;
unsigned i;
/*
* We might have to allocate a new bucket, which we can't do with a
* spinlock held. So if we have to allocate, we drop the lock, allocate
* and then retry. KEY_PTRS() indicates whether alloc points to
* allocated bucket(s).
*/
bkey_init(&alloc.key);
spin_lock(&c->data_bucket_lock);
while (!(b = pick_data_bucket(c, k, write_point, &alloc.key))) {
unsigned watermark = write_prio
? WATERMARK_MOVINGGC
: WATERMARK_NONE;
spin_unlock(&c->data_bucket_lock);
if (bch_bucket_alloc_set(c, watermark, &alloc.key, 1, wait))
return false;
spin_lock(&c->data_bucket_lock);
}
/*
* If we had to allocate, we might race and not need to allocate the
* second time we call find_data_bucket(). If we allocated a bucket but
* didn't use it, drop the refcount bch_bucket_alloc_set() took:
*/
if (KEY_PTRS(&alloc.key))
bkey_put(c, &alloc.key);
for (i = 0; i < KEY_PTRS(&b->key); i++)
EBUG_ON(ptr_stale(c, &b->key, i));
/* Set up the pointer to the space we're allocating: */
for (i = 0; i < KEY_PTRS(&b->key); i++)
k->ptr[i] = b->key.ptr[i];
sectors = min(sectors, b->sectors_free);
SET_KEY_OFFSET(k, KEY_OFFSET(k) + sectors);
SET_KEY_SIZE(k, sectors);
SET_KEY_PTRS(k, KEY_PTRS(&b->key));
/*
* Move b to the end of the lru, and keep track of what this bucket was
* last used for:
*/
list_move_tail(&b->list, &c->data_buckets);
bkey_copy_key(&b->key, k);
b->last_write_point = write_point;
b->sectors_free -= sectors;
for (i = 0; i < KEY_PTRS(&b->key); i++) {
SET_PTR_OFFSET(&b->key, i, PTR_OFFSET(&b->key, i) + sectors);
atomic_long_add(sectors,
&PTR_CACHE(c, &b->key, i)->sectors_written);
}
if (b->sectors_free < c->sb.block_size)
b->sectors_free = 0;
/*
* k takes refcounts on the buckets it points to until it's inserted
* into the btree, but if we're done with this bucket we just transfer
* get_data_bucket()'s refcount.
*/
if (b->sectors_free)
for (i = 0; i < KEY_PTRS(&b->key); i++)
atomic_inc(&PTR_BUCKET(c, &b->key, i)->pin);
spin_unlock(&c->data_bucket_lock);
return true;
}
/* Init */ /* Init */
void bch_open_buckets_free(struct cache_set *c)
{
struct open_bucket *b;
while (!list_empty(&c->data_buckets)) {
b = list_first_entry(&c->data_buckets,
struct open_bucket, list);
list_del(&b->list);
kfree(b);
}
}
/* Number of open_buckets kept on cache_set->data_buckets (was a bare 6) */
#define OPEN_BUCKETS_COUNT	6

/*
 * Initialize c->data_bucket_lock and preallocate the open_buckets used by
 * bch_alloc_sectors().
 *
 * Returns 0 on success, -ENOMEM if an allocation fails.  On failure any
 * buckets already added to c->data_buckets are left there; they are freed
 * by bch_open_buckets_free() on cache set teardown.
 */
int bch_open_buckets_alloc(struct cache_set *c)
{
	int i;

	spin_lock_init(&c->data_bucket_lock);

	for (i = 0; i < OPEN_BUCKETS_COUNT; i++) {
		struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL);

		if (!b)
			return -ENOMEM;

		list_add(&b->list, &c->data_buckets);
	}

	return 0;
}
int bch_cache_allocator_start(struct cache *ca) int bch_cache_allocator_start(struct cache *ca)
{ {
struct task_struct *k = kthread_run(bch_allocator_thread, struct task_struct *k = kthread_run(bch_allocator_thread,
...@@ -556,22 +679,8 @@ int bch_cache_allocator_start(struct cache *ca) ...@@ -556,22 +679,8 @@ int bch_cache_allocator_start(struct cache *ca)
return 0; return 0;
} }
void bch_cache_allocator_exit(struct cache *ca)
{
struct discard *d;
while (!list_empty(&ca->discards)) {
d = list_first_entry(&ca->discards, struct discard, list);
cancel_work_sync(&d->work);
list_del(&d->list);
kfree(d);
}
}
int bch_cache_allocator_init(struct cache *ca) int bch_cache_allocator_init(struct cache *ca)
{ {
unsigned i;
/* /*
* Reserve: * Reserve:
* Prio/gen writes first * Prio/gen writes first
...@@ -589,15 +698,5 @@ int bch_cache_allocator_init(struct cache *ca) ...@@ -589,15 +698,5 @@ int bch_cache_allocator_init(struct cache *ca)
ca->watermark[WATERMARK_NONE] = ca->free.size / 2 + ca->watermark[WATERMARK_NONE] = ca->free.size / 2 +
ca->watermark[WATERMARK_MOVINGGC]; ca->watermark[WATERMARK_MOVINGGC];
for (i = 0; i < MAX_IN_FLIGHT_DISCARDS; i++) {
struct discard *d = kzalloc(sizeof(*d), GFP_KERNEL);
if (!d)
return -ENOMEM;
d->ca = ca;
INIT_WORK(&d->work, discard_finish);
list_add(&d->list, &ca->discards);
}
return 0; return 0;
} }
...@@ -177,6 +177,7 @@ ...@@ -177,6 +177,7 @@
#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__ #define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
#include <linux/bcache.h>
#include <linux/bio.h> #include <linux/bio.h>
#include <linux/kobject.h> #include <linux/kobject.h>
#include <linux/list.h> #include <linux/list.h>
...@@ -210,168 +211,6 @@ BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2); ...@@ -210,168 +211,6 @@ BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2);
#define GC_MARK_METADATA 2 #define GC_MARK_METADATA 2
BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 14); BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 14);
struct bkey {
uint64_t high;
uint64_t low;
uint64_t ptr[];
};
/* Enough for a key with 6 pointers */
#define BKEY_PAD 8
#define BKEY_PADDED(key) \
union { struct bkey key; uint64_t key ## _pad[BKEY_PAD]; }
/* Version 0: Cache device
* Version 1: Backing device
* Version 2: Seed pointer into btree node checksum
* Version 3: Cache device with new UUID format
* Version 4: Backing device with data offset
*/
#define BCACHE_SB_VERSION_CDEV 0
#define BCACHE_SB_VERSION_BDEV 1
#define BCACHE_SB_VERSION_CDEV_WITH_UUID 3
#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4
#define BCACHE_SB_MAX_VERSION 4
#define SB_SECTOR 8
#define SB_SIZE 4096
#define SB_LABEL_SIZE 32
#define SB_JOURNAL_BUCKETS 256U
/* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */
#define MAX_CACHES_PER_SET 8
#define BDEV_DATA_START_DEFAULT 16 /* sectors */
struct cache_sb {
uint64_t csum;
uint64_t offset; /* sector where this sb was written */
uint64_t version;
uint8_t magic[16];
uint8_t uuid[16];
union {
uint8_t set_uuid[16];
uint64_t set_magic;
};
uint8_t label[SB_LABEL_SIZE];
uint64_t flags;
uint64_t seq;
uint64_t pad[8];
union {
struct {
/* Cache devices */
uint64_t nbuckets; /* device size */
uint16_t block_size; /* sectors */
uint16_t bucket_size; /* sectors */
uint16_t nr_in_set;
uint16_t nr_this_dev;
};
struct {
/* Backing devices */
uint64_t data_offset;
/*
* block_size from the cache device section is still used by
* backing devices, so don't add anything here until we fix
* things to not need it for backing devices anymore
*/
};
};
uint32_t last_mount; /* time_t */
uint16_t first_bucket;
union {
uint16_t njournal_buckets;
uint16_t keys;
};
uint64_t d[SB_JOURNAL_BUCKETS]; /* journal buckets */
};
BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1);
BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1);
BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3);
#define CACHE_REPLACEMENT_LRU 0U
#define CACHE_REPLACEMENT_FIFO 1U
#define CACHE_REPLACEMENT_RANDOM 2U
BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4);
#define CACHE_MODE_WRITETHROUGH 0U
#define CACHE_MODE_WRITEBACK 1U
#define CACHE_MODE_WRITEAROUND 2U
#define CACHE_MODE_NONE 3U
BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2);
#define BDEV_STATE_NONE 0U
#define BDEV_STATE_CLEAN 1U
#define BDEV_STATE_DIRTY 2U
#define BDEV_STATE_STALE 3U
/* Version 1: Seed pointer into btree node checksum
*/
#define BCACHE_BSET_VERSION 1
/*
* This is the on disk format for btree nodes - a btree node on disk is a list
* of these; within each set the keys are sorted
*/
struct bset {
uint64_t csum;
uint64_t magic;
uint64_t seq;
uint32_t version;
uint32_t keys;
union {
struct bkey start[0];
uint64_t d[0];
};
};
/*
* On disk format for priorities and gens - see super.c near prio_write() for
* more.
*/
struct prio_set {
uint64_t csum;
uint64_t magic;
uint64_t seq;
uint32_t version;
uint32_t pad;
uint64_t next_bucket;
struct bucket_disk {
uint16_t prio;
uint8_t gen;
} __attribute((packed)) data[];
};
struct uuid_entry {
union {
struct {
uint8_t uuid[16];
uint8_t label[32];
uint32_t first_reg;
uint32_t last_reg;
uint32_t invalidated;
uint32_t flags;
/* Size of flash only volumes */
uint64_t sectors;
};
uint8_t pad[128];
};
};
BITMASK(UUID_FLASH_ONLY, struct uuid_entry, flags, 0, 1);
#include "journal.h" #include "journal.h"
#include "stats.h" #include "stats.h"
struct search; struct search;
...@@ -384,8 +223,6 @@ struct keybuf_key { ...@@ -384,8 +223,6 @@ struct keybuf_key {
void *private; void *private;
}; };
typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *);
struct keybuf { struct keybuf {
struct bkey last_scanned; struct bkey last_scanned;
spinlock_t lock; spinlock_t lock;
...@@ -400,7 +237,7 @@ struct keybuf { ...@@ -400,7 +237,7 @@ struct keybuf {
struct rb_root keys; struct rb_root keys;
#define KEYBUF_NR 100 #define KEYBUF_NR 500
DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist, KEYBUF_NR); DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist, KEYBUF_NR);
}; };
...@@ -429,16 +266,15 @@ struct bcache_device { ...@@ -429,16 +266,15 @@ struct bcache_device {
struct gendisk *disk; struct gendisk *disk;
/* If nonzero, we're closing */ unsigned long flags;
atomic_t closing; #define BCACHE_DEV_CLOSING 0
#define BCACHE_DEV_DETACHING 1
/* If nonzero, we're detaching/unregistering from cache set */ #define BCACHE_DEV_UNLINK_DONE 2
atomic_t detaching;
int flush_done;
uint64_t nr_stripes; unsigned nr_stripes;
unsigned stripe_size_bits; unsigned stripe_size;
atomic_t *stripe_sectors_dirty; atomic_t *stripe_sectors_dirty;
unsigned long *full_dirty_stripes;
unsigned long sectors_dirty_last; unsigned long sectors_dirty_last;
long sectors_dirty_derivative; long sectors_dirty_derivative;
...@@ -509,7 +345,7 @@ struct cached_dev { ...@@ -509,7 +345,7 @@ struct cached_dev {
/* Limit number of writeback bios in flight */ /* Limit number of writeback bios in flight */
struct semaphore in_flight; struct semaphore in_flight;
struct closure_with_timer writeback; struct task_struct *writeback_thread;
struct keybuf writeback_keys; struct keybuf writeback_keys;
...@@ -527,8 +363,8 @@ struct cached_dev { ...@@ -527,8 +363,8 @@ struct cached_dev {
unsigned sequential_cutoff; unsigned sequential_cutoff;
unsigned readahead; unsigned readahead;
unsigned sequential_merge:1;
unsigned verify:1; unsigned verify:1;
unsigned bypass_torture_test:1;
unsigned partial_stripes_expensive:1; unsigned partial_stripes_expensive:1;
unsigned writeback_metadata:1; unsigned writeback_metadata:1;
...@@ -620,15 +456,6 @@ struct cache { ...@@ -620,15 +456,6 @@ struct cache {
bool discard; /* Get rid of? */ bool discard; /* Get rid of? */
/*
* We preallocate structs for issuing discards to buckets, and keep them
* on this list when they're not in use; do_discard() issues discards
* whenever there's work to do and is called by free_some_buckets() and
* when a discard finishes.
*/
atomic_t discards_in_flight;
struct list_head discards;
struct journal_device journal; struct journal_device journal;
/* The rest of this all shows up in sysfs */ /* The rest of this all shows up in sysfs */
...@@ -649,7 +476,6 @@ struct gc_stat { ...@@ -649,7 +476,6 @@ struct gc_stat {
size_t nkeys; size_t nkeys;
uint64_t data; /* sectors */ uint64_t data; /* sectors */
uint64_t dirty; /* sectors */
unsigned in_use; /* percent */ unsigned in_use; /* percent */
}; };
...@@ -744,8 +570,8 @@ struct cache_set { ...@@ -744,8 +570,8 @@ struct cache_set {
* basically a lock for this that we can wait on asynchronously. The * basically a lock for this that we can wait on asynchronously. The
* btree_root() macro releases the lock when it returns. * btree_root() macro releases the lock when it returns.
*/ */
struct closure *try_harder; struct task_struct *try_harder;
struct closure_waitlist try_wait; wait_queue_head_t try_wait;
uint64_t try_harder_start; uint64_t try_harder_start;
/* /*
...@@ -759,7 +585,7 @@ struct cache_set { ...@@ -759,7 +585,7 @@ struct cache_set {
* written. * written.
*/ */
atomic_t prio_blocked; atomic_t prio_blocked;
struct closure_waitlist bucket_wait; wait_queue_head_t bucket_wait;
/* /*
* For any bio we don't skip we subtract the number of sectors from * For any bio we don't skip we subtract the number of sectors from
...@@ -782,7 +608,7 @@ struct cache_set { ...@@ -782,7 +608,7 @@ struct cache_set {
struct gc_stat gc_stats; struct gc_stat gc_stats;
size_t nbuckets; size_t nbuckets;
struct closure_with_waitlist gc; struct task_struct *gc_thread;
/* Where in the btree gc currently is */ /* Where in the btree gc currently is */
struct bkey gc_done; struct bkey gc_done;
...@@ -795,11 +621,10 @@ struct cache_set { ...@@ -795,11 +621,10 @@ struct cache_set {
/* Counts how many sectors bio_insert has added to the cache */ /* Counts how many sectors bio_insert has added to the cache */
atomic_t sectors_to_gc; atomic_t sectors_to_gc;
struct closure moving_gc; wait_queue_head_t moving_gc_wait;
struct closure_waitlist moving_gc_wait;
struct keybuf moving_gc_keys; struct keybuf moving_gc_keys;
/* Number of moving GC bios in flight */ /* Number of moving GC bios in flight */
atomic_t in_flight; struct semaphore moving_in_flight;
struct btree *root; struct btree *root;
...@@ -841,22 +666,27 @@ struct cache_set { ...@@ -841,22 +666,27 @@ struct cache_set {
unsigned congested_read_threshold_us; unsigned congested_read_threshold_us;
unsigned congested_write_threshold_us; unsigned congested_write_threshold_us;
spinlock_t sort_time_lock;
struct time_stats sort_time; struct time_stats sort_time;
struct time_stats btree_gc_time; struct time_stats btree_gc_time;
struct time_stats btree_split_time; struct time_stats btree_split_time;
spinlock_t btree_read_time_lock;
struct time_stats btree_read_time; struct time_stats btree_read_time;
struct time_stats try_harder_time; struct time_stats try_harder_time;
atomic_long_t cache_read_races; atomic_long_t cache_read_races;
atomic_long_t writeback_keys_done; atomic_long_t writeback_keys_done;
atomic_long_t writeback_keys_failed; atomic_long_t writeback_keys_failed;
enum {
ON_ERROR_UNREGISTER,
ON_ERROR_PANIC,
} on_error;
unsigned error_limit; unsigned error_limit;
unsigned error_decay; unsigned error_decay;
unsigned short journal_delay_ms; unsigned short journal_delay_ms;
unsigned verify:1; unsigned verify:1;
unsigned key_merging_disabled:1; unsigned key_merging_disabled:1;
unsigned expensive_debug_checks:1;
unsigned gc_always_rewrite:1; unsigned gc_always_rewrite:1;
unsigned shrinker_disabled:1; unsigned shrinker_disabled:1;
unsigned copy_gc_enabled:1; unsigned copy_gc_enabled:1;
...@@ -865,21 +695,6 @@ struct cache_set { ...@@ -865,21 +695,6 @@ struct cache_set {
struct hlist_head bucket_hash[1 << BUCKET_HASH_BITS]; struct hlist_head bucket_hash[1 << BUCKET_HASH_BITS];
}; };
static inline bool key_merging_disabled(struct cache_set *c)
{
#ifdef CONFIG_BCACHE_DEBUG
return c->key_merging_disabled;
#else
return 0;
#endif
}
static inline bool SB_IS_BDEV(const struct cache_sb *sb)
{
return sb->version == BCACHE_SB_VERSION_BDEV
|| sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET;
}
struct bbio { struct bbio {
unsigned submit_time_us; unsigned submit_time_us;
union { union {
...@@ -933,59 +748,6 @@ static inline unsigned local_clock_us(void) ...@@ -933,59 +748,6 @@ static inline unsigned local_clock_us(void)
#define prio_buckets(c) \ #define prio_buckets(c) \
DIV_ROUND_UP((size_t) (c)->sb.nbuckets, prios_per_bucket(c)) DIV_ROUND_UP((size_t) (c)->sb.nbuckets, prios_per_bucket(c))
#define JSET_MAGIC 0x245235c1a3625032ULL
#define PSET_MAGIC 0x6750e15f87337f91ULL
#define BSET_MAGIC 0x90135c78b99e07f5ULL
#define jset_magic(c) ((c)->sb.set_magic ^ JSET_MAGIC)
#define pset_magic(c) ((c)->sb.set_magic ^ PSET_MAGIC)
#define bset_magic(c) ((c)->sb.set_magic ^ BSET_MAGIC)
/* Bkey fields: all units are in sectors */
#define KEY_FIELD(name, field, offset, size) \
BITMASK(name, struct bkey, field, offset, size)
#define PTR_FIELD(name, offset, size) \
static inline uint64_t name(const struct bkey *k, unsigned i) \
{ return (k->ptr[i] >> offset) & ~(((uint64_t) ~0) << size); } \
\
static inline void SET_##name(struct bkey *k, unsigned i, uint64_t v)\
{ \
k->ptr[i] &= ~(~((uint64_t) ~0 << size) << offset); \
k->ptr[i] |= v << offset; \
}
KEY_FIELD(KEY_PTRS, high, 60, 3)
KEY_FIELD(HEADER_SIZE, high, 58, 2)
KEY_FIELD(KEY_CSUM, high, 56, 2)
KEY_FIELD(KEY_PINNED, high, 55, 1)
KEY_FIELD(KEY_DIRTY, high, 36, 1)
KEY_FIELD(KEY_SIZE, high, 20, 16)
KEY_FIELD(KEY_INODE, high, 0, 20)
/* Next time I change the on disk format, KEY_OFFSET() won't be 64 bits */
static inline uint64_t KEY_OFFSET(const struct bkey *k)
{
return k->low;
}
static inline void SET_KEY_OFFSET(struct bkey *k, uint64_t v)
{
k->low = v;
}
PTR_FIELD(PTR_DEV, 51, 12)
PTR_FIELD(PTR_OFFSET, 8, 43)
PTR_FIELD(PTR_GEN, 0, 8)
#define PTR_CHECK_DEV ((1 << 12) - 1)
#define PTR(gen, offset, dev) \
((((uint64_t) dev) << 51) | ((uint64_t) offset) << 8 | gen)
static inline size_t sector_to_bucket(struct cache_set *c, sector_t s) static inline size_t sector_to_bucket(struct cache_set *c, sector_t s)
{ {
return s >> c->bucket_bits; return s >> c->bucket_bits;
...@@ -1024,27 +786,11 @@ static inline struct bucket *PTR_BUCKET(struct cache_set *c, ...@@ -1024,27 +786,11 @@ static inline struct bucket *PTR_BUCKET(struct cache_set *c,
/* Btree key macros */ /* Btree key macros */
/*
* The high bit being set is a relic from when we used it to do binary
* searches - it told you where a key started. It's not used anymore,
* and can probably be safely dropped.
*/
#define KEY(dev, sector, len) \
((struct bkey) { \
.high = (1ULL << 63) | ((uint64_t) (len) << 20) | (dev), \
.low = (sector) \
})
static inline void bkey_init(struct bkey *k) static inline void bkey_init(struct bkey *k)
{ {
*k = KEY(0, 0, 0); *k = ZERO_KEY;
} }
#define KEY_START(k) (KEY_OFFSET(k) - KEY_SIZE(k))
#define START_KEY(k) KEY(KEY_INODE(k), KEY_START(k), 0)
#define MAX_KEY KEY(~(~0 << 20), ((uint64_t) ~0) >> 1, 0)
#define ZERO_KEY KEY(0, 0, 0)
/* /*
* This is used for various on disk data structures - cache_sb, prio_set, bset, * This is used for various on disk data structures - cache_sb, prio_set, bset,
* jset: The checksum is _always_ the first 8 bytes of these structs * jset: The checksum is _always_ the first 8 bytes of these structs
...@@ -1094,14 +840,6 @@ do { \ ...@@ -1094,14 +840,6 @@ do { \
for (b = (ca)->buckets + (ca)->sb.first_bucket; \ for (b = (ca)->buckets + (ca)->sb.first_bucket; \
b < (ca)->buckets + (ca)->sb.nbuckets; b++) b < (ca)->buckets + (ca)->sb.nbuckets; b++)
static inline void __bkey_put(struct cache_set *c, struct bkey *k)
{
unsigned i;
for (i = 0; i < KEY_PTRS(k); i++)
atomic_dec_bug(&PTR_BUCKET(c, k, i)->pin);
}
static inline void cached_dev_put(struct cached_dev *dc) static inline void cached_dev_put(struct cached_dev *dc)
{ {
if (atomic_dec_and_test(&dc->count)) if (atomic_dec_and_test(&dc->count))
...@@ -1173,13 +911,15 @@ uint8_t bch_inc_gen(struct cache *, struct bucket *); ...@@ -1173,13 +911,15 @@ uint8_t bch_inc_gen(struct cache *, struct bucket *);
void bch_rescale_priorities(struct cache_set *, int); void bch_rescale_priorities(struct cache_set *, int);
bool bch_bucket_add_unused(struct cache *, struct bucket *); bool bch_bucket_add_unused(struct cache *, struct bucket *);
long bch_bucket_alloc(struct cache *, unsigned, struct closure *); long bch_bucket_alloc(struct cache *, unsigned, bool);
void bch_bucket_free(struct cache_set *, struct bkey *); void bch_bucket_free(struct cache_set *, struct bkey *);
int __bch_bucket_alloc_set(struct cache_set *, unsigned, int __bch_bucket_alloc_set(struct cache_set *, unsigned,
struct bkey *, int, struct closure *); struct bkey *, int, bool);
int bch_bucket_alloc_set(struct cache_set *, unsigned, int bch_bucket_alloc_set(struct cache_set *, unsigned,
struct bkey *, int, struct closure *); struct bkey *, int, bool);
bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
unsigned, unsigned, bool);
__printf(2, 3) __printf(2, 3)
bool bch_cache_set_error(struct cache_set *, const char *, ...); bool bch_cache_set_error(struct cache_set *, const char *, ...);
...@@ -1187,7 +927,7 @@ bool bch_cache_set_error(struct cache_set *, const char *, ...); ...@@ -1187,7 +927,7 @@ bool bch_cache_set_error(struct cache_set *, const char *, ...);
void bch_prio_write(struct cache *); void bch_prio_write(struct cache *);
void bch_write_bdev_super(struct cached_dev *, struct closure *); void bch_write_bdev_super(struct cached_dev *, struct closure *);
extern struct workqueue_struct *bcache_wq, *bch_gc_wq; extern struct workqueue_struct *bcache_wq;
extern const char * const bch_cache_modes[]; extern const char * const bch_cache_modes[];
extern struct mutex bch_register_lock; extern struct mutex bch_register_lock;
extern struct list_head bch_cache_sets; extern struct list_head bch_cache_sets;
...@@ -1220,15 +960,14 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *); ...@@ -1220,15 +960,14 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *);
void bch_btree_cache_free(struct cache_set *); void bch_btree_cache_free(struct cache_set *);
int bch_btree_cache_alloc(struct cache_set *); int bch_btree_cache_alloc(struct cache_set *);
void bch_moving_init_cache_set(struct cache_set *); void bch_moving_init_cache_set(struct cache_set *);
int bch_open_buckets_alloc(struct cache_set *);
void bch_open_buckets_free(struct cache_set *);
int bch_cache_allocator_start(struct cache *ca); int bch_cache_allocator_start(struct cache *ca);
void bch_cache_allocator_exit(struct cache *ca);
int bch_cache_allocator_init(struct cache *ca); int bch_cache_allocator_init(struct cache *ca);
void bch_debug_exit(void); void bch_debug_exit(void);
int bch_debug_init(struct kobject *); int bch_debug_init(struct kobject *);
void bch_writeback_exit(void);
int bch_writeback_init(void);
void bch_request_exit(void); void bch_request_exit(void);
int bch_request_init(void); int bch_request_init(void);
void bch_btree_exit(void); void bch_btree_exit(void);
......
...@@ -14,22 +14,12 @@ ...@@ -14,22 +14,12 @@
/* Keylists */ /* Keylists */
void bch_keylist_copy(struct keylist *dest, struct keylist *src)
{
*dest = *src;
if (src->list == src->d) {
size_t n = (uint64_t *) src->top - src->d;
dest->top = (struct bkey *) &dest->d[n];
dest->list = dest->d;
}
}
int bch_keylist_realloc(struct keylist *l, int nptrs, struct cache_set *c) int bch_keylist_realloc(struct keylist *l, int nptrs, struct cache_set *c)
{ {
unsigned oldsize = (uint64_t *) l->top - l->list; size_t oldsize = bch_keylist_nkeys(l);
unsigned newsize = oldsize + 2 + nptrs; size_t newsize = oldsize + 2 + nptrs;
uint64_t *new; uint64_t *old_keys = l->keys_p == l->inline_keys ? NULL : l->keys_p;
uint64_t *new_keys;
/* The journalling code doesn't handle the case where the keys to insert /* The journalling code doesn't handle the case where the keys to insert
* is bigger than an empty write: If we just return -ENOMEM here, * is bigger than an empty write: If we just return -ENOMEM here,
...@@ -45,24 +35,23 @@ int bch_keylist_realloc(struct keylist *l, int nptrs, struct cache_set *c) ...@@ -45,24 +35,23 @@ int bch_keylist_realloc(struct keylist *l, int nptrs, struct cache_set *c)
roundup_pow_of_two(oldsize) == newsize) roundup_pow_of_two(oldsize) == newsize)
return 0; return 0;
new = krealloc(l->list == l->d ? NULL : l->list, new_keys = krealloc(old_keys, sizeof(uint64_t) * newsize, GFP_NOIO);
sizeof(uint64_t) * newsize, GFP_NOIO);
if (!new) if (!new_keys)
return -ENOMEM; return -ENOMEM;
if (l->list == l->d) if (!old_keys)
memcpy(new, l->list, sizeof(uint64_t) * KEYLIST_INLINE); memcpy(new_keys, l->inline_keys, sizeof(uint64_t) * oldsize);
l->list = new; l->keys_p = new_keys;
l->top = (struct bkey *) (&l->list[oldsize]); l->top_p = new_keys + oldsize;
return 0; return 0;
} }
struct bkey *bch_keylist_pop(struct keylist *l) struct bkey *bch_keylist_pop(struct keylist *l)
{ {
struct bkey *k = l->bottom; struct bkey *k = l->keys;
if (k == l->top) if (k == l->top)
return NULL; return NULL;
...@@ -73,21 +62,20 @@ struct bkey *bch_keylist_pop(struct keylist *l) ...@@ -73,21 +62,20 @@ struct bkey *bch_keylist_pop(struct keylist *l)
return l->top = k; return l->top = k;
} }
/* Pointer validation */ void bch_keylist_pop_front(struct keylist *l)
bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k)
{ {
unsigned i; l->top_p -= bkey_u64s(l->keys);
char buf[80];
if (level && (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k))) memmove(l->keys,
goto bad; bkey_next(l->keys),
bch_keylist_bytes(l));
}
if (!level && KEY_SIZE(k) > KEY_OFFSET(k)) /* Pointer validation */
goto bad;
if (!KEY_SIZE(k)) static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
return true; {
unsigned i;
for (i = 0; i < KEY_PTRS(k); i++) for (i = 0; i < KEY_PTRS(k); i++)
if (ptr_available(c, k, i)) { if (ptr_available(c, k, i)) {
...@@ -98,13 +86,83 @@ bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k) ...@@ -98,13 +86,83 @@ bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k)
if (KEY_SIZE(k) + r > c->sb.bucket_size || if (KEY_SIZE(k) + r > c->sb.bucket_size ||
bucket < ca->sb.first_bucket || bucket < ca->sb.first_bucket ||
bucket >= ca->sb.nbuckets) bucket >= ca->sb.nbuckets)
goto bad; return true;
} }
return false;
}
bool bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k)
{
char buf[80];
if (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k))
goto bad;
if (__ptr_invalid(c, k))
goto bad;
return false;
bad:
bch_bkey_to_text(buf, sizeof(buf), k);
cache_bug(c, "spotted btree ptr %s: %s", buf, bch_ptr_status(c, k));
return true;
}
bool bch_extent_ptr_invalid(struct cache_set *c, const struct bkey *k)
{
char buf[80];
if (!KEY_SIZE(k))
return true;
if (KEY_SIZE(k) > KEY_OFFSET(k))
goto bad;
if (__ptr_invalid(c, k))
goto bad;
return false; return false;
bad: bad:
bch_bkey_to_text(buf, sizeof(buf), k); bch_bkey_to_text(buf, sizeof(buf), k);
cache_bug(c, "spotted bad key %s: %s", buf, bch_ptr_status(c, k)); cache_bug(c, "spotted extent %s: %s", buf, bch_ptr_status(c, k));
return true;
}
static bool ptr_bad_expensive_checks(struct btree *b, const struct bkey *k,
unsigned ptr)
{
struct bucket *g = PTR_BUCKET(b->c, k, ptr);
char buf[80];
if (mutex_trylock(&b->c->bucket_lock)) {
if (b->level) {
if (KEY_DIRTY(k) ||
g->prio != BTREE_PRIO ||
(b->c->gc_mark_valid &&
GC_MARK(g) != GC_MARK_METADATA))
goto err;
} else {
if (g->prio == BTREE_PRIO)
goto err;
if (KEY_DIRTY(k) &&
b->c->gc_mark_valid &&
GC_MARK(g) != GC_MARK_DIRTY)
goto err;
}
mutex_unlock(&b->c->bucket_lock);
}
return false;
err:
mutex_unlock(&b->c->bucket_lock);
bch_bkey_to_text(buf, sizeof(buf), k);
btree_bug(b,
"inconsistent pointer %s: bucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin),
g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
return true; return true;
} }
...@@ -118,64 +176,29 @@ bool bch_ptr_bad(struct btree *b, const struct bkey *k) ...@@ -118,64 +176,29 @@ bool bch_ptr_bad(struct btree *b, const struct bkey *k)
bch_ptr_invalid(b, k)) bch_ptr_invalid(b, k))
return true; return true;
if (KEY_PTRS(k) && PTR_DEV(k, 0) == PTR_CHECK_DEV) for (i = 0; i < KEY_PTRS(k); i++) {
return true; if (!ptr_available(b->c, k, i))
return true;
for (i = 0; i < KEY_PTRS(k); i++) g = PTR_BUCKET(b->c, k, i);
if (ptr_available(b->c, k, i)) { stale = ptr_stale(b->c, k, i);
g = PTR_BUCKET(b->c, k, i);
stale = ptr_stale(b->c, k, i);
btree_bug_on(stale > 96, b, btree_bug_on(stale > 96, b,
"key too stale: %i, need_gc %u", "key too stale: %i, need_gc %u",
stale, b->c->need_gc); stale, b->c->need_gc);
btree_bug_on(stale && KEY_DIRTY(k) && KEY_SIZE(k), btree_bug_on(stale && KEY_DIRTY(k) && KEY_SIZE(k),
b, "stale dirty pointer"); b, "stale dirty pointer");
if (stale) if (stale)
return true; return true;
#ifdef CONFIG_BCACHE_EDEBUG if (expensive_debug_checks(b->c) &&
if (!mutex_trylock(&b->c->bucket_lock)) ptr_bad_expensive_checks(b, k, i))
continue; return true;
}
if (b->level) {
if (KEY_DIRTY(k) ||
g->prio != BTREE_PRIO ||
(b->c->gc_mark_valid &&
GC_MARK(g) != GC_MARK_METADATA))
goto bug;
} else {
if (g->prio == BTREE_PRIO)
goto bug;
if (KEY_DIRTY(k) &&
b->c->gc_mark_valid &&
GC_MARK(g) != GC_MARK_DIRTY)
goto bug;
}
mutex_unlock(&b->c->bucket_lock);
#endif
}
return false; return false;
#ifdef CONFIG_BCACHE_EDEBUG
bug:
mutex_unlock(&b->c->bucket_lock);
{
char buf[80];
bch_bkey_to_text(buf, sizeof(buf), k);
btree_bug(b,
"inconsistent pointer %s: bucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
}
return true;
#endif
} }
/* Key/pointer manipulation */ /* Key/pointer manipulation */
...@@ -458,16 +481,8 @@ static struct bkey *table_to_bkey(struct bset_tree *t, unsigned cacheline) ...@@ -458,16 +481,8 @@ static struct bkey *table_to_bkey(struct bset_tree *t, unsigned cacheline)
static inline uint64_t shrd128(uint64_t high, uint64_t low, uint8_t shift) static inline uint64_t shrd128(uint64_t high, uint64_t low, uint8_t shift)
{ {
#ifdef CONFIG_X86_64
asm("shrd %[shift],%[high],%[low]"
: [low] "+Rm" (low)
: [high] "R" (high),
[shift] "ci" (shift)
: "cc");
#else
low >>= shift; low >>= shift;
low |= (high << 1) << (63U - shift); low |= (high << 1) << (63U - shift);
#endif
return low; return low;
} }
...@@ -686,7 +701,7 @@ void bch_bset_init_next(struct btree *b) ...@@ -686,7 +701,7 @@ void bch_bset_init_next(struct btree *b)
} else } else
get_random_bytes(&i->seq, sizeof(uint64_t)); get_random_bytes(&i->seq, sizeof(uint64_t));
i->magic = bset_magic(b->c); i->magic = bset_magic(&b->c->sb);
i->version = 0; i->version = 0;
i->keys = 0; i->keys = 0;
...@@ -824,16 +839,16 @@ struct bkey *__bch_bset_search(struct btree *b, struct bset_tree *t, ...@@ -824,16 +839,16 @@ struct bkey *__bch_bset_search(struct btree *b, struct bset_tree *t,
} else } else
i = bset_search_write_set(b, t, search); i = bset_search_write_set(b, t, search);
#ifdef CONFIG_BCACHE_EDEBUG if (expensive_debug_checks(b->c)) {
BUG_ON(bset_written(b, t) && BUG_ON(bset_written(b, t) &&
i.l != t->data->start && i.l != t->data->start &&
bkey_cmp(tree_to_prev_bkey(t, bkey_cmp(tree_to_prev_bkey(t,
inorder_to_tree(bkey_to_cacheline(t, i.l), t)), inorder_to_tree(bkey_to_cacheline(t, i.l), t)),
search) > 0); search) > 0);
BUG_ON(i.r != end(t->data) && BUG_ON(i.r != end(t->data) &&
bkey_cmp(i.r, search) <= 0); bkey_cmp(i.r, search) <= 0);
#endif }
while (likely(i.l != i.r) && while (likely(i.l != i.r) &&
bkey_cmp(i.l, search) <= 0) bkey_cmp(i.l, search) <= 0)
...@@ -844,6 +859,13 @@ struct bkey *__bch_bset_search(struct btree *b, struct bset_tree *t, ...@@ -844,6 +859,13 @@ struct bkey *__bch_bset_search(struct btree *b, struct bset_tree *t,
/* Btree iterator */ /* Btree iterator */
/*
* Returns true if l > r - unless l == r, in which case returns true if l is
* older than r.
*
* Necessary for btree_sort_fixup() - if there are multiple keys that compare
* equal in different sets, we have to process them newest to oldest.
*/
static inline bool btree_iter_cmp(struct btree_iter_set l, static inline bool btree_iter_cmp(struct btree_iter_set l,
struct btree_iter_set r) struct btree_iter_set r)
{ {
...@@ -867,12 +889,16 @@ void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k, ...@@ -867,12 +889,16 @@ void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
} }
struct bkey *__bch_btree_iter_init(struct btree *b, struct btree_iter *iter, struct bkey *__bch_btree_iter_init(struct btree *b, struct btree_iter *iter,
struct bkey *search, struct bset_tree *start) struct bkey *search, struct bset_tree *start)
{ {
struct bkey *ret = NULL; struct bkey *ret = NULL;
iter->size = ARRAY_SIZE(iter->data); iter->size = ARRAY_SIZE(iter->data);
iter->used = 0; iter->used = 0;
#ifdef CONFIG_BCACHE_DEBUG
iter->b = b;
#endif
for (; start <= &b->sets[b->nsets]; start++) { for (; start <= &b->sets[b->nsets]; start++) {
ret = bch_bset_search(b, start, search); ret = bch_bset_search(b, start, search);
bch_btree_iter_push(iter, ret, end(start->data)); bch_btree_iter_push(iter, ret, end(start->data));
...@@ -887,6 +913,8 @@ struct bkey *bch_btree_iter_next(struct btree_iter *iter) ...@@ -887,6 +913,8 @@ struct bkey *bch_btree_iter_next(struct btree_iter *iter)
struct bkey *ret = NULL; struct bkey *ret = NULL;
if (!btree_iter_end(iter)) { if (!btree_iter_end(iter)) {
bch_btree_iter_next_check(iter);
ret = iter->data->k; ret = iter->data->k;
iter->data->k = bkey_next(iter->data->k); iter->data->k = bkey_next(iter->data->k);
...@@ -916,14 +944,6 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter, ...@@ -916,14 +944,6 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
return ret; return ret;
} }
struct bkey *bch_next_recurse_key(struct btree *b, struct bkey *search)
{
struct btree_iter iter;
bch_btree_iter_init(b, &iter, search);
return bch_btree_iter_next_filter(&iter, b, bch_ptr_bad);
}
/* Mergesort */ /* Mergesort */
static void sort_key_next(struct btree_iter *iter, static void sort_key_next(struct btree_iter *iter,
...@@ -998,7 +1018,6 @@ static void btree_mergesort(struct btree *b, struct bset *out, ...@@ -998,7 +1018,6 @@ static void btree_mergesort(struct btree *b, struct bset *out,
out->keys = last ? (uint64_t *) bkey_next(last) - out->d : 0; out->keys = last ? (uint64_t *) bkey_next(last) - out->d : 0;
pr_debug("sorted %i keys", out->keys); pr_debug("sorted %i keys", out->keys);
bch_check_key_order(b, out);
} }
static void __btree_sort(struct btree *b, struct btree_iter *iter, static void __btree_sort(struct btree *b, struct btree_iter *iter,
...@@ -1029,7 +1048,7 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter, ...@@ -1029,7 +1048,7 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter,
* memcpy() * memcpy()
*/ */
out->magic = bset_magic(b->c); out->magic = bset_magic(&b->c->sb);
out->seq = b->sets[0].data->seq; out->seq = b->sets[0].data->seq;
out->version = b->sets[0].data->version; out->version = b->sets[0].data->version;
swap(out, b->sets[0].data); swap(out, b->sets[0].data);
...@@ -1050,24 +1069,21 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter, ...@@ -1050,24 +1069,21 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter,
if (b->written) if (b->written)
bset_build_written_tree(b); bset_build_written_tree(b);
if (!start) { if (!start)
spin_lock(&b->c->sort_time_lock);
bch_time_stats_update(&b->c->sort_time, start_time); bch_time_stats_update(&b->c->sort_time, start_time);
spin_unlock(&b->c->sort_time_lock);
}
} }
void bch_btree_sort_partial(struct btree *b, unsigned start) void bch_btree_sort_partial(struct btree *b, unsigned start)
{ {
size_t oldsize = 0, order = b->page_order, keys = 0; size_t order = b->page_order, keys = 0;
struct btree_iter iter; struct btree_iter iter;
int oldsize = bch_count_data(b);
__bch_btree_iter_init(b, &iter, NULL, &b->sets[start]); __bch_btree_iter_init(b, &iter, NULL, &b->sets[start]);
BUG_ON(b->sets[b->nsets].data == write_block(b) && BUG_ON(b->sets[b->nsets].data == write_block(b) &&
(b->sets[b->nsets].size || b->nsets)); (b->sets[b->nsets].size || b->nsets));
if (b->written)
oldsize = bch_count_data(b);
if (start) { if (start) {
unsigned i; unsigned i;
...@@ -1083,7 +1099,7 @@ void bch_btree_sort_partial(struct btree *b, unsigned start) ...@@ -1083,7 +1099,7 @@ void bch_btree_sort_partial(struct btree *b, unsigned start)
__btree_sort(b, &iter, start, order, false); __btree_sort(b, &iter, start, order, false);
EBUG_ON(b->written && bch_count_data(b) != oldsize); EBUG_ON(b->written && oldsize >= 0 && bch_count_data(b) != oldsize);
} }
void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter) void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter)
...@@ -1101,9 +1117,7 @@ void bch_btree_sort_into(struct btree *b, struct btree *new) ...@@ -1101,9 +1117,7 @@ void bch_btree_sort_into(struct btree *b, struct btree *new)
btree_mergesort(b, new->sets->data, &iter, false, true); btree_mergesort(b, new->sets->data, &iter, false, true);
spin_lock(&b->c->sort_time_lock);
bch_time_stats_update(&b->c->sort_time, start_time); bch_time_stats_update(&b->c->sort_time, start_time);
spin_unlock(&b->c->sort_time_lock);
bkey_copy_key(&new->key, &b->key); bkey_copy_key(&new->key, &b->key);
new->sets->size = 0; new->sets->size = 0;
...@@ -1148,16 +1162,16 @@ void bch_btree_sort_lazy(struct btree *b) ...@@ -1148,16 +1162,16 @@ void bch_btree_sort_lazy(struct btree *b)
/* Sysfs stuff */ /* Sysfs stuff */
struct bset_stats { struct bset_stats {
struct btree_op op;
size_t nodes; size_t nodes;
size_t sets_written, sets_unwritten; size_t sets_written, sets_unwritten;
size_t bytes_written, bytes_unwritten; size_t bytes_written, bytes_unwritten;
size_t floats, failed; size_t floats, failed;
}; };
static int bch_btree_bset_stats(struct btree *b, struct btree_op *op, static int btree_bset_stats(struct btree_op *op, struct btree *b)
struct bset_stats *stats)
{ {
struct bkey *k; struct bset_stats *stats = container_of(op, struct bset_stats, op);
unsigned i; unsigned i;
stats->nodes++; stats->nodes++;
...@@ -1182,30 +1196,19 @@ static int bch_btree_bset_stats(struct btree *b, struct btree_op *op, ...@@ -1182,30 +1196,19 @@ static int bch_btree_bset_stats(struct btree *b, struct btree_op *op,
} }
} }
if (b->level) { return MAP_CONTINUE;
struct btree_iter iter;
for_each_key_filter(b, k, &iter, bch_ptr_bad) {
int ret = btree(bset_stats, k, b, op, stats);
if (ret)
return ret;
}
}
return 0;
} }
int bch_bset_print_stats(struct cache_set *c, char *buf) int bch_bset_print_stats(struct cache_set *c, char *buf)
{ {
struct btree_op op;
struct bset_stats t; struct bset_stats t;
int ret; int ret;
bch_btree_op_init_stack(&op);
memset(&t, 0, sizeof(struct bset_stats)); memset(&t, 0, sizeof(struct bset_stats));
bch_btree_op_init(&t.op, -1);
ret = btree_root(bset_stats, c, &op, &t); ret = bch_btree_map_nodes(&t.op, c, &ZERO_KEY, btree_bset_stats);
if (ret) if (ret < 0)
return ret; return ret;
return snprintf(buf, PAGE_SIZE, return snprintf(buf, PAGE_SIZE,
......
...@@ -148,6 +148,9 @@ ...@@ -148,6 +148,9 @@
struct btree_iter { struct btree_iter {
size_t size, used; size_t size, used;
#ifdef CONFIG_BCACHE_DEBUG
struct btree *b;
#endif
struct btree_iter_set { struct btree_iter_set {
struct bkey *k, *end; struct bkey *k, *end;
} data[MAX_BSETS]; } data[MAX_BSETS];
...@@ -193,54 +196,26 @@ static __always_inline int64_t bkey_cmp(const struct bkey *l, ...@@ -193,54 +196,26 @@ static __always_inline int64_t bkey_cmp(const struct bkey *l,
: (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r); : (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r);
} }
static inline size_t bkey_u64s(const struct bkey *k)
{
BUG_ON(KEY_CSUM(k) > 1);
return 2 + KEY_PTRS(k) + (KEY_CSUM(k) ? 1 : 0);
}
static inline size_t bkey_bytes(const struct bkey *k)
{
return bkey_u64s(k) * sizeof(uint64_t);
}
static inline void bkey_copy(struct bkey *dest, const struct bkey *src)
{
memcpy(dest, src, bkey_bytes(src));
}
static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src)
{
if (!src)
src = &KEY(0, 0, 0);
SET_KEY_INODE(dest, KEY_INODE(src));
SET_KEY_OFFSET(dest, KEY_OFFSET(src));
}
static inline struct bkey *bkey_next(const struct bkey *k)
{
uint64_t *d = (void *) k;
return (struct bkey *) (d + bkey_u64s(k));
}
/* Keylists */ /* Keylists */
struct keylist { struct keylist {
struct bkey *top;
union { union {
uint64_t *list; struct bkey *keys;
struct bkey *bottom; uint64_t *keys_p;
};
union {
struct bkey *top;
uint64_t *top_p;
}; };
/* Enough room for btree_split's keys without realloc */ /* Enough room for btree_split's keys without realloc */
#define KEYLIST_INLINE 16 #define KEYLIST_INLINE 16
uint64_t d[KEYLIST_INLINE]; uint64_t inline_keys[KEYLIST_INLINE];
}; };
static inline void bch_keylist_init(struct keylist *l) static inline void bch_keylist_init(struct keylist *l)
{ {
l->top = (void *) (l->list = l->d); l->top_p = l->keys_p = l->inline_keys;
} }
static inline void bch_keylist_push(struct keylist *l) static inline void bch_keylist_push(struct keylist *l)
...@@ -256,17 +231,32 @@ static inline void bch_keylist_add(struct keylist *l, struct bkey *k) ...@@ -256,17 +231,32 @@ static inline void bch_keylist_add(struct keylist *l, struct bkey *k)
static inline bool bch_keylist_empty(struct keylist *l) static inline bool bch_keylist_empty(struct keylist *l)
{ {
return l->top == (void *) l->list; return l->top == l->keys;
}
static inline void bch_keylist_reset(struct keylist *l)
{
l->top = l->keys;
} }
static inline void bch_keylist_free(struct keylist *l) static inline void bch_keylist_free(struct keylist *l)
{ {
if (l->list != l->d) if (l->keys_p != l->inline_keys)
kfree(l->list); kfree(l->keys_p);
}
static inline size_t bch_keylist_nkeys(struct keylist *l)
{
return l->top_p - l->keys_p;
}
static inline size_t bch_keylist_bytes(struct keylist *l)
{
return bch_keylist_nkeys(l) * sizeof(uint64_t);
} }
void bch_keylist_copy(struct keylist *, struct keylist *);
struct bkey *bch_keylist_pop(struct keylist *); struct bkey *bch_keylist_pop(struct keylist *);
void bch_keylist_pop_front(struct keylist *);
int bch_keylist_realloc(struct keylist *, int, struct cache_set *); int bch_keylist_realloc(struct keylist *, int, struct cache_set *);
void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *, void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *,
...@@ -287,7 +277,9 @@ static inline bool bch_cut_back(const struct bkey *where, struct bkey *k) ...@@ -287,7 +277,9 @@ static inline bool bch_cut_back(const struct bkey *where, struct bkey *k)
} }
const char *bch_ptr_status(struct cache_set *, const struct bkey *); const char *bch_ptr_status(struct cache_set *, const struct bkey *);
bool __bch_ptr_invalid(struct cache_set *, int level, const struct bkey *); bool bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
bool bch_extent_ptr_invalid(struct cache_set *, const struct bkey *);
bool bch_ptr_bad(struct btree *, const struct bkey *); bool bch_ptr_bad(struct btree *, const struct bkey *);
static inline uint8_t gen_after(uint8_t a, uint8_t b) static inline uint8_t gen_after(uint8_t a, uint8_t b)
...@@ -311,7 +303,6 @@ static inline bool ptr_available(struct cache_set *c, const struct bkey *k, ...@@ -311,7 +303,6 @@ static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
typedef bool (*ptr_filter_fn)(struct btree *, const struct bkey *); typedef bool (*ptr_filter_fn)(struct btree *, const struct bkey *);
struct bkey *bch_next_recurse_key(struct btree *, struct bkey *);
struct bkey *bch_btree_iter_next(struct btree_iter *); struct bkey *bch_btree_iter_next(struct btree_iter *);
struct bkey *bch_btree_iter_next_filter(struct btree_iter *, struct bkey *bch_btree_iter_next_filter(struct btree_iter *,
struct btree *, ptr_filter_fn); struct btree *, ptr_filter_fn);
...@@ -361,12 +352,30 @@ void bch_bset_fix_lookup_table(struct btree *, struct bkey *); ...@@ -361,12 +352,30 @@ void bch_bset_fix_lookup_table(struct btree *, struct bkey *);
struct bkey *__bch_bset_search(struct btree *, struct bset_tree *, struct bkey *__bch_bset_search(struct btree *, struct bset_tree *,
const struct bkey *); const struct bkey *);
/*
* Returns the first key that is strictly greater than search
*/
static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t, static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t,
const struct bkey *search) const struct bkey *search)
{ {
return search ? __bch_bset_search(b, t, search) : t->data->start; return search ? __bch_bset_search(b, t, search) : t->data->start;
} }
#define PRECEDING_KEY(_k) \
({ \
struct bkey *_ret = NULL; \
\
if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \
_ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \
\
if (!_ret->low) \
_ret->high--; \
_ret->low--; \
} \
\
_ret; \
})
bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *); bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *);
void bch_btree_sort_lazy(struct btree *); void bch_btree_sort_lazy(struct btree *);
void bch_btree_sort_into(struct btree *, struct btree *); void bch_btree_sort_into(struct btree *, struct btree *);
......
This diff is collapsed.
...@@ -125,6 +125,7 @@ struct btree { ...@@ -125,6 +125,7 @@ struct btree {
unsigned long seq; unsigned long seq;
struct rw_semaphore lock; struct rw_semaphore lock;
struct cache_set *c; struct cache_set *c;
struct btree *parent;
unsigned long flags; unsigned long flags;
uint16_t written; /* would be nice to kill */ uint16_t written; /* would be nice to kill */
...@@ -200,12 +201,7 @@ static inline bool bkey_written(struct btree *b, struct bkey *k) ...@@ -200,12 +201,7 @@ static inline bool bkey_written(struct btree *b, struct bkey *k)
static inline void set_gc_sectors(struct cache_set *c) static inline void set_gc_sectors(struct cache_set *c)
{ {
atomic_set(&c->sectors_to_gc, c->sb.bucket_size * c->nbuckets / 8); atomic_set(&c->sectors_to_gc, c->sb.bucket_size * c->nbuckets / 16);
}
static inline bool bch_ptr_invalid(struct btree *b, const struct bkey *k)
{
return __bch_ptr_invalid(b->c, b->level, k);
} }
static inline struct bkey *bch_btree_iter_init(struct btree *b, static inline struct bkey *bch_btree_iter_init(struct btree *b,
...@@ -215,6 +211,16 @@ static inline struct bkey *bch_btree_iter_init(struct btree *b, ...@@ -215,6 +211,16 @@ static inline struct bkey *bch_btree_iter_init(struct btree *b,
return __bch_btree_iter_init(b, iter, search, b->sets); return __bch_btree_iter_init(b, iter, search, b->sets);
} }
static inline bool bch_ptr_invalid(struct btree *b, const struct bkey *k)
{
if (b->level)
return bch_btree_ptr_invalid(b->c, k);
else
return bch_extent_ptr_invalid(b->c, k);
}
void bkey_put(struct cache_set *c, struct bkey *k);
/* Looping macros */ /* Looping macros */
#define for_each_cached_btree(b, c, iter) \ #define for_each_cached_btree(b, c, iter) \
...@@ -234,51 +240,17 @@ static inline struct bkey *bch_btree_iter_init(struct btree *b, ...@@ -234,51 +240,17 @@ static inline struct bkey *bch_btree_iter_init(struct btree *b,
/* Recursing down the btree */ /* Recursing down the btree */
struct btree_op { struct btree_op {
struct closure cl;
struct cache_set *c;
/* Journal entry we have a refcount on */
atomic_t *journal;
/* Bio to be inserted into the cache */
struct bio *cache_bio;
unsigned inode;
uint16_t write_prio;
/* Btree level at which we start taking write locks */ /* Btree level at which we start taking write locks */
short lock; short lock;
/* Btree insertion type */
enum {
BTREE_INSERT,
BTREE_REPLACE
} type:8;
unsigned csum:1;
unsigned skip:1;
unsigned flush_journal:1;
unsigned insert_data_done:1;
unsigned lookup_done:1;
unsigned insert_collision:1; unsigned insert_collision:1;
/* Anything after this point won't get zeroed in do_bio_hook() */
/* Keys to be inserted */
struct keylist keys;
BKEY_PADDED(replace);
}; };
enum { static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level)
BTREE_INSERT_STATUS_INSERT, {
BTREE_INSERT_STATUS_BACK_MERGE, memset(op, 0, sizeof(struct btree_op));
BTREE_INSERT_STATUS_OVERWROTE, op->lock = write_lock_level;
BTREE_INSERT_STATUS_FRONT_MERGE, }
};
void bch_btree_op_init_stack(struct btree_op *);
static inline void rw_lock(bool w, struct btree *b, int level) static inline void rw_lock(bool w, struct btree *b, int level)
{ {
...@@ -290,108 +262,71 @@ static inline void rw_lock(bool w, struct btree *b, int level) ...@@ -290,108 +262,71 @@ static inline void rw_lock(bool w, struct btree *b, int level)
static inline void rw_unlock(bool w, struct btree *b) static inline void rw_unlock(bool w, struct btree *b)
{ {
#ifdef CONFIG_BCACHE_EDEBUG
unsigned i;
if (w && b->key.ptr[0])
for (i = 0; i <= b->nsets; i++)
bch_check_key_order(b, b->sets[i].data);
#endif
if (w) if (w)
b->seq++; b->seq++;
(w ? up_write : up_read)(&b->lock); (w ? up_write : up_read)(&b->lock);
} }
#define insert_lock(s, b) ((b)->level <= (s)->lock) void bch_btree_node_read(struct btree *);
void bch_btree_node_write(struct btree *, struct closure *);
/* void bch_btree_set_root(struct btree *);
* These macros are for recursing down the btree - they handle the details of struct btree *bch_btree_node_alloc(struct cache_set *, int, bool);
* locking and looking up nodes in the cache for you. They're best treated as struct btree *bch_btree_node_get(struct cache_set *, struct bkey *, int, bool);
* mere syntax when reading code that uses them.
*
* op->lock determines whether we take a read or a write lock at a given depth.
* If you've got a read lock and find that you need a write lock (i.e. you're
* going to have to split), set op->lock and return -EINTR; btree_root() will
* call you again and you'll have the correct lock.
*/
/** int bch_btree_insert_check_key(struct btree *, struct btree_op *,
* btree - recurse down the btree on a specified key struct bkey *);
* @fn: function to call, which will be passed the child node int bch_btree_insert(struct cache_set *, struct keylist *,
* @key: key to recurse on atomic_t *, struct bkey *);
* @b: parent btree node
* @op: pointer to struct btree_op int bch_gc_thread_start(struct cache_set *);
*/ size_t bch_btree_gc_finish(struct cache_set *);
#define btree(fn, key, b, op, ...) \ void bch_moving_gc(struct cache_set *);
({ \ int bch_btree_check(struct cache_set *);
int _r, l = (b)->level - 1; \ uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *);
bool _w = l <= (op)->lock; \
struct btree *_b = bch_btree_node_get((b)->c, key, l, op); \
if (!IS_ERR(_b)) { \
_r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \
rw_unlock(_w, _b); \
} else \
_r = PTR_ERR(_b); \
_r; \
})
/**
* btree_root - call a function on the root of the btree
* @fn: function to call, which will be passed the child node
* @c: cache set
* @op: pointer to struct btree_op
*/
#define btree_root(fn, c, op, ...) \
({ \
int _r = -EINTR; \
do { \
struct btree *_b = (c)->root; \
bool _w = insert_lock(op, _b); \
rw_lock(_w, _b, _b->level); \
if (_b == (c)->root && \
_w == insert_lock(op, _b)) \
_r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \
rw_unlock(_w, _b); \
bch_cannibalize_unlock(c, &(op)->cl); \
} while (_r == -EINTR); \
\
_r; \
})
static inline bool should_split(struct btree *b) static inline void wake_up_gc(struct cache_set *c)
{ {
struct bset *i = write_block(b); if (c->gc_thread)
return b->written >= btree_blocks(b) || wake_up_process(c->gc_thread);
(i->seq == b->sets[0].data->seq &&
b->written + __set_blocks(i, i->keys + 15, b->c)
> btree_blocks(b));
} }
void bch_btree_node_read(struct btree *); #define MAP_DONE 0
void bch_btree_node_write(struct btree *, struct closure *); #define MAP_CONTINUE 1
void bch_cannibalize_unlock(struct cache_set *, struct closure *); #define MAP_ALL_NODES 0
void bch_btree_set_root(struct btree *); #define MAP_LEAF_NODES 1
struct btree *bch_btree_node_alloc(struct cache_set *, int, struct closure *);
struct btree *bch_btree_node_get(struct cache_set *, struct bkey *,
int, struct btree_op *);
bool bch_btree_insert_check_key(struct btree *, struct btree_op *, #define MAP_END_KEY 1
struct bio *);
int bch_btree_insert(struct btree_op *, struct cache_set *);
int bch_btree_search_recurse(struct btree *, struct btree_op *); typedef int (btree_map_nodes_fn)(struct btree_op *, struct btree *);
int __bch_btree_map_nodes(struct btree_op *, struct cache_set *,
struct bkey *, btree_map_nodes_fn *, int);
void bch_queue_gc(struct cache_set *); static inline int bch_btree_map_nodes(struct btree_op *op, struct cache_set *c,
size_t bch_btree_gc_finish(struct cache_set *); struct bkey *from, btree_map_nodes_fn *fn)
void bch_moving_gc(struct closure *); {
int bch_btree_check(struct cache_set *, struct btree_op *); return __bch_btree_map_nodes(op, c, from, fn, MAP_ALL_NODES);
uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *); }
static inline int bch_btree_map_leaf_nodes(struct btree_op *op,
struct cache_set *c,
struct bkey *from,
btree_map_nodes_fn *fn)
{
return __bch_btree_map_nodes(op, c, from, fn, MAP_LEAF_NODES);
}
typedef int (btree_map_keys_fn)(struct btree_op *, struct btree *,
struct bkey *);
int bch_btree_map_keys(struct btree_op *, struct cache_set *,
struct bkey *, btree_map_keys_fn *, int);
typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *);
void bch_keybuf_init(struct keybuf *); void bch_keybuf_init(struct keybuf *);
void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *, void bch_refill_keybuf(struct cache_set *, struct keybuf *,
keybuf_pred_fn *); struct bkey *, keybuf_pred_fn *);
bool bch_keybuf_check_overlapping(struct keybuf *, struct bkey *, bool bch_keybuf_check_overlapping(struct keybuf *, struct bkey *,
struct bkey *); struct bkey *);
void bch_keybuf_del(struct keybuf *, struct keybuf_key *); void bch_keybuf_del(struct keybuf *, struct keybuf_key *);
......
...@@ -11,17 +11,6 @@ ...@@ -11,17 +11,6 @@
#include "closure.h" #include "closure.h"
void closure_queue(struct closure *cl)
{
struct workqueue_struct *wq = cl->wq;
if (wq) {
INIT_WORK(&cl->work, cl->work.func);
BUG_ON(!queue_work(wq, &cl->work));
} else
cl->fn(cl);
}
EXPORT_SYMBOL_GPL(closure_queue);
#define CL_FIELD(type, field) \ #define CL_FIELD(type, field) \
case TYPE_ ## type: \ case TYPE_ ## type: \
return &container_of(cl, struct type, cl)->field return &container_of(cl, struct type, cl)->field
...@@ -30,17 +19,6 @@ static struct closure_waitlist *closure_waitlist(struct closure *cl) ...@@ -30,17 +19,6 @@ static struct closure_waitlist *closure_waitlist(struct closure *cl)
{ {
switch (cl->type) { switch (cl->type) {
CL_FIELD(closure_with_waitlist, wait); CL_FIELD(closure_with_waitlist, wait);
CL_FIELD(closure_with_waitlist_and_timer, wait);
default:
return NULL;
}
}
static struct timer_list *closure_timer(struct closure *cl)
{
switch (cl->type) {
CL_FIELD(closure_with_timer, timer);
CL_FIELD(closure_with_waitlist_and_timer, timer);
default: default:
return NULL; return NULL;
} }
...@@ -51,7 +29,7 @@ static inline void closure_put_after_sub(struct closure *cl, int flags) ...@@ -51,7 +29,7 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
int r = flags & CLOSURE_REMAINING_MASK; int r = flags & CLOSURE_REMAINING_MASK;
BUG_ON(flags & CLOSURE_GUARD_MASK); BUG_ON(flags & CLOSURE_GUARD_MASK);
BUG_ON(!r && (flags & ~(CLOSURE_DESTRUCTOR|CLOSURE_BLOCKING))); BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR));
/* Must deliver precisely one wakeup */ /* Must deliver precisely one wakeup */
if (r == 1 && (flags & CLOSURE_SLEEPING)) if (r == 1 && (flags & CLOSURE_SLEEPING))
...@@ -59,7 +37,6 @@ static inline void closure_put_after_sub(struct closure *cl, int flags) ...@@ -59,7 +37,6 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
if (!r) { if (!r) {
if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) { if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) {
/* CLOSURE_BLOCKING might be set - clear it */
atomic_set(&cl->remaining, atomic_set(&cl->remaining,
CLOSURE_REMAINING_INITIALIZER); CLOSURE_REMAINING_INITIALIZER);
closure_queue(cl); closure_queue(cl);
...@@ -90,13 +67,13 @@ void closure_sub(struct closure *cl, int v) ...@@ -90,13 +67,13 @@ void closure_sub(struct closure *cl, int v)
{ {
closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining)); closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining));
} }
EXPORT_SYMBOL_GPL(closure_sub); EXPORT_SYMBOL(closure_sub);
void closure_put(struct closure *cl) void closure_put(struct closure *cl)
{ {
closure_put_after_sub(cl, atomic_dec_return(&cl->remaining)); closure_put_after_sub(cl, atomic_dec_return(&cl->remaining));
} }
EXPORT_SYMBOL_GPL(closure_put); EXPORT_SYMBOL(closure_put);
static void set_waiting(struct closure *cl, unsigned long f) static void set_waiting(struct closure *cl, unsigned long f)
{ {
...@@ -133,7 +110,7 @@ void __closure_wake_up(struct closure_waitlist *wait_list) ...@@ -133,7 +110,7 @@ void __closure_wake_up(struct closure_waitlist *wait_list)
closure_sub(cl, CLOSURE_WAITING + 1); closure_sub(cl, CLOSURE_WAITING + 1);
} }
} }
EXPORT_SYMBOL_GPL(__closure_wake_up); EXPORT_SYMBOL(__closure_wake_up);
bool closure_wait(struct closure_waitlist *list, struct closure *cl) bool closure_wait(struct closure_waitlist *list, struct closure *cl)
{ {
...@@ -146,7 +123,7 @@ bool closure_wait(struct closure_waitlist *list, struct closure *cl) ...@@ -146,7 +123,7 @@ bool closure_wait(struct closure_waitlist *list, struct closure *cl)
return true; return true;
} }
EXPORT_SYMBOL_GPL(closure_wait); EXPORT_SYMBOL(closure_wait);
/** /**
* closure_sync() - sleep until a closure a closure has nothing left to wait on * closure_sync() - sleep until a closure a closure has nothing left to wait on
...@@ -169,7 +146,7 @@ void closure_sync(struct closure *cl) ...@@ -169,7 +146,7 @@ void closure_sync(struct closure *cl)
__closure_end_sleep(cl); __closure_end_sleep(cl);
} }
EXPORT_SYMBOL_GPL(closure_sync); EXPORT_SYMBOL(closure_sync);
/** /**
* closure_trylock() - try to acquire the closure, without waiting * closure_trylock() - try to acquire the closure, without waiting
...@@ -183,17 +160,17 @@ bool closure_trylock(struct closure *cl, struct closure *parent) ...@@ -183,17 +160,17 @@ bool closure_trylock(struct closure *cl, struct closure *parent)
CLOSURE_REMAINING_INITIALIZER) != -1) CLOSURE_REMAINING_INITIALIZER) != -1)
return false; return false;
closure_set_ret_ip(cl);
smp_mb(); smp_mb();
cl->parent = parent; cl->parent = parent;
if (parent) if (parent)
closure_get(parent); closure_get(parent);
closure_set_ret_ip(cl);
closure_debug_create(cl); closure_debug_create(cl);
return true; return true;
} }
EXPORT_SYMBOL_GPL(closure_trylock); EXPORT_SYMBOL(closure_trylock);
void __closure_lock(struct closure *cl, struct closure *parent, void __closure_lock(struct closure *cl, struct closure *parent,
struct closure_waitlist *wait_list) struct closure_waitlist *wait_list)
...@@ -205,57 +182,11 @@ void __closure_lock(struct closure *cl, struct closure *parent, ...@@ -205,57 +182,11 @@ void __closure_lock(struct closure *cl, struct closure *parent,
if (closure_trylock(cl, parent)) if (closure_trylock(cl, parent))
return; return;
closure_wait_event_sync(wait_list, &wait, closure_wait_event(wait_list, &wait,
atomic_read(&cl->remaining) == -1); atomic_read(&cl->remaining) == -1);
} }
} }
EXPORT_SYMBOL_GPL(__closure_lock); EXPORT_SYMBOL(__closure_lock);
static void closure_delay_timer_fn(unsigned long data)
{
struct closure *cl = (struct closure *) data;
closure_sub(cl, CLOSURE_TIMER + 1);
}
void do_closure_timer_init(struct closure *cl)
{
struct timer_list *timer = closure_timer(cl);
init_timer(timer);
timer->data = (unsigned long) cl;
timer->function = closure_delay_timer_fn;
}
EXPORT_SYMBOL_GPL(do_closure_timer_init);
bool __closure_delay(struct closure *cl, unsigned long delay,
struct timer_list *timer)
{
if (atomic_read(&cl->remaining) & CLOSURE_TIMER)
return false;
BUG_ON(timer_pending(timer));
timer->expires = jiffies + delay;
atomic_add(CLOSURE_TIMER + 1, &cl->remaining);
add_timer(timer);
return true;
}
EXPORT_SYMBOL_GPL(__closure_delay);
void __closure_flush(struct closure *cl, struct timer_list *timer)
{
if (del_timer(timer))
closure_sub(cl, CLOSURE_TIMER + 1);
}
EXPORT_SYMBOL_GPL(__closure_flush);
void __closure_flush_sync(struct closure *cl, struct timer_list *timer)
{
if (del_timer_sync(timer))
closure_sub(cl, CLOSURE_TIMER + 1);
}
EXPORT_SYMBOL_GPL(__closure_flush_sync);
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG #ifdef CONFIG_BCACHE_CLOSURES_DEBUG
...@@ -273,7 +204,7 @@ void closure_debug_create(struct closure *cl) ...@@ -273,7 +204,7 @@ void closure_debug_create(struct closure *cl)
list_add(&cl->all, &closure_list); list_add(&cl->all, &closure_list);
spin_unlock_irqrestore(&closure_list_lock, flags); spin_unlock_irqrestore(&closure_list_lock, flags);
} }
EXPORT_SYMBOL_GPL(closure_debug_create); EXPORT_SYMBOL(closure_debug_create);
void closure_debug_destroy(struct closure *cl) void closure_debug_destroy(struct closure *cl)
{ {
...@@ -286,7 +217,7 @@ void closure_debug_destroy(struct closure *cl) ...@@ -286,7 +217,7 @@ void closure_debug_destroy(struct closure *cl)
list_del(&cl->all); list_del(&cl->all);
spin_unlock_irqrestore(&closure_list_lock, flags); spin_unlock_irqrestore(&closure_list_lock, flags);
} }
EXPORT_SYMBOL_GPL(closure_debug_destroy); EXPORT_SYMBOL(closure_debug_destroy);
static struct dentry *debug; static struct dentry *debug;
...@@ -304,14 +235,12 @@ static int debug_seq_show(struct seq_file *f, void *data) ...@@ -304,14 +235,12 @@ static int debug_seq_show(struct seq_file *f, void *data)
cl, (void *) cl->ip, cl->fn, cl->parent, cl, (void *) cl->ip, cl->fn, cl->parent,
r & CLOSURE_REMAINING_MASK); r & CLOSURE_REMAINING_MASK);
seq_printf(f, "%s%s%s%s%s%s\n", seq_printf(f, "%s%s%s%s\n",
test_bit(WORK_STRUCT_PENDING, test_bit(WORK_STRUCT_PENDING,
work_data_bits(&cl->work)) ? "Q" : "", work_data_bits(&cl->work)) ? "Q" : "",
r & CLOSURE_RUNNING ? "R" : "", r & CLOSURE_RUNNING ? "R" : "",
r & CLOSURE_BLOCKING ? "B" : "",
r & CLOSURE_STACK ? "S" : "", r & CLOSURE_STACK ? "S" : "",
r & CLOSURE_SLEEPING ? "Sl" : "", r & CLOSURE_SLEEPING ? "Sl" : "");
r & CLOSURE_TIMER ? "T" : "");
if (r & CLOSURE_WAITING) if (r & CLOSURE_WAITING)
seq_printf(f, " W %pF\n", seq_printf(f, " W %pF\n",
......
...@@ -155,21 +155,6 @@ ...@@ -155,21 +155,6 @@
* delayed_work embeds a work item and a timer_list. The important thing is, use * delayed_work embeds a work item and a timer_list. The important thing is, use
* it exactly like you would a regular closure and closure_put() will magically * it exactly like you would a regular closure and closure_put() will magically
* handle everything for you. * handle everything for you.
*
* We've got closures that embed timers, too. They're called, appropriately
* enough:
* struct closure_with_timer;
*
* This gives you access to closure_delay(). It takes a refcount for a specified
* number of jiffies - you could then call closure_sync() (for a slightly
* convoluted version of msleep()) or continue_at() - which gives you the same
* effect as using a delayed work item, except you can reuse the work_struct
* already embedded in struct closure.
*
* Lastly, there's struct closure_with_waitlist_and_timer. It does what you
* probably expect, if you happen to need the features of both. (You don't
* really want to know how all this is implemented, but if I've done my job
* right you shouldn't have to care).
*/ */
struct closure; struct closure;
...@@ -182,16 +167,11 @@ struct closure_waitlist { ...@@ -182,16 +167,11 @@ struct closure_waitlist {
enum closure_type { enum closure_type {
TYPE_closure = 0, TYPE_closure = 0,
TYPE_closure_with_waitlist = 1, TYPE_closure_with_waitlist = 1,
TYPE_closure_with_timer = 2, MAX_CLOSURE_TYPE = 1,
TYPE_closure_with_waitlist_and_timer = 3,
MAX_CLOSURE_TYPE = 3,
}; };
enum closure_state { enum closure_state {
/* /*
* CLOSURE_BLOCKING: Causes closure_wait_event() to block, instead of
* waiting asynchronously
*
* CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by
* the thread that owns the closure, and cleared by the thread that's * the thread that owns the closure, and cleared by the thread that's
* waking up the closure. * waking up the closure.
...@@ -200,10 +180,6 @@ enum closure_state { ...@@ -200,10 +180,6 @@ enum closure_state {
* - indicates that cl->task is valid and closure_put() may wake it up. * - indicates that cl->task is valid and closure_put() may wake it up.
* Only set or cleared by the thread that owns the closure. * Only set or cleared by the thread that owns the closure.
* *
* CLOSURE_TIMER: Analagous to CLOSURE_WAITING, indicates that a closure
* has an outstanding timer. Must be set by the thread that owns the
* closure, and cleared by the timer function when the timer goes off.
*
* The rest are for debugging and don't affect behaviour: * The rest are for debugging and don't affect behaviour:
* *
* CLOSURE_RUNNING: Set when a closure is running (i.e. by * CLOSURE_RUNNING: Set when a closure is running (i.e. by
...@@ -218,19 +194,17 @@ enum closure_state { ...@@ -218,19 +194,17 @@ enum closure_state {
* closure with this flag set * closure with this flag set
*/ */
CLOSURE_BITS_START = (1 << 19), CLOSURE_BITS_START = (1 << 23),
CLOSURE_DESTRUCTOR = (1 << 19), CLOSURE_DESTRUCTOR = (1 << 23),
CLOSURE_BLOCKING = (1 << 21), CLOSURE_WAITING = (1 << 25),
CLOSURE_WAITING = (1 << 23), CLOSURE_SLEEPING = (1 << 27),
CLOSURE_SLEEPING = (1 << 25),
CLOSURE_TIMER = (1 << 27),
CLOSURE_RUNNING = (1 << 29), CLOSURE_RUNNING = (1 << 29),
CLOSURE_STACK = (1 << 31), CLOSURE_STACK = (1 << 31),
}; };
#define CLOSURE_GUARD_MASK \ #define CLOSURE_GUARD_MASK \
((CLOSURE_DESTRUCTOR|CLOSURE_BLOCKING|CLOSURE_WAITING| \ ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_SLEEPING| \
CLOSURE_SLEEPING|CLOSURE_TIMER|CLOSURE_RUNNING|CLOSURE_STACK) << 1) CLOSURE_RUNNING|CLOSURE_STACK) << 1)
#define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1) #define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1)
#define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING) #define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING)
...@@ -268,17 +242,6 @@ struct closure_with_waitlist { ...@@ -268,17 +242,6 @@ struct closure_with_waitlist {
struct closure_waitlist wait; struct closure_waitlist wait;
}; };
struct closure_with_timer {
struct closure cl;
struct timer_list timer;
};
struct closure_with_waitlist_and_timer {
struct closure cl;
struct closure_waitlist wait;
struct timer_list timer;
};
extern unsigned invalid_closure_type(void); extern unsigned invalid_closure_type(void);
#define __CLOSURE_TYPE(cl, _t) \ #define __CLOSURE_TYPE(cl, _t) \
...@@ -289,14 +252,11 @@ extern unsigned invalid_closure_type(void); ...@@ -289,14 +252,11 @@ extern unsigned invalid_closure_type(void);
( \ ( \
__CLOSURE_TYPE(cl, closure) \ __CLOSURE_TYPE(cl, closure) \
__CLOSURE_TYPE(cl, closure_with_waitlist) \ __CLOSURE_TYPE(cl, closure_with_waitlist) \
__CLOSURE_TYPE(cl, closure_with_timer) \
__CLOSURE_TYPE(cl, closure_with_waitlist_and_timer) \
invalid_closure_type() \ invalid_closure_type() \
) )
void closure_sub(struct closure *cl, int v); void closure_sub(struct closure *cl, int v);
void closure_put(struct closure *cl); void closure_put(struct closure *cl);
void closure_queue(struct closure *cl);
void __closure_wake_up(struct closure_waitlist *list); void __closure_wake_up(struct closure_waitlist *list);
bool closure_wait(struct closure_waitlist *list, struct closure *cl); bool closure_wait(struct closure_waitlist *list, struct closure *cl);
void closure_sync(struct closure *cl); void closure_sync(struct closure *cl);
...@@ -305,12 +265,6 @@ bool closure_trylock(struct closure *cl, struct closure *parent); ...@@ -305,12 +265,6 @@ bool closure_trylock(struct closure *cl, struct closure *parent);
void __closure_lock(struct closure *cl, struct closure *parent, void __closure_lock(struct closure *cl, struct closure *parent,
struct closure_waitlist *wait_list); struct closure_waitlist *wait_list);
void do_closure_timer_init(struct closure *cl);
bool __closure_delay(struct closure *cl, unsigned long delay,
struct timer_list *timer);
void __closure_flush(struct closure *cl, struct timer_list *timer);
void __closure_flush_sync(struct closure *cl, struct timer_list *timer);
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG #ifdef CONFIG_BCACHE_CLOSURES_DEBUG
void closure_debug_init(void); void closure_debug_init(void);
...@@ -354,11 +308,6 @@ static inline void closure_set_stopped(struct closure *cl) ...@@ -354,11 +308,6 @@ static inline void closure_set_stopped(struct closure *cl)
atomic_sub(CLOSURE_RUNNING, &cl->remaining); atomic_sub(CLOSURE_RUNNING, &cl->remaining);
} }
static inline bool closure_is_stopped(struct closure *cl)
{
return !(atomic_read(&cl->remaining) & CLOSURE_RUNNING);
}
static inline bool closure_is_unlocked(struct closure *cl) static inline bool closure_is_unlocked(struct closure *cl)
{ {
return atomic_read(&cl->remaining) == -1; return atomic_read(&cl->remaining) == -1;
...@@ -367,14 +316,6 @@ static inline bool closure_is_unlocked(struct closure *cl) ...@@ -367,14 +316,6 @@ static inline bool closure_is_unlocked(struct closure *cl)
static inline void do_closure_init(struct closure *cl, struct closure *parent, static inline void do_closure_init(struct closure *cl, struct closure *parent,
bool running) bool running)
{ {
switch (cl->type) {
case TYPE_closure_with_timer:
case TYPE_closure_with_waitlist_and_timer:
do_closure_timer_init(cl);
default:
break;
}
cl->parent = parent; cl->parent = parent;
if (parent) if (parent)
closure_get(parent); closure_get(parent);
...@@ -429,8 +370,7 @@ do { \ ...@@ -429,8 +370,7 @@ do { \
static inline void closure_init_stack(struct closure *cl) static inline void closure_init_stack(struct closure *cl)
{ {
memset(cl, 0, sizeof(struct closure)); memset(cl, 0, sizeof(struct closure));
atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER| atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER|CLOSURE_STACK);
CLOSURE_BLOCKING|CLOSURE_STACK);
} }
/** /**
...@@ -461,24 +401,6 @@ do { \ ...@@ -461,24 +401,6 @@ do { \
#define closure_lock(cl, parent) \ #define closure_lock(cl, parent) \
__closure_lock(__to_internal_closure(cl), parent, &(cl)->wait) __closure_lock(__to_internal_closure(cl), parent, &(cl)->wait)
/**
* closure_delay() - delay some number of jiffies
* @cl: the closure that will sleep
* @delay: the delay in jiffies
*
* Takes a refcount on @cl which will be released after @delay jiffies; this may
* be used to have a function run after a delay with continue_at(), or
* closure_sync() may be used for a convoluted version of msleep().
*/
#define closure_delay(cl, delay) \
__closure_delay(__to_internal_closure(cl), delay, &(cl)->timer)
#define closure_flush(cl) \
__closure_flush(__to_internal_closure(cl), &(cl)->timer)
#define closure_flush_sync(cl) \
__closure_flush_sync(__to_internal_closure(cl), &(cl)->timer)
static inline void __closure_end_sleep(struct closure *cl) static inline void __closure_end_sleep(struct closure *cl)
{ {
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
...@@ -497,40 +419,6 @@ static inline void __closure_start_sleep(struct closure *cl) ...@@ -497,40 +419,6 @@ static inline void __closure_start_sleep(struct closure *cl)
atomic_add(CLOSURE_SLEEPING, &cl->remaining); atomic_add(CLOSURE_SLEEPING, &cl->remaining);
} }
/**
* closure_blocking() - returns true if the closure is in blocking mode.
*
* If a closure is in blocking mode, closure_wait_event() will sleep until the
* condition is true instead of waiting asynchronously.
*/
static inline bool closure_blocking(struct closure *cl)
{
return atomic_read(&cl->remaining) & CLOSURE_BLOCKING;
}
/**
* set_closure_blocking() - put a closure in blocking mode.
*
* If a closure is in blocking mode, closure_wait_event() will sleep until the
* condition is true instead of waiting asynchronously.
*
* Not thread safe - can only be called by the thread running the closure.
*/
static inline void set_closure_blocking(struct closure *cl)
{
if (!closure_blocking(cl))
atomic_add(CLOSURE_BLOCKING, &cl->remaining);
}
/*
* Not thread safe - can only be called by the thread running the closure.
*/
static inline void clear_closure_blocking(struct closure *cl)
{
if (closure_blocking(cl))
atomic_sub(CLOSURE_BLOCKING, &cl->remaining);
}
/** /**
* closure_wake_up() - wake up all closures on a wait list. * closure_wake_up() - wake up all closures on a wait list.
*/ */
...@@ -561,63 +449,36 @@ static inline void closure_wake_up(struct closure_waitlist *list) ...@@ -561,63 +449,36 @@ static inline void closure_wake_up(struct closure_waitlist *list)
* refcount on our closure. If this was a stack allocated closure, that would be * refcount on our closure. If this was a stack allocated closure, that would be
* bad. * bad.
*/ */
#define __closure_wait_event(list, cl, condition, _block) \ #define closure_wait_event(list, cl, condition) \
({ \ ({ \
bool block = _block; \
typeof(condition) ret; \ typeof(condition) ret; \
\ \
while (1) { \ while (1) { \
ret = (condition); \ ret = (condition); \
if (ret) { \ if (ret) { \
__closure_wake_up(list); \ __closure_wake_up(list); \
if (block) \ closure_sync(cl); \
closure_sync(cl); \
\
break; \ break; \
} \ } \
\ \
if (block) \ __closure_start_sleep(cl); \
__closure_start_sleep(cl); \
\
if (!closure_wait(list, cl)) { \
if (!block) \
break; \
\ \
if (!closure_wait(list, cl)) \
schedule(); \ schedule(); \
} \
} \ } \
\ \
ret; \ ret; \
}) })
/** static inline void closure_queue(struct closure *cl)
* closure_wait_event() - wait on a condition, synchronously or asynchronously. {
* @list: the wait list to wait on struct workqueue_struct *wq = cl->wq;
* @cl: the closure that is doing the waiting if (wq) {
* @condition: a C expression for the event to wait for INIT_WORK(&cl->work, cl->work.func);
* BUG_ON(!queue_work(wq, &cl->work));
* If the closure is in blocking mode, sleeps until the @condition evaluates to } else
* true - exactly like wait_event(). cl->fn(cl);
* }
* If the closure is not in blocking mode, waits asynchronously; if the
* condition is currently false the @cl is put onto @list and returns. @list
* owns a refcount on @cl; closure_sync() or continue_at() may be used later to
* wait for another thread to wake up @list, which drops the refcount on @cl.
*
* Returns the value of @condition; @cl will be on @list iff @condition was
* false.
*
* closure_wake_up(@list) must be called after changing any variable that could
* cause @condition to become true.
*/
#define closure_wait_event(list, cl, condition) \
__closure_wait_event(list, cl, condition, closure_blocking(cl))
#define closure_wait_event_async(list, cl, condition) \
__closure_wait_event(list, cl, condition, false)
#define closure_wait_event_sync(list, cl, condition) \
__closure_wait_event(list, cl, condition, true)
static inline void set_closure_fn(struct closure *cl, closure_fn *fn, static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
struct workqueue_struct *wq) struct workqueue_struct *wq)
...@@ -642,7 +503,7 @@ do { \ ...@@ -642,7 +503,7 @@ do { \
#define continue_at_nobarrier(_cl, _fn, _wq) \ #define continue_at_nobarrier(_cl, _fn, _wq) \
do { \ do { \
set_closure_fn(_cl, _fn, _wq); \ set_closure_fn(_cl, _fn, _wq); \
closure_queue(cl); \ closure_queue(_cl); \
return; \ return; \
} while (0) } while (0)
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment