mirror of https://github.com/torvalds/linux.git
Merge branch 'bcache-updates-6.19' into for-6.19/block
Merge bcache updates from Coly for 6.19:

"The major change is from me: remove the useless discard interface and
code for the cache device (not the backing device). The last patch,
about gc latency, is a cooperative result from Robert Pang (Google),
Mingzhe Zou (Easystack) and me: inspired by their previous work, I
composed the final version and Robert provided positive benchmark
results. Marco contributes 2 patches to improve the usage of the
per-cpu system workqueue. Gustavo contributes a patch to fix the
not-at-end flexible-array member warning from gcc 14. And Qianfeng
contributes a code cleanup patch to remove a redundant __GFP_NOWARN."

Link: https://lore.kernel.org/linux-block/20251113053630.54218-1-colyli@fnnas.com/
Signed-off-by: Jens Axboe <axboe@kernel.dk>

* bcache-updates-6.19:
  bcache: Avoid -Wflex-array-member-not-at-end warning
  bcache: WQ_PERCPU added to alloc_workqueue users
  bcache: replace use of system_wq with system_percpu_wq
  bcache: remove redundant __GFP_NOWARN
  bcache: reduce gc latency by processing less nodes and sleep less time
  bcache: remove discard sysfs interface document
  bcache: drop discard sysfs interface
  bcache: remove discard code from alloc.c
  bcache: get rid of discard code from journal
commit c3f42a6de7
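The gc-latency change merged here paces incremental GC in two ways: each round processes a node budget that scales with the total btree size (but only while no front-end search is in flight), and the sleep between rounds shrinks when allocators are blocked waiting for free buckets. Below is a small stand-alone C sketch of that pacing arithmetic; the constants mirror the patch, while struct gc_state and the helper names are illustrative stand-ins rather than the kernel's cache_set/gc_stat types.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_GC_TIMES_SHIFT 7    /* split one GC pass into ~128 rounds */
#define GC_NODES_MIN       10   /* smallest per-round node budget */
#define GC_SLEEP_MS_MIN    10   /* short sleep while writers wait for buckets */
#define GC_SLEEP_MS        100  /* normal sleep between rounds */

struct gc_state {                    /* illustrative stand-in, not cache_set */
        size_t nodes;                /* total btree nodes (c->gc_stats.nodes) */
        int    search_inflight;      /* front-end requests in flight */
        int    bucket_waiters;       /* allocators blocked waiting for buckets */
};

/* Node budget per round: grow the batch only when the cache set is idle. */
static size_t gc_min_nodes(const struct gc_state *s)
{
        size_t min_nodes = GC_NODES_MIN;

        if (s->search_inflight == 0) {
                size_t n = s->nodes >> MAX_GC_TIMES_SHIFT;

                if (min_nodes < n)
                        min_nodes = n;
        }
        return min_nodes;
}

/* Sleep less between rounds when writers are starved of free buckets. */
static uint64_t gc_sleep_ms(const struct gc_state *s)
{
        return s->bucket_waiters > 0 ? GC_SLEEP_MS_MIN : GC_SLEEP_MS;
}

int main(void)
{
        struct gc_state busy = { .nodes = 80000, .search_inflight = 4, .bucket_waiters = 2 };
        struct gc_state idle = { .nodes = 80000, .search_inflight = 0, .bucket_waiters = 0 };

        printf("busy: %zu nodes/round, sleep %llu ms\n",
               gc_min_nodes(&busy), (unsigned long long)gc_sleep_ms(&busy));
        printf("idle: %zu nodes/round, sleep %llu ms\n",
               gc_min_nodes(&idle), (unsigned long long)gc_sleep_ms(&idle));
        return 0;
}

With 80,000 btree nodes the sketch reports 625 nodes per round and a 100 ms sleep for an idle cache set, versus a 10-node budget and a 10 ms sleep under load, which is the behaviour btree_gc_min_nodes() and btree_gc_sleep_ms() implement in the btree.c hunks below.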
--- a/Documentation/ABI/testing/sysfs-block-bcache
+++ b/Documentation/ABI/testing/sysfs-block-bcache
@@ -106,13 +106,6 @@ Description:
                 will be discarded from the cache. Should not be turned off with
                 writeback caching enabled.
 
-What:           /sys/block/<disk>/bcache/discard
-Date:           November 2010
-Contact:        Kent Overstreet <kent.overstreet@gmail.com>
-Description:
-                For a cache, a boolean allowing discard/TRIM to be turned off
-                or back on if the device supports it.
-
 What:           /sys/block/<disk>/bcache/bucket_size
 Date:           November 2010
 Contact:        Kent Overstreet <kent.overstreet@gmail.com>
--- a/Documentation/admin-guide/bcache.rst
+++ b/Documentation/admin-guide/bcache.rst
@@ -17,8 +17,7 @@ The latest bcache kernel code can be found from mainline Linux kernel:
 It's designed around the performance characteristics of SSDs - it only allocates
 in erase block sized buckets, and it uses a hybrid btree/log to track cached
 extents (which can be anywhere from a single sector to the bucket size). It's
-designed to avoid random writes at all costs; it fills up an erase block
-sequentially, then issues a discard before reusing it.
+designed to avoid random writes at all costs.
 
 Both writethrough and writeback caching are supported. Writeback defaults to
 off, but can be switched on and off arbitrarily at runtime. Bcache goes to
@@ -618,19 +617,11 @@ bucket_size
 cache_replacement_policy
   One of either lru, fifo or random.
 
-discard
-  Boolean; if on a discard/TRIM will be issued to each bucket before it is
-  reused. Defaults to off, since SATA TRIM is an unqueued command (and thus
-  slow).
-
 freelist_percent
   Size of the freelist as a percentage of nbuckets. Can be written to to
   increase the number of buckets kept on the freelist, which lets you
   artificially reduce the size of the cache at runtime. Mostly for testing
-  purposes (i.e. testing how different size caches affect your hit rate), but
-  since buckets are discarded when they move on to the freelist will also make
-  the SSD's garbage collection easier by effectively giving it more reserved
-  space.
+  purposes (i.e. testing how different size caches affect your hit rate).
 
 io_errors
   Number of errors that have occurred, decayed by io_error_halflife.
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -24,21 +24,18 @@
  * Since the gens and priorities are all stored contiguously on disk, we can
  * batch this up: We fill up the free_inc list with freshly invalidated buckets,
  * call prio_write(), and when prio_write() finishes we pull buckets off the
- * free_inc list and optionally discard them.
+ * free_inc list.
  *
  * free_inc isn't the only freelist - if it was, we'd often to sleep while
  * priorities and gens were being written before we could allocate. c->free is a
  * smaller freelist, and buckets on that list are always ready to be used.
  *
- * If we've got discards enabled, that happens when a bucket moves from the
- * free_inc list to the free list.
- *
 * There is another freelist, because sometimes we have buckets that we know
 * have nothing pointing into them - these we can reuse without waiting for
 * priorities to be rewritten. These come from freed btree nodes and buckets
 * that garbage collection discovered no longer had valid keys pointing into
 * them (because they were overwritten). That's the unused list - buckets on the
- * unused list move to the free list, optionally being discarded in the process.
+ * unused list move to the free list.
 *
 * It's also important to ensure that gens don't wrap around - with respect to
 * either the oldest gen in the btree or the gen on disk. This is quite
@@ -118,8 +115,7 @@ void bch_rescale_priorities(struct cache_set *c, int sectors)
 /*
  * Background allocation thread: scans for buckets to be invalidated,
  * invalidates them, rewrites prios/gens (marking them as invalidated on disk),
- * then optionally issues discard commands to the newly free buckets, then puts
- * them on the various freelists.
+ * then puts them on the various freelists.
  */
 
 static inline bool can_inc_bucket_gen(struct bucket *b)
@@ -321,8 +317,7 @@ static int bch_allocator_thread(void *arg)
         while (1) {
                 /*
                  * First, we pull buckets off of the unused and free_inc lists,
-                 * possibly issue discards to them, then we add the bucket to
-                 * the free list:
+                 * then we add the bucket to the free list:
                  */
                 while (1) {
                         long bucket;
@@ -330,14 +325,6 @@ static int bch_allocator_thread(void *arg)
                         if (!fifo_pop(&ca->free_inc, bucket))
                                 break;
 
-                        if (ca->discard) {
-                                mutex_unlock(&ca->set->bucket_lock);
-                                blkdev_issue_discard(ca->bdev,
-                                        bucket_to_sector(ca->set, bucket),
-                                        ca->sb.bucket_size, GFP_KERNEL);
-                                mutex_lock(&ca->set->bucket_lock);
-                        }
-
                         allocator_wait(ca, bch_allocator_push(ca, bucket));
                         wake_up(&ca->set->btree_cache_wait);
                         wake_up(&ca->set->bucket_wait);
@@ -412,7 +399,11 @@ long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait)
                                 TASK_UNINTERRUPTIBLE);
 
                 mutex_unlock(&ca->set->bucket_lock);
+
+                atomic_inc(&ca->set->bucket_wait_cnt);
                 schedule();
+                atomic_dec(&ca->set->bucket_wait_cnt);
+
                 mutex_lock(&ca->set->bucket_lock);
         } while (!fifo_pop(&ca->free[RESERVE_NONE], r) &&
                  !fifo_pop(&ca->free[reserve], r));
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -447,8 +447,7 @@ struct cache {
          * free_inc: Incoming buckets - these are buckets that currently have
          * cached data in them, and we can't reuse them until after we write
          * their new gen to disk. After prio_write() finishes writing the new
-         * gens/prios, they'll be moved to the free list (and possibly discarded
-         * in the process)
+         * gens/prios, they'll be moved to the free list.
          */
         DECLARE_FIFO(long, free)[RESERVE_NR];
         DECLARE_FIFO(long, free_inc);
@@ -467,8 +466,6 @@ struct cache {
          */
         unsigned int            invalidate_needs_gc;
 
-        bool                    discard; /* Get rid of? */
-
         struct journal_device   journal;
 
         /* The rest of this all shows up in sysfs */
@@ -607,6 +604,7 @@ struct cache_set {
          */
         atomic_t                prio_blocked;
         wait_queue_head_t       bucket_wait;
+        atomic_t                bucket_wait_cnt;
 
         /*
          * For any bio we don't skip we subtract the number of sectors from
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -327,9 +327,13 @@ struct btree_iter {
 /* Fixed-size btree_iter that can be allocated on the stack */
 
 struct btree_iter_stack {
-        struct btree_iter iter;
-        struct btree_iter_set stack_data[MAX_BSETS];
+        /* Must be last as it ends in a flexible-array member. */
+        TRAILING_OVERLAP(struct btree_iter, iter, data,
+                struct btree_iter_set stack_data[MAX_BSETS];
+        );
 };
+static_assert(offsetof(struct btree_iter_stack, iter.data) ==
+              offsetof(struct btree_iter_stack, stack_data));
 
 typedef bool (*ptr_filter_fn)(struct btree_keys *b, const struct bkey *k);
 
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -89,8 +89,9 @@
  * Test module load/unload
  */
 
-#define MAX_GC_TIMES            100
-#define MIN_GC_NODES            100
+#define MAX_GC_TIMES_SHIFT      7       /* 128 loops */
+#define GC_NODES_MIN            10
+#define GC_SLEEP_MS_MIN         10
 #define GC_SLEEP_MS             100
 
 #define PTR_DIRTY_BIT           (((uint64_t) 1 << 36))
@@ -371,7 +372,7 @@ static void do_btree_node_write(struct btree *b)
         SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) +
                        bset_sector_offset(&b->keys, i));
 
-        if (!bch_bio_alloc_pages(b->bio, __GFP_NOWARN|GFP_NOWAIT)) {
+        if (!bch_bio_alloc_pages(b->bio, GFP_NOWAIT)) {
                 struct bio_vec *bv;
                 void *addr = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1));
                 struct bvec_iter_all iter_all;
@@ -1578,29 +1579,29 @@ static unsigned int btree_gc_count_keys(struct btree *b)
 
 static size_t btree_gc_min_nodes(struct cache_set *c)
 {
-        size_t min_nodes;
+        size_t min_nodes = GC_NODES_MIN;
 
-        /*
-         * Since incremental GC would stop 100ms when front
-         * side I/O comes, so when there are many btree nodes,
-         * if GC only processes constant (100) nodes each time,
-         * GC would last a long time, and the front side I/Os
-         * would run out of the buckets (since no new bucket
-         * can be allocated during GC), and be blocked again.
-         * So GC should not process constant nodes, but varied
-         * nodes according to the number of btree nodes, which
-         * realized by dividing GC into constant(100) times,
-         * so when there are many btree nodes, GC can process
-         * more nodes each time, otherwise, GC will process less
-         * nodes each time (but no less than MIN_GC_NODES)
-         */
-        min_nodes = c->gc_stats.nodes / MAX_GC_TIMES;
-        if (min_nodes < MIN_GC_NODES)
-                min_nodes = MIN_GC_NODES;
+        if (atomic_read(&c->search_inflight) == 0) {
+                size_t n = c->gc_stats.nodes >> MAX_GC_TIMES_SHIFT;
+
+                if (min_nodes < n)
+                        min_nodes = n;
+        }
 
         return min_nodes;
 }
 
+static uint64_t btree_gc_sleep_ms(struct cache_set *c)
+{
+        uint64_t sleep_ms;
+
+        if (atomic_read(&c->bucket_wait_cnt) > 0)
+                sleep_ms = GC_SLEEP_MS_MIN;
+        else
+                sleep_ms = GC_SLEEP_MS;
+
+        return sleep_ms;
+}
+
 static int btree_gc_recurse(struct btree *b, struct btree_op *op,
                             struct closure *writes, struct gc_stat *gc)
@@ -1668,8 +1669,7 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
                 memmove(r + 1, r, sizeof(r[0]) * (GC_MERGE_NODES - 1));
                 r->b = NULL;
 
-                if (atomic_read(&b->c->search_inflight) &&
-                    gc->nodes >= gc->nodes_pre + btree_gc_min_nodes(b->c)) {
+                if (gc->nodes >= (gc->nodes_pre + btree_gc_min_nodes(b->c))) {
                         gc->nodes_pre = gc->nodes;
                         ret = -EAGAIN;
                         break;
@@ -1846,8 +1846,8 @@ static void bch_btree_gc(struct cache_set *c)
                 cond_resched();
 
                 if (ret == -EAGAIN)
-                        schedule_timeout_interruptible(msecs_to_jiffies
-                                                       (GC_SLEEP_MS));
+                        schedule_timeout_interruptible(
+                                msecs_to_jiffies(btree_gc_sleep_ms(c)));
                 else if (ret)
                         pr_warn("gc failed!\n");
         } while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));
@@ -2822,7 +2822,8 @@ void bch_btree_exit(void)
 
 int __init bch_btree_init(void)
 {
-        btree_io_wq = alloc_workqueue("bch_btree_io", WQ_MEM_RECLAIM, 0);
+        btree_io_wq = alloc_workqueue("bch_btree_io",
+                                      WQ_MEM_RECLAIM | WQ_PERCPU, 0);
         if (!btree_io_wq)
                 return -ENOMEM;
 
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -275,8 +275,7 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
                          * ja->cur_idx
                          */
                         ja->cur_idx = i;
-                        ja->last_idx = ja->discard_idx = (i + 1) %
-                                ca->sb.njournal_buckets;
+                        ja->last_idx = (i + 1) % ca->sb.njournal_buckets;
 
                 }
 
@@ -336,16 +335,6 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list)
         }
 }
 
-static bool is_discard_enabled(struct cache_set *s)
-{
-        struct cache *ca = s->cache;
-
-        if (ca->discard)
-                return true;
-
-        return false;
-}
-
 int bch_journal_replay(struct cache_set *s, struct list_head *list)
 {
         int ret = 0, keys = 0, entries = 0;
@@ -360,15 +349,10 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
                 BUG_ON(i->pin && atomic_read(i->pin) != 1);
 
                 if (n != i->j.seq) {
-                        if (n == start && is_discard_enabled(s))
-                                pr_info("journal entries %llu-%llu may be discarded! (replaying %llu-%llu)\n",
-                                        n, i->j.seq - 1, start, end);
-                        else {
-                                pr_err("journal entries %llu-%llu missing! (replaying %llu-%llu)\n",
-                                        n, i->j.seq - 1, start, end);
-                                ret = -EIO;
-                                goto err;
-                        }
+                        pr_err("journal entries %llu-%llu missing! (replaying %llu-%llu)\n",
+                               n, i->j.seq - 1, start, end);
+                        ret = -EIO;
+                        goto err;
                 }
 
                 for (k = i->j.start;
@@ -568,65 +552,6 @@ static void btree_flush_write(struct cache_set *c)
 
 #define last_seq(j)     ((j)->seq - fifo_used(&(j)->pin) + 1)
 
-static void journal_discard_endio(struct bio *bio)
-{
-        struct journal_device *ja =
-                container_of(bio, struct journal_device, discard_bio);
-        struct cache *ca = container_of(ja, struct cache, journal);
-
-        atomic_set(&ja->discard_in_flight, DISCARD_DONE);
-
-        closure_wake_up(&ca->set->journal.wait);
-        closure_put(&ca->set->cl);
-}
-
-static void journal_discard_work(struct work_struct *work)
-{
-        struct journal_device *ja =
-                container_of(work, struct journal_device, discard_work);
-
-        submit_bio(&ja->discard_bio);
-}
-
-static void do_journal_discard(struct cache *ca)
-{
-        struct journal_device *ja = &ca->journal;
-        struct bio *bio = &ja->discard_bio;
-
-        if (!ca->discard) {
-                ja->discard_idx = ja->last_idx;
-                return;
-        }
-
-        switch (atomic_read(&ja->discard_in_flight)) {
-        case DISCARD_IN_FLIGHT:
-                return;
-
-        case DISCARD_DONE:
-                ja->discard_idx = (ja->discard_idx + 1) %
-                        ca->sb.njournal_buckets;
-
-                atomic_set(&ja->discard_in_flight, DISCARD_READY);
-                fallthrough;
-
-        case DISCARD_READY:
-                if (ja->discard_idx == ja->last_idx)
-                        return;
-
-                atomic_set(&ja->discard_in_flight, DISCARD_IN_FLIGHT);
-
-                bio_init_inline(bio, ca->bdev, 1, REQ_OP_DISCARD);
-                bio->bi_iter.bi_sector = bucket_to_sector(ca->set,
-                                                ca->sb.d[ja->discard_idx]);
-                bio->bi_iter.bi_size = bucket_bytes(ca);
-                bio->bi_end_io = journal_discard_endio;
-
-                closure_get(&ca->set->cl);
-                INIT_WORK(&ja->discard_work, journal_discard_work);
-                queue_work(bch_journal_wq, &ja->discard_work);
-        }
-}
-
 static unsigned int free_journal_buckets(struct cache_set *c)
 {
         struct journal *j = &c->journal;
@@ -635,10 +560,10 @@ static unsigned int free_journal_buckets(struct cache_set *c)
         unsigned int n;
 
         /* In case njournal_buckets is not power of 2 */
-        if (ja->cur_idx >= ja->discard_idx)
-                n = ca->sb.njournal_buckets + ja->discard_idx - ja->cur_idx;
+        if (ja->cur_idx >= ja->last_idx)
+                n = ca->sb.njournal_buckets + ja->last_idx - ja->cur_idx;
         else
-                n = ja->discard_idx - ja->cur_idx;
+                n = ja->last_idx - ja->cur_idx;
 
         if (n > (1 + j->do_reserve))
                 return n - (1 + j->do_reserve);
@@ -668,8 +593,6 @@ static void journal_reclaim(struct cache_set *c)
                 ja->last_idx = (ja->last_idx + 1) %
                         ca->sb.njournal_buckets;
 
-        do_journal_discard(ca);
-
         if (c->journal.blocks_free)
                 goto out;
 
--- a/drivers/md/bcache/journal.h
+++ b/drivers/md/bcache/journal.h
@@ -139,19 +139,6 @@ struct journal_device {
         /* Last journal bucket that still contains an open journal entry */
         unsigned int            last_idx;
 
-        /* Next journal bucket to be discarded */
-        unsigned int            discard_idx;
-
-#define DISCARD_READY           0
-#define DISCARD_IN_FLIGHT       1
-#define DISCARD_DONE            2
-        /* 1 - discard in flight, -1 - discard completed */
-        atomic_t                discard_in_flight;
-
-        struct work_struct      discard_work;
-        struct bio              discard_bio;
-        struct bio_vec          discard_bv;
-
         /* Bio for journal reads/writes to this device */
         struct bio              bio;
         struct bio_vec          bv[8];
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1388,7 +1388,7 @@ static CLOSURE_CALLBACK(cached_dev_flush)
         bch_cache_accounting_destroy(&dc->accounting);
         kobject_del(&d->kobj);
 
-        continue_at(cl, cached_dev_free, system_wq);
+        continue_at(cl, cached_dev_free, system_percpu_wq);
 }
 
 static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
@@ -1400,7 +1400,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
         __module_get(THIS_MODULE);
         INIT_LIST_HEAD(&dc->list);
         closure_init(&dc->disk.cl, NULL);
-        set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq);
+        set_closure_fn(&dc->disk.cl, cached_dev_flush, system_percpu_wq);
         kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype);
         INIT_WORK(&dc->detach, cached_dev_detach_finish);
         sema_init(&dc->sb_write_mutex, 1);
@@ -1513,7 +1513,7 @@ static CLOSURE_CALLBACK(flash_dev_flush)
         bcache_device_unlink(d);
         mutex_unlock(&bch_register_lock);
         kobject_del(&d->kobj);
-        continue_at(cl, flash_dev_free, system_wq);
+        continue_at(cl, flash_dev_free, system_percpu_wq);
 }
 
 static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
@@ -1525,7 +1525,7 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
                 goto err_ret;
 
         closure_init(&d->cl, NULL);
-        set_closure_fn(&d->cl, flash_dev_flush, system_wq);
+        set_closure_fn(&d->cl, flash_dev_flush, system_percpu_wq);
 
         kobject_init(&d->kobj, &bch_flash_dev_ktype);
 
@@ -1833,7 +1833,7 @@ static CLOSURE_CALLBACK(__cache_set_unregister)
 
         mutex_unlock(&bch_register_lock);
 
-        continue_at(cl, cache_set_flush, system_wq);
+        continue_at(cl, cache_set_flush, system_percpu_wq);
 }
 
 void bch_cache_set_stop(struct cache_set *c)
@@ -1863,10 +1863,10 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 
         __module_get(THIS_MODULE);
         closure_init(&c->cl, NULL);
-        set_closure_fn(&c->cl, cache_set_free, system_wq);
+        set_closure_fn(&c->cl, cache_set_free, system_percpu_wq);
 
         closure_init(&c->caching, &c->cl);
-        set_closure_fn(&c->caching, __cache_set_unregister, system_wq);
+        set_closure_fn(&c->caching, __cache_set_unregister, system_percpu_wq);
 
         /* Maybe create continue_at_noreturn() and use it here? */
         closure_set_stopped(&c->cl);
@@ -1939,7 +1939,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
         if (!c->uuids)
                 goto err;
 
-        c->moving_gc_wq = alloc_workqueue("bcache_gc", WQ_MEM_RECLAIM, 0);
+        c->moving_gc_wq = alloc_workqueue("bcache_gc",
+                                          WQ_MEM_RECLAIM | WQ_PERCPU, 0);
         if (!c->moving_gc_wq)
                 goto err;
 
@@ -2382,9 +2383,6 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
         ca->bdev = file_bdev(bdev_file);
         ca->sb_disk = sb_disk;
 
-        if (bdev_max_discard_sectors(file_bdev(bdev_file)))
-                ca->discard = CACHE_DISCARD(&ca->sb);
-
         ret = cache_alloc(ca);
         if (ret != 0) {
                 if (ret == -ENOMEM)
@@ -2531,7 +2529,7 @@ static void register_device_async(struct async_reg_args *args)
         INIT_DELAYED_WORK(&args->reg_work, register_cache_worker);
 
         /* 10 jiffies is enough for a delay */
-        queue_delayed_work(system_wq, &args->reg_work, 10);
+        queue_delayed_work(system_percpu_wq, &args->reg_work, 10);
 }
 
 static void *alloc_holder_object(struct cache_sb *sb)
@@ -2905,24 +2903,25 @@ static int __init bcache_init(void)
         if (bch_btree_init())
                 goto err;
 
-        bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0);
+        bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM | WQ_PERCPU, 0);
         if (!bcache_wq)
                 goto err;
 
         /*
          * Let's not make this `WQ_MEM_RECLAIM` for the following reasons:
          *
-         * 1. It used `system_wq` before which also does no memory reclaim.
+         * 1. It used `system_percpu_wq` before which also does no memory reclaim.
          * 2. With `WQ_MEM_RECLAIM` desktop stalls, increased boot times, and
          *    reduced throughput can be observed.
          *
-         * We still want to user our own queue to not congest the `system_wq`.
+         * We still want to user our own queue to not congest the `system_percpu_wq`.
          */
-        bch_flush_wq = alloc_workqueue("bch_flush", 0, 0);
+        bch_flush_wq = alloc_workqueue("bch_flush", WQ_PERCPU, 0);
         if (!bch_flush_wq)
                 goto err;
 
-        bch_journal_wq = alloc_workqueue("bch_journal", WQ_MEM_RECLAIM, 0);
+        bch_journal_wq = alloc_workqueue("bch_journal",
+                                         WQ_MEM_RECLAIM | WQ_PERCPU, 0);
         if (!bch_journal_wq)
                 goto err;
 
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -134,7 +134,6 @@ read_attribute(partial_stripes_expensive);
 rw_attribute(synchronous);
 rw_attribute(journal_delay_ms);
 rw_attribute(io_disable);
-rw_attribute(discard);
 rw_attribute(running);
 rw_attribute(label);
 rw_attribute(errors);
@@ -1036,7 +1035,6 @@ SHOW(__bch_cache)
         sysfs_hprint(bucket_size, bucket_bytes(ca));
         sysfs_hprint(block_size, block_bytes(ca));
         sysfs_print(nbuckets, ca->sb.nbuckets);
-        sysfs_print(discard, ca->discard);
         sysfs_hprint(written, atomic_long_read(&ca->sectors_written) << 9);
         sysfs_hprint(btree_written,
                      atomic_long_read(&ca->btree_sectors_written) << 9);
@@ -1142,18 +1140,6 @@ STORE(__bch_cache)
         if (bcache_is_reboot)
                 return -EBUSY;
 
-        if (attr == &sysfs_discard) {
-                bool v = strtoul_or_return(buf);
-
-                if (bdev_max_discard_sectors(ca->bdev))
-                        ca->discard = v;
-
-                if (v != CACHE_DISCARD(&ca->sb)) {
-                        SET_CACHE_DISCARD(&ca->sb, v);
-                        bcache_write_super(ca->set);
-                }
-        }
-
         if (attr == &sysfs_cache_replacement_policy) {
                 v = __sysfs_match_string(cache_replacement_policies, -1, buf);
                 if (v < 0)
@@ -1185,7 +1171,6 @@ static struct attribute *bch_cache_attrs[] = {
         &sysfs_block_size,
         &sysfs_nbuckets,
         &sysfs_priority_stats,
-        &sysfs_discard,
         &sysfs_written,
         &sysfs_btree_written,
         &sysfs_metadata_written,
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -805,8 +805,7 @@ static int bch_writeback_thread(void *arg)
                  * may set BCH_ENABLE_AUTO_GC via sysfs, then when
                  * BCH_DO_AUTO_GC is set, garbage collection thread
                  * will be wake up here. After moving gc, the shrunk
-                 * btree and discarded free buckets SSD space may be
-                 * helpful for following write requests.
+                 * btree may be helpful for following write requests.
                  */
                 if (c->gc_after_writeback ==
                                 (BCH_ENABLE_AUTO_GC|BCH_DO_AUTO_GC)) {
@@ -1076,7 +1075,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
 int bch_cached_dev_writeback_start(struct cached_dev *dc)
 {
         dc->writeback_write_wq = alloc_workqueue("bcache_writeback_wq",
-                                                 WQ_MEM_RECLAIM, 0);
+                                                 WQ_MEM_RECLAIM | WQ_PERCPU, 0);
         if (!dc->writeback_write_wq)
                 return -ENOMEM;
 