Skip to content

Commit 2d5b569

Browse files
author
NeilBrown
committed
md/raid5: avoid races when changing cache size.
Cache size can grow or shrink due to various pressures at any time. So when we resize the cache as part of a 'grow' operation (i.e. change the size to allow more devices) we need to block that automatic growing/shrinking. So introduce a mutex. auto grow/shrink uses mutex_trylock() and just doesn't bother if there is a blockage. Resizing the whole cache holds the mutex to ensure that the correct number of new stripes is allocated. This bug can result in some stripes not being freed when an array is stopped. This leads to the kmem_cache not being freed and a subsequent array can try to use the same kmem_cache and get confused. Fixes: edbe83a ("md/raid5: allow the stripe_cache to grow and shrink.") Cc: [email protected] (4.1 - please delay until 2 weeks after release of 4.2) Signed-off-by: NeilBrown <[email protected]>
1 parent 6aaf0da commit 2d5b569

File tree

2 files changed

+27
-7
lines changed

2 files changed

+27
-7
lines changed

drivers/md/raid5.c

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2162,6 +2162,9 @@ static int resize_stripes(struct r5conf *conf, int newsize)
21622162
if (!sc)
21632163
return -ENOMEM;
21642164

2165+
/* Need to ensure auto-resizing doesn't interfere */
2166+
mutex_lock(&conf->cache_size_mutex);
2167+
21652168
for (i = conf->max_nr_stripes; i; i--) {
21662169
nsh = alloc_stripe(sc, GFP_KERNEL);
21672170
if (!nsh)
@@ -2178,6 +2181,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
21782181
kmem_cache_free(sc, nsh);
21792182
}
21802183
kmem_cache_destroy(sc);
2184+
mutex_unlock(&conf->cache_size_mutex);
21812185
return -ENOMEM;
21822186
}
21832187
/* Step 2 - Must use GFP_NOIO now.
@@ -2224,6 +2228,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
22242228
} else
22252229
err = -ENOMEM;
22262230

2231+
mutex_unlock(&conf->cache_size_mutex);
22272232
/* Step 4, return new stripes to service */
22282233
while(!list_empty(&newstripes)) {
22292234
nsh = list_entry(newstripes.next, struct stripe_head, lru);
@@ -5857,12 +5862,14 @@ static void raid5d(struct md_thread *thread)
58575862
pr_debug("%d stripes handled\n", handled);
58585863

58595864
spin_unlock_irq(&conf->device_lock);
5860-
if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state)) {
5865+
if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state) &&
5866+
mutex_trylock(&conf->cache_size_mutex)) {
58615867
grow_one_stripe(conf, __GFP_NOWARN);
58625868
/* Set flag even if allocation failed. This helps
58635869
* slow down allocation requests when mem is short
58645870
*/
58655871
set_bit(R5_DID_ALLOC, &conf->cache_state);
5872+
mutex_unlock(&conf->cache_size_mutex);
58665873
}
58675874

58685875
async_tx_issue_pending_all();
@@ -5894,18 +5901,22 @@ raid5_set_cache_size(struct mddev *mddev, int size)
58945901
return -EINVAL;
58955902

58965903
conf->min_nr_stripes = size;
5904+
mutex_lock(&conf->cache_size_mutex);
58975905
while (size < conf->max_nr_stripes &&
58985906
drop_one_stripe(conf))
58995907
;
5908+
mutex_unlock(&conf->cache_size_mutex);
59005909

59015910

59025911
err = md_allow_write(mddev);
59035912
if (err)
59045913
return err;
59055914

5915+
mutex_lock(&conf->cache_size_mutex);
59065916
while (size > conf->max_nr_stripes)
59075917
if (!grow_one_stripe(conf, GFP_KERNEL))
59085918
break;
5919+
mutex_unlock(&conf->cache_size_mutex);
59095920

59105921
return 0;
59115922
}
@@ -6371,11 +6382,18 @@ static unsigned long raid5_cache_scan(struct shrinker *shrink,
63716382
struct shrink_control *sc)
63726383
{
63736384
struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
6374-
int ret = 0;
6375-
while (ret < sc->nr_to_scan) {
6376-
if (drop_one_stripe(conf) == 0)
6377-
return SHRINK_STOP;
6378-
ret++;
6385+
unsigned long ret = SHRINK_STOP;
6386+
6387+
if (mutex_trylock(&conf->cache_size_mutex)) {
6388+
ret= 0;
6389+
while (ret < sc->nr_to_scan) {
6390+
if (drop_one_stripe(conf) == 0) {
6391+
ret = SHRINK_STOP;
6392+
break;
6393+
}
6394+
ret++;
6395+
}
6396+
mutex_unlock(&conf->cache_size_mutex);
63796397
}
63806398
return ret;
63816399
}
@@ -6444,6 +6462,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
64446462
goto abort;
64456463
spin_lock_init(&conf->device_lock);
64466464
seqcount_init(&conf->gen_lock);
6465+
mutex_init(&conf->cache_size_mutex);
64476466
init_waitqueue_head(&conf->wait_for_quiescent);
64486467
for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
64496468
init_waitqueue_head(&conf->wait_for_stripe[i]);

drivers/md/raid5.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -482,7 +482,8 @@ struct r5conf {
482482
*/
483483
int active_name;
484484
char cache_name[2][32];
485-
struct kmem_cache *slab_cache; /* for allocating stripes */
485+
struct kmem_cache *slab_cache; /* for allocating stripes */
486+
struct mutex cache_size_mutex; /* Protect changes to cache size */
486487

487488
int seq_flush, seq_write;
488489
int quiesce;

0 commit comments

Comments
 (0)