Skip to content

Commit 764c7c9

Browse files
morbidrsa authored and kdave committed
btrfs: zoned: fix parallel compressed writes
When multiple processes write data to the same block group on a compressed zoned filesystem, the underlying device could report I/O errors and data corruption is possible. This happens because on a zoned file system, compressed data writes were sent to the device via a REQ_OP_WRITE instead of a REQ_OP_ZONE_APPEND operation. But with REQ_OP_WRITE and parallel submission it cannot be guaranteed that the data is always submitted aligned to the underlying zone's write pointer. The change to using REQ_OP_ZONE_APPEND instead of REQ_OP_WRITE on a zoned filesystem is non-intrusive on a regular file system or when submitting to a conventional zone on a zoned filesystem, as it is guarded by btrfs_use_zone_append. Reported-by: David Sterba <[email protected]> Fixes: 9d294a6 ("btrfs: zoned: enable to mount ZONED incompat flag") CC: [email protected] # 5.12.x: e380adf: btrfs: zoned: pass start block to btrfs_use_zone_append CC: [email protected] # 5.12.x Signed-off-by: Johannes Thumshirn <[email protected]> Signed-off-by: David Sterba <[email protected]>
1 parent e380adf commit 764c7c9

File tree

1 file changed

+38
-4
lines changed

1 file changed

+38
-4
lines changed

fs/btrfs/compression.c

Lines changed: 38 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828
#include "compression.h"
2929
#include "extent_io.h"
3030
#include "extent_map.h"
31+
#include "zoned.h"
3132

3233
static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };
3334

@@ -349,6 +350,7 @@ static void end_compressed_bio_write(struct bio *bio)
349350
*/
350351
inode = cb->inode;
351352
cb->compressed_pages[0]->mapping = cb->inode->i_mapping;
353+
btrfs_record_physical_zoned(inode, cb->start, bio);
352354
btrfs_writepage_endio_finish_ordered(cb->compressed_pages[0],
353355
cb->start, cb->start + cb->len - 1,
354356
bio->bi_status == BLK_STS_OK);
@@ -401,6 +403,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
401403
u64 first_byte = disk_start;
402404
blk_status_t ret;
403405
int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
406+
const bool use_append = btrfs_use_zone_append(inode, disk_start);
407+
const unsigned int bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
404408

405409
WARN_ON(!PAGE_ALIGNED(start));
406410
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
@@ -418,10 +422,31 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
418422
cb->nr_pages = nr_pages;
419423

420424
bio = btrfs_bio_alloc(first_byte);
421-
bio->bi_opf = REQ_OP_WRITE | write_flags;
425+
bio->bi_opf = bio_op | write_flags;
422426
bio->bi_private = cb;
423427
bio->bi_end_io = end_compressed_bio_write;
424428

429+
if (use_append) {
430+
struct extent_map *em;
431+
struct map_lookup *map;
432+
struct block_device *bdev;
433+
434+
em = btrfs_get_chunk_map(fs_info, disk_start, PAGE_SIZE);
435+
if (IS_ERR(em)) {
436+
kfree(cb);
437+
bio_put(bio);
438+
return BLK_STS_NOTSUPP;
439+
}
440+
441+
map = em->map_lookup;
442+
/* We only support single profile for now */
443+
ASSERT(map->num_stripes == 1);
444+
bdev = map->stripes[0].dev->bdev;
445+
446+
bio_set_dev(bio, bdev);
447+
free_extent_map(em);
448+
}
449+
425450
if (blkcg_css) {
426451
bio->bi_opf |= REQ_CGROUP_PUNT;
427452
kthread_associate_blkcg(blkcg_css);
@@ -432,16 +457,21 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
432457
bytes_left = compressed_len;
433458
for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
434459
int submit = 0;
460+
int len;
435461

436462
page = compressed_pages[pg_index];
437463
page->mapping = inode->vfs_inode.i_mapping;
438464
if (bio->bi_iter.bi_size)
439465
submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio,
440466
0);
441467

468+
if (pg_index == 0 && use_append)
469+
len = bio_add_zone_append_page(bio, page, PAGE_SIZE, 0);
470+
else
471+
len = bio_add_page(bio, page, PAGE_SIZE, 0);
472+
442473
page->mapping = NULL;
443-
if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
444-
PAGE_SIZE) {
474+
if (submit || len < PAGE_SIZE) {
445475
/*
446476
* inc the count before we submit the bio so
447477
* we know the end IO handler won't happen before
@@ -465,11 +495,15 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
465495
}
466496

467497
bio = btrfs_bio_alloc(first_byte);
468-
bio->bi_opf = REQ_OP_WRITE | write_flags;
498+
bio->bi_opf = bio_op | write_flags;
469499
bio->bi_private = cb;
470500
bio->bi_end_io = end_compressed_bio_write;
471501
if (blkcg_css)
472502
bio->bi_opf |= REQ_CGROUP_PUNT;
503+
/*
504+
* Use bio_add_page() to ensure the bio has at least one
505+
* page.
506+
*/
473507
bio_add_page(bio, page, PAGE_SIZE, 0);
474508
}
475509
if (bytes_left < PAGE_SIZE) {

0 commit comments

Comments
 (0)