Skip to content

Commit fcf6b1b

Browse files
Dmitry Monakhovtytso
authored andcommitted
ext4: refactor ext4_move_extents code base
ext4_move_extents is too complex to review. It duplicates almost every function that is already available elsewhere in the codebase, and it imposes a useless artificial restriction: orig_offset == donor_offset. In fact, the logic of ext4_move_extents is very simple: iterate over the extents one by one (similar to ext4_fill_fiemap_extents) -> iterate over each page covered by the extent (similar to generic_perform_write) -> swap the extents covered by the page (can be shared with IOC_MOVE_DATA). Signed-off-by: Dmitry Monakhov <[email protected]> Signed-off-by: Theodore Ts'o <[email protected]>
1 parent f8fb4f4 commit fcf6b1b

File tree

3 files changed

+338
-891
lines changed

3 files changed

+338
-891
lines changed

fs/ext4/ext4.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2740,10 +2740,15 @@ extern int ext4_find_delalloc_range(struct inode *inode,
27402740
ext4_lblk_t lblk_start,
27412741
ext4_lblk_t lblk_end);
27422742
extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
2743+
extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path);
27432744
extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
27442745
__u64 start, __u64 len);
27452746
extern int ext4_ext_precache(struct inode *inode);
27462747
extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
2748+
/*
 * Swap up to @count blocks of extents between @inode1 (from @lblk1) and
 * @inode2 (from @lblk2).  Returns the number of blocks swapped; an error
 * code, if any, is stored in *@erp.
 */
extern int ext4_swap_extents(handle_t *handle, struct inode *inode1,
			     struct inode *inode2, ext4_lblk_t lblk1,
			     ext4_lblk_t lblk2, ext4_lblk_t count,
			     int unwritten, int *erp);
27472752

27482753
/* move_extent.c */
27492754
extern void ext4_double_down_write_data_sem(struct inode *first,

fs/ext4/extents.c

Lines changed: 221 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,19 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
291291
return size;
292292
}
293293

294+
static inline int
295+
ext4_force_split_extent_at(handle_t *handle, struct inode *inode,
296+
struct ext4_ext_path *path, ext4_lblk_t lblk,
297+
int nofail)
298+
{
299+
int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext);
300+
301+
return ext4_split_extent_at(handle, inode, path, lblk, unwritten ?
302+
EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0,
303+
EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO |
304+
(nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0));
305+
}
306+
294307
/*
295308
* Calculate the number of metadata blocks needed
296309
* to allocate @blocks
@@ -1559,7 +1572,7 @@ static int ext4_ext_search_right(struct inode *inode,
15591572
* allocated block. Thus, index entries have to be consistent
15601573
* with leaves.
15611574
*/
1562-
static ext4_lblk_t
1575+
ext4_lblk_t
15631576
ext4_ext_next_allocated_block(struct ext4_ext_path *path)
15641577
{
15651578
int depth;
@@ -2854,24 +2867,14 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
28542867
*/
28552868
if (end >= ee_block &&
28562869
end < ee_block + ext4_ext_get_actual_len(ex) - 1) {
2857-
int split_flag = 0;
2858-
2859-
if (ext4_ext_is_unwritten(ex))
2860-
split_flag = EXT4_EXT_MARK_UNWRIT1 |
2861-
EXT4_EXT_MARK_UNWRIT2;
2862-
28632870
/*
28642871
* Split the extent in two so that 'end' is the last
28652872
* block in the first new extent. Also we should not
28662873
* fail removing space due to ENOSPC so try to use
28672874
* reserved block if that happens.
28682875
*/
2869-
err = ext4_split_extent_at(handle, inode, path,
2870-
end + 1, split_flag,
2871-
EXT4_EX_NOCACHE |
2872-
EXT4_GET_BLOCKS_PRE_IO |
2873-
EXT4_GET_BLOCKS_METADATA_NOFAIL);
2874-
2876+
err = ext4_force_split_extent_at(handle, inode, path,
2877+
end + 1, 1);
28752878
if (err < 0)
28762879
goto out;
28772880
}
@@ -5506,3 +5509,208 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
55065509
mutex_unlock(&inode->i_mutex);
55075510
return ret;
55085511
}
5512+
5513+
/**
5514+
* ext4_swap_extents - Swap extents between two inodes
5515+
*
5516+
* @inode1: First inode
5517+
* @inode2: Second inode
5518+
* @lblk1: Start block for first inode
5519+
* @lblk2: Start block for second inode
5520+
* @count: Number of blocks to swap
5521+
* @mark_unwritten: Mark second inode's extents as unwritten after swap
5522+
* @erp: Pointer to save error value
5523+
*
5524+
* This helper routine does exactly what is promise "swap extents". All other
5525+
* stuff such as page-cache locking consistency, bh mapping consistency or
5526+
* extent's data copying must be performed by caller.
5527+
* Locking:
5528+
* i_mutex is held for both inodes
5529+
* i_data_sem is locked for write for both inodes
5530+
* Assumptions:
5531+
* All pages from requested range are locked for both inodes
5532+
*/
5533+
int
5534+
ext4_swap_extents(handle_t *handle, struct inode *inode1,
5535+
struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2,
5536+
ext4_lblk_t count, int unwritten, int *erp)
5537+
{
5538+
struct ext4_ext_path *path1 = NULL;
5539+
struct ext4_ext_path *path2 = NULL;
5540+
int replaced_count = 0;
5541+
5542+
BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
5543+
BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
5544+
BUG_ON(!mutex_is_locked(&inode1->i_mutex));
5545+
BUG_ON(!mutex_is_locked(&inode1->i_mutex));
5546+
5547+
*erp = ext4_es_remove_extent(inode1, lblk1, count);
5548+
if (*erp)
5549+
return 0;
5550+
*erp = ext4_es_remove_extent(inode2, lblk2, count);
5551+
if (*erp)
5552+
return 0;
5553+
5554+
while (count) {
5555+
struct ext4_extent *ex1, *ex2, tmp_ex;
5556+
ext4_lblk_t e1_blk, e2_blk;
5557+
int e1_len, e2_len, len;
5558+
int split = 0;
5559+
5560+
path1 = ext4_ext_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE);
5561+
if (IS_ERR(path1)) {
5562+
*erp = PTR_ERR(path1);
5563+
break;
5564+
}
5565+
path2 = ext4_ext_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE);
5566+
if (IS_ERR(path2)) {
5567+
*erp = PTR_ERR(path2);
5568+
break;
5569+
}
5570+
ex1 = path1[path1->p_depth].p_ext;
5571+
ex2 = path2[path2->p_depth].p_ext;
5572+
/* Do we have somthing to swap ? */
5573+
if (unlikely(!ex2 || !ex1))
5574+
break;
5575+
5576+
e1_blk = le32_to_cpu(ex1->ee_block);
5577+
e2_blk = le32_to_cpu(ex2->ee_block);
5578+
e1_len = ext4_ext_get_actual_len(ex1);
5579+
e2_len = ext4_ext_get_actual_len(ex2);
5580+
5581+
/* Hole handling */
5582+
if (!in_range(lblk1, e1_blk, e1_len) ||
5583+
!in_range(lblk2, e2_blk, e2_len)) {
5584+
ext4_lblk_t next1, next2;
5585+
5586+
/* if hole after extent, then go to next extent */
5587+
next1 = ext4_ext_next_allocated_block(path1);
5588+
next2 = ext4_ext_next_allocated_block(path2);
5589+
/* If hole before extent, then shift to that extent */
5590+
if (e1_blk > lblk1)
5591+
next1 = e1_blk;
5592+
if (e2_blk > lblk2)
5593+
next2 = e1_blk;
5594+
/* Do we have something to swap */
5595+
if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS)
5596+
break;
5597+
/* Move to the rightest boundary */
5598+
len = next1 - lblk1;
5599+
if (len < next2 - lblk2)
5600+
len = next2 - lblk2;
5601+
if (len > count)
5602+
len = count;
5603+
lblk1 += len;
5604+
lblk2 += len;
5605+
count -= len;
5606+
goto repeat;
5607+
}
5608+
5609+
/* Prepare left boundary */
5610+
if (e1_blk < lblk1) {
5611+
split = 1;
5612+
*erp = ext4_force_split_extent_at(handle, inode1,
5613+
path1, lblk1, 0);
5614+
if (*erp)
5615+
break;
5616+
}
5617+
if (e2_blk < lblk2) {
5618+
split = 1;
5619+
*erp = ext4_force_split_extent_at(handle, inode2,
5620+
path2, lblk2, 0);
5621+
if (*erp)
5622+
break;
5623+
}
5624+
/* ext4_split_extent_at() may retult in leaf extent split,
5625+
* path must to be revalidated. */
5626+
if (split)
5627+
goto repeat;
5628+
5629+
/* Prepare right boundary */
5630+
len = count;
5631+
if (len > e1_blk + e1_len - lblk1)
5632+
len = e1_blk + e1_len - lblk1;
5633+
if (len > e2_blk + e2_len - lblk2)
5634+
len = e2_blk + e2_len - lblk2;
5635+
5636+
if (len != e1_len) {
5637+
split = 1;
5638+
*erp = ext4_force_split_extent_at(handle, inode1,
5639+
path1, lblk1 + len, 0);
5640+
if (*erp)
5641+
break;
5642+
}
5643+
if (len != e2_len) {
5644+
split = 1;
5645+
*erp = ext4_force_split_extent_at(handle, inode2,
5646+
path2, lblk2 + len, 0);
5647+
if (*erp)
5648+
break;
5649+
}
5650+
/* ext4_split_extent_at() may retult in leaf extent split,
5651+
* path must to be revalidated. */
5652+
if (split)
5653+
goto repeat;
5654+
5655+
BUG_ON(e2_len != e1_len);
5656+
*erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth);
5657+
if (*erp)
5658+
break;
5659+
*erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth);
5660+
if (*erp)
5661+
break;
5662+
5663+
/* Both extents are fully inside boundaries. Swap it now */
5664+
tmp_ex = *ex1;
5665+
ext4_ext_store_pblock(ex1, ext4_ext_pblock(ex2));
5666+
ext4_ext_store_pblock(ex2, ext4_ext_pblock(&tmp_ex));
5667+
ex1->ee_len = cpu_to_le16(e2_len);
5668+
ex2->ee_len = cpu_to_le16(e1_len);
5669+
if (unwritten)
5670+
ext4_ext_mark_unwritten(ex2);
5671+
if (ext4_ext_is_unwritten(&tmp_ex))
5672+
ext4_ext_mark_unwritten(ex1);
5673+
5674+
ext4_ext_try_to_merge(handle, inode2, path2, ex2);
5675+
ext4_ext_try_to_merge(handle, inode1, path1, ex1);
5676+
*erp = ext4_ext_dirty(handle, inode2, path2 +
5677+
path2->p_depth);
5678+
if (*erp)
5679+
break;
5680+
*erp = ext4_ext_dirty(handle, inode1, path1 +
5681+
path1->p_depth);
5682+
/*
5683+
* Looks scarry ah..? second inode already points to new blocks,
5684+
* and it was successfully dirtied. But luckily error may happen
5685+
* only due to journal error, so full transaction will be
5686+
* aborted anyway.
5687+
*/
5688+
if (*erp)
5689+
break;
5690+
lblk1 += len;
5691+
lblk2 += len;
5692+
replaced_count += len;
5693+
count -= len;
5694+
5695+
repeat:
5696+
if (path1) {
5697+
ext4_ext_drop_refs(path1);
5698+
kfree(path1);
5699+
path1 = NULL;
5700+
}
5701+
if (path2) {
5702+
ext4_ext_drop_refs(path2);
5703+
kfree(path2);
5704+
path2 = NULL;
5705+
}
5706+
}
5707+
if (path1) {
5708+
ext4_ext_drop_refs(path1);
5709+
kfree(path1);
5710+
}
5711+
if (path2) {
5712+
ext4_ext_drop_refs(path2);
5713+
kfree(path2);
5714+
}
5715+
return replaced_count;
5716+
}

0 commit comments

Comments
 (0)