@@ -291,6 +291,19 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
291
291
return size ;
292
292
}
293
293
294
+ static inline int
295
+ ext4_force_split_extent_at (handle_t * handle , struct inode * inode ,
296
+ struct ext4_ext_path * path , ext4_lblk_t lblk ,
297
+ int nofail )
298
+ {
299
+ int unwritten = ext4_ext_is_unwritten (path [path -> p_depth ].p_ext );
300
+
301
+ return ext4_split_extent_at (handle , inode , path , lblk , unwritten ?
302
+ EXT4_EXT_MARK_UNWRIT1 |EXT4_EXT_MARK_UNWRIT2 : 0 ,
303
+ EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO |
304
+ (nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL :0 ));
305
+ }
306
+
294
307
/*
295
308
* Calculate the number of metadata blocks needed
296
309
* to allocate @blocks
@@ -1559,7 +1572,7 @@ static int ext4_ext_search_right(struct inode *inode,
1559
1572
* allocated block. Thus, index entries have to be consistent
1560
1573
* with leaves.
1561
1574
*/
1562
- static ext4_lblk_t
1575
+ ext4_lblk_t
1563
1576
ext4_ext_next_allocated_block (struct ext4_ext_path * path )
1564
1577
{
1565
1578
int depth ;
@@ -2854,24 +2867,14 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
2854
2867
*/
2855
2868
if (end >= ee_block &&
2856
2869
end < ee_block + ext4_ext_get_actual_len (ex ) - 1 ) {
2857
- int split_flag = 0 ;
2858
-
2859
- if (ext4_ext_is_unwritten (ex ))
2860
- split_flag = EXT4_EXT_MARK_UNWRIT1 |
2861
- EXT4_EXT_MARK_UNWRIT2 ;
2862
-
2863
2870
/*
2864
2871
* Split the extent in two so that 'end' is the last
2865
2872
* block in the first new extent. Also we should not
2866
2873
* fail removing space due to ENOSPC so try to use
2867
2874
* reserved block if that happens.
2868
2875
*/
2869
- err = ext4_split_extent_at (handle , inode , path ,
2870
- end + 1 , split_flag ,
2871
- EXT4_EX_NOCACHE |
2872
- EXT4_GET_BLOCKS_PRE_IO |
2873
- EXT4_GET_BLOCKS_METADATA_NOFAIL );
2874
-
2876
+ err = ext4_force_split_extent_at (handle , inode , path ,
2877
+ end + 1 , 1 );
2875
2878
if (err < 0 )
2876
2879
goto out ;
2877
2880
}
@@ -5506,3 +5509,208 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5506
5509
mutex_unlock (& inode -> i_mutex );
5507
5510
return ret ;
5508
5511
}
5512
+
5513
+ /**
5514
+ * ext4_swap_extents - Swap extents between two inodes
5515
+ *
5516
+ * @inode1: First inode
5517
+ * @inode2: Second inode
5518
+ * @lblk1: Start block for first inode
5519
+ * @lblk2: Start block for second inode
5520
+ * @count: Number of blocks to swap
5521
+ * @mark_unwritten: Mark second inode's extents as unwritten after swap
5522
+ * @erp: Pointer to save error value
5523
+ *
5524
+ * This helper routine does exactly what is promise "swap extents". All other
5525
+ * stuff such as page-cache locking consistency, bh mapping consistency or
5526
+ * extent's data copying must be performed by caller.
5527
+ * Locking:
5528
+ * i_mutex is held for both inodes
5529
+ * i_data_sem is locked for write for both inodes
5530
+ * Assumptions:
5531
+ * All pages from requested range are locked for both inodes
5532
+ */
5533
+ int
5534
+ ext4_swap_extents (handle_t * handle , struct inode * inode1 ,
5535
+ struct inode * inode2 , ext4_lblk_t lblk1 , ext4_lblk_t lblk2 ,
5536
+ ext4_lblk_t count , int unwritten , int * erp )
5537
+ {
5538
+ struct ext4_ext_path * path1 = NULL ;
5539
+ struct ext4_ext_path * path2 = NULL ;
5540
+ int replaced_count = 0 ;
5541
+
5542
+ BUG_ON (!rwsem_is_locked (& EXT4_I (inode1 )-> i_data_sem ));
5543
+ BUG_ON (!rwsem_is_locked (& EXT4_I (inode2 )-> i_data_sem ));
5544
+ BUG_ON (!mutex_is_locked (& inode1 -> i_mutex ));
5545
+ BUG_ON (!mutex_is_locked (& inode1 -> i_mutex ));
5546
+
5547
+ * erp = ext4_es_remove_extent (inode1 , lblk1 , count );
5548
+ if (* erp )
5549
+ return 0 ;
5550
+ * erp = ext4_es_remove_extent (inode2 , lblk2 , count );
5551
+ if (* erp )
5552
+ return 0 ;
5553
+
5554
+ while (count ) {
5555
+ struct ext4_extent * ex1 , * ex2 , tmp_ex ;
5556
+ ext4_lblk_t e1_blk , e2_blk ;
5557
+ int e1_len , e2_len , len ;
5558
+ int split = 0 ;
5559
+
5560
+ path1 = ext4_ext_find_extent (inode1 , lblk1 , NULL , EXT4_EX_NOCACHE );
5561
+ if (IS_ERR (path1 )) {
5562
+ * erp = PTR_ERR (path1 );
5563
+ break ;
5564
+ }
5565
+ path2 = ext4_ext_find_extent (inode2 , lblk2 , NULL , EXT4_EX_NOCACHE );
5566
+ if (IS_ERR (path2 )) {
5567
+ * erp = PTR_ERR (path2 );
5568
+ break ;
5569
+ }
5570
+ ex1 = path1 [path1 -> p_depth ].p_ext ;
5571
+ ex2 = path2 [path2 -> p_depth ].p_ext ;
5572
+ /* Do we have somthing to swap ? */
5573
+ if (unlikely (!ex2 || !ex1 ))
5574
+ break ;
5575
+
5576
+ e1_blk = le32_to_cpu (ex1 -> ee_block );
5577
+ e2_blk = le32_to_cpu (ex2 -> ee_block );
5578
+ e1_len = ext4_ext_get_actual_len (ex1 );
5579
+ e2_len = ext4_ext_get_actual_len (ex2 );
5580
+
5581
+ /* Hole handling */
5582
+ if (!in_range (lblk1 , e1_blk , e1_len ) ||
5583
+ !in_range (lblk2 , e2_blk , e2_len )) {
5584
+ ext4_lblk_t next1 , next2 ;
5585
+
5586
+ /* if hole after extent, then go to next extent */
5587
+ next1 = ext4_ext_next_allocated_block (path1 );
5588
+ next2 = ext4_ext_next_allocated_block (path2 );
5589
+ /* If hole before extent, then shift to that extent */
5590
+ if (e1_blk > lblk1 )
5591
+ next1 = e1_blk ;
5592
+ if (e2_blk > lblk2 )
5593
+ next2 = e1_blk ;
5594
+ /* Do we have something to swap */
5595
+ if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS )
5596
+ break ;
5597
+ /* Move to the rightest boundary */
5598
+ len = next1 - lblk1 ;
5599
+ if (len < next2 - lblk2 )
5600
+ len = next2 - lblk2 ;
5601
+ if (len > count )
5602
+ len = count ;
5603
+ lblk1 += len ;
5604
+ lblk2 += len ;
5605
+ count -= len ;
5606
+ goto repeat ;
5607
+ }
5608
+
5609
+ /* Prepare left boundary */
5610
+ if (e1_blk < lblk1 ) {
5611
+ split = 1 ;
5612
+ * erp = ext4_force_split_extent_at (handle , inode1 ,
5613
+ path1 , lblk1 , 0 );
5614
+ if (* erp )
5615
+ break ;
5616
+ }
5617
+ if (e2_blk < lblk2 ) {
5618
+ split = 1 ;
5619
+ * erp = ext4_force_split_extent_at (handle , inode2 ,
5620
+ path2 , lblk2 , 0 );
5621
+ if (* erp )
5622
+ break ;
5623
+ }
5624
+ /* ext4_split_extent_at() may retult in leaf extent split,
5625
+ * path must to be revalidated. */
5626
+ if (split )
5627
+ goto repeat ;
5628
+
5629
+ /* Prepare right boundary */
5630
+ len = count ;
5631
+ if (len > e1_blk + e1_len - lblk1 )
5632
+ len = e1_blk + e1_len - lblk1 ;
5633
+ if (len > e2_blk + e2_len - lblk2 )
5634
+ len = e2_blk + e2_len - lblk2 ;
5635
+
5636
+ if (len != e1_len ) {
5637
+ split = 1 ;
5638
+ * erp = ext4_force_split_extent_at (handle , inode1 ,
5639
+ path1 , lblk1 + len , 0 );
5640
+ if (* erp )
5641
+ break ;
5642
+ }
5643
+ if (len != e2_len ) {
5644
+ split = 1 ;
5645
+ * erp = ext4_force_split_extent_at (handle , inode2 ,
5646
+ path2 , lblk2 + len , 0 );
5647
+ if (* erp )
5648
+ break ;
5649
+ }
5650
+ /* ext4_split_extent_at() may retult in leaf extent split,
5651
+ * path must to be revalidated. */
5652
+ if (split )
5653
+ goto repeat ;
5654
+
5655
+ BUG_ON (e2_len != e1_len );
5656
+ * erp = ext4_ext_get_access (handle , inode1 , path1 + path1 -> p_depth );
5657
+ if (* erp )
5658
+ break ;
5659
+ * erp = ext4_ext_get_access (handle , inode2 , path2 + path2 -> p_depth );
5660
+ if (* erp )
5661
+ break ;
5662
+
5663
+ /* Both extents are fully inside boundaries. Swap it now */
5664
+ tmp_ex = * ex1 ;
5665
+ ext4_ext_store_pblock (ex1 , ext4_ext_pblock (ex2 ));
5666
+ ext4_ext_store_pblock (ex2 , ext4_ext_pblock (& tmp_ex ));
5667
+ ex1 -> ee_len = cpu_to_le16 (e2_len );
5668
+ ex2 -> ee_len = cpu_to_le16 (e1_len );
5669
+ if (unwritten )
5670
+ ext4_ext_mark_unwritten (ex2 );
5671
+ if (ext4_ext_is_unwritten (& tmp_ex ))
5672
+ ext4_ext_mark_unwritten (ex1 );
5673
+
5674
+ ext4_ext_try_to_merge (handle , inode2 , path2 , ex2 );
5675
+ ext4_ext_try_to_merge (handle , inode1 , path1 , ex1 );
5676
+ * erp = ext4_ext_dirty (handle , inode2 , path2 +
5677
+ path2 -> p_depth );
5678
+ if (* erp )
5679
+ break ;
5680
+ * erp = ext4_ext_dirty (handle , inode1 , path1 +
5681
+ path1 -> p_depth );
5682
+ /*
5683
+ * Looks scarry ah..? second inode already points to new blocks,
5684
+ * and it was successfully dirtied. But luckily error may happen
5685
+ * only due to journal error, so full transaction will be
5686
+ * aborted anyway.
5687
+ */
5688
+ if (* erp )
5689
+ break ;
5690
+ lblk1 += len ;
5691
+ lblk2 += len ;
5692
+ replaced_count += len ;
5693
+ count -= len ;
5694
+
5695
+ repeat :
5696
+ if (path1 ) {
5697
+ ext4_ext_drop_refs (path1 );
5698
+ kfree (path1 );
5699
+ path1 = NULL ;
5700
+ }
5701
+ if (path2 ) {
5702
+ ext4_ext_drop_refs (path2 );
5703
+ kfree (path2 );
5704
+ path2 = NULL ;
5705
+ }
5706
+ }
5707
+ if (path1 ) {
5708
+ ext4_ext_drop_refs (path1 );
5709
+ kfree (path1 );
5710
+ }
5711
+ if (path2 ) {
5712
+ ext4_ext_drop_refs (path2 );
5713
+ kfree (path2 );
5714
+ }
5715
+ return replaced_count ;
5716
+ }
0 commit comments