Skip to content

Commit 01ff3de

Browse files
HBASE-29210: Introduce Validation for PITR-Critical Backup Deletion (#6848)
Signed-off-by: Andor Molnár <[email protected]> Signed-off-by: Wellington Chevreuil <[email protected]>
1 parent 5c9bcbd commit 01ff3de

File tree

5 files changed

+369
-8
lines changed

5 files changed

+369
-8
lines changed

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupDriver.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,16 @@
1818
package org.apache.hadoop.hbase.backup;
1919

2020
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.LONG_OPTION_ENABLE_CONTINUOUS_BACKUP;
21+
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.LONG_OPTION_FORCE_DELETE;
2122
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_BACKUP_LIST_DESC;
2223
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_BANDWIDTH;
2324
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_BANDWIDTH_DESC;
2425
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_DEBUG;
2526
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_DEBUG_DESC;
2627
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_ENABLE_CONTINUOUS_BACKUP;
2728
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_ENABLE_CONTINUOUS_BACKUP_DESC;
29+
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_FORCE_DELETE;
30+
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_FORCE_DELETE_DESC;
2831
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_IGNORECHECKSUM;
2932
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_IGNORECHECKSUM_DESC;
3033
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_KEEP;
@@ -164,6 +167,7 @@ protected void addOptions() {
164167
addOptWithArg(OPTION_YARN_QUEUE_NAME, OPTION_YARN_QUEUE_NAME_DESC);
165168
addOptNoArg(OPTION_ENABLE_CONTINUOUS_BACKUP, LONG_OPTION_ENABLE_CONTINUOUS_BACKUP,
166169
OPTION_ENABLE_CONTINUOUS_BACKUP_DESC);
170+
addOptNoArg(OPTION_FORCE_DELETE, LONG_OPTION_FORCE_DELETE, OPTION_FORCE_DELETE_DESC);
167171
}
168172

169173
@Override

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,11 @@ public interface BackupRestoreConstants {
101101
String OPTION_ENABLE_CONTINUOUS_BACKUP_DESC =
102102
"Flag indicating that the full backup is part of a continuous backup process.";
103103

104+
String OPTION_FORCE_DELETE = "fd";
105+
String LONG_OPTION_FORCE_DELETE = "force-delete";
106+
String OPTION_FORCE_DELETE_DESC =
107+
"Flag to forcefully delete the backup, even if it may be required for Point-in-Time Restore";
108+
104109
String JOB_NAME_CONF_KEY = "mapreduce.job.name";
105110

106111
String BACKUP_CONFIG_STRING =
@@ -134,6 +139,9 @@ public interface BackupRestoreConstants {
134139

135140
String CONF_CONTINUOUS_BACKUP_WAL_DIR = "hbase.backup.continuous.wal.dir";
136141

142+
String CONF_CONTINUOUS_BACKUP_PITR_WINDOW_DAYS = "hbase.backup.continuous.pitr.window.days";
143+
long DEFAULT_CONTINUOUS_BACKUP_PITR_WINDOW_DAYS = 30;
144+
137145
enum BackupCommand {
138146
CREATE,
139147
CANCEL,

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java

Lines changed: 168 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,17 @@
1717
*/
1818
package org.apache.hadoop.hbase.backup.impl;
1919

20+
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONF_CONTINUOUS_BACKUP_PITR_WINDOW_DAYS;
21+
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.DEFAULT_CONTINUOUS_BACKUP_PITR_WINDOW_DAYS;
2022
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_BACKUP_LIST_DESC;
2123
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_BANDWIDTH;
2224
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_BANDWIDTH_DESC;
2325
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_DEBUG;
2426
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_DEBUG_DESC;
2527
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_ENABLE_CONTINUOUS_BACKUP;
2628
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_ENABLE_CONTINUOUS_BACKUP_DESC;
29+
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_FORCE_DELETE;
30+
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_FORCE_DELETE_DESC;
2731
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_IGNORECHECKSUM;
2832
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_IGNORECHECKSUM_DESC;
2933
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_KEEP;
@@ -46,8 +50,12 @@
4650

4751
import java.io.IOException;
4852
import java.net.URI;
53+
import java.util.ArrayList;
4954
import java.util.List;
55+
import java.util.Map;
5056
import java.util.Set;
57+
import java.util.concurrent.TimeUnit;
58+
import org.agrona.collections.MutableLong;
5159
import org.apache.commons.lang3.StringUtils;
5260
import org.apache.hadoop.conf.Configuration;
5361
import org.apache.hadoop.conf.Configured;
@@ -632,15 +640,18 @@ public void execute() throws IOException {
632640
printUsage();
633641
throw new IOException(INCORRECT_USAGE);
634642
}
643+
644+
boolean isForceDelete = cmdline.hasOption(OPTION_FORCE_DELETE);
635645
super.execute();
636646
if (cmdline.hasOption(OPTION_KEEP)) {
637-
executeDeleteOlderThan(cmdline);
647+
executeDeleteOlderThan(cmdline, isForceDelete);
638648
} else if (cmdline.hasOption(OPTION_LIST)) {
639-
executeDeleteListOfBackups(cmdline);
649+
executeDeleteListOfBackups(cmdline, isForceDelete);
640650
}
641651
}
642652

643-
private void executeDeleteOlderThan(CommandLine cmdline) throws IOException {
653+
private void executeDeleteOlderThan(CommandLine cmdline, boolean isForceDelete)
654+
throws IOException {
644655
String value = cmdline.getOptionValue(OPTION_KEEP);
645656
int days = 0;
646657
try {
@@ -662,6 +673,7 @@ public boolean apply(BackupInfo info) {
662673
BackupAdminImpl admin = new BackupAdminImpl(conn)) {
663674
history = sysTable.getBackupHistory(-1, dateFilter);
664675
String[] backupIds = convertToBackupIds(history);
676+
validatePITRBackupDeletion(backupIds, isForceDelete);
665677
int deleted = admin.deleteBackups(backupIds);
666678
System.out.println("Deleted " + deleted + " backups. Total older than " + days + " days: "
667679
+ backupIds.length);
@@ -680,10 +692,11 @@ private String[] convertToBackupIds(List<BackupInfo> history) {
680692
return ids;
681693
}
682694

683-
private void executeDeleteListOfBackups(CommandLine cmdline) throws IOException {
695+
private void executeDeleteListOfBackups(CommandLine cmdline, boolean isForceDelete)
696+
throws IOException {
684697
String value = cmdline.getOptionValue(OPTION_LIST);
685698
String[] backupIds = value.split(",");
686-
699+
validatePITRBackupDeletion(backupIds, isForceDelete);
687700
try (BackupAdminImpl admin = new BackupAdminImpl(conn)) {
688701
int deleted = admin.deleteBackups(backupIds);
689702
System.out.println("Deleted " + deleted + " backups. Total requested: " + backupIds.length);
@@ -695,12 +708,162 @@ private void executeDeleteListOfBackups(CommandLine cmdline) throws IOException
695708

696709
}
697710

711+
/**
712+
* Validates whether the specified backups can be deleted while preserving Point-In-Time
713+
* Recovery (PITR) capabilities. If a backup is the only remaining full backup enabling PITR for
714+
* certain tables, deletion is prevented unless forced.
715+
* @param backupIds Array of backup IDs to validate.
716+
* @param isForceDelete Flag indicating whether deletion should proceed regardless of PITR
717+
* constraints.
718+
* @throws IOException If a backup is essential for PITR and force deletion is not enabled.
719+
*/
720+
private void validatePITRBackupDeletion(String[] backupIds, boolean isForceDelete)
721+
throws IOException {
722+
if (!isForceDelete) {
723+
for (String backupId : backupIds) {
724+
List<TableName> affectedTables = getTablesDependentOnBackupForPITR(backupId);
725+
if (!affectedTables.isEmpty()) {
726+
String errMsg = String.format(
727+
"Backup %s is the only FULL backup remaining that enables PITR for tables: %s. "
728+
+ "Use the force option to delete it anyway.",
729+
backupId, affectedTables);
730+
System.err.println(errMsg);
731+
throw new IOException(errMsg);
732+
}
733+
}
734+
}
735+
}
736+
737+
/**
738+
* Identifies tables that rely on the specified backup for PITR. If a table has no other valid
739+
* FULL backups that can facilitate recovery to all points within the PITR retention window, it
740+
* is added to the dependent list.
741+
* @param backupId The backup ID being evaluated.
742+
* @return List of tables dependent on the specified backup for PITR.
743+
* @throws IOException If backup metadata cannot be retrieved.
744+
*/
745+
private List<TableName> getTablesDependentOnBackupForPITR(String backupId) throws IOException {
746+
List<TableName> dependentTables = new ArrayList<>();
747+
748+
try (final BackupSystemTable backupSystemTable = new BackupSystemTable(conn)) {
749+
BackupInfo targetBackup = backupSystemTable.readBackupInfo(backupId);
750+
751+
if (targetBackup == null) {
752+
throw new IOException("Backup info not found for backupId: " + backupId);
753+
}
754+
755+
// Only full backups are mandatory for PITR
756+
if (!BackupType.FULL.equals(targetBackup.getType())) {
757+
return List.of();
758+
}
759+
760+
// Retrieve the tables with continuous backup enabled and their start times
761+
Map<TableName, Long> continuousBackupStartTimes =
762+
backupSystemTable.getContinuousBackupTableSet();
763+
764+
// Determine the PITR time window
765+
long pitrWindowDays = getConf().getLong(CONF_CONTINUOUS_BACKUP_PITR_WINDOW_DAYS,
766+
DEFAULT_CONTINUOUS_BACKUP_PITR_WINDOW_DAYS);
767+
long currentTime = EnvironmentEdgeManager.getDelegate().currentTime();
768+
final MutableLong pitrMaxStartTime =
769+
new MutableLong(currentTime - TimeUnit.DAYS.toMillis(pitrWindowDays));
770+
771+
// For all tables, determine the earliest (minimum) continuous backup start time.
772+
// This represents the actual earliest point-in-time recovery (PITR) timestamp
773+
// that can be used, ensuring we do not go beyond the available backup data.
774+
long minContinuousBackupStartTime = currentTime;
775+
for (TableName table : targetBackup.getTableNames()) {
776+
minContinuousBackupStartTime = Math.min(minContinuousBackupStartTime,
777+
continuousBackupStartTimes.getOrDefault(table, currentTime));
778+
}
779+
780+
// The PITR max start time should be the maximum of the calculated minimum continuous backup
781+
// start time and the default PITR max start time (based on the configured window).
782+
// This ensures that PITR does not extend beyond what is practically possible.
783+
pitrMaxStartTime.set(Math.max(minContinuousBackupStartTime, pitrMaxStartTime.longValue()));
784+
785+
for (TableName table : targetBackup.getTableNames()) {
786+
// This backup is not necessary for this table since it doesn't have PITR enabled
787+
if (!continuousBackupStartTimes.containsKey(table)) {
788+
continue;
789+
}
790+
if (
791+
!isValidPITRBackup(targetBackup, table, continuousBackupStartTimes,
792+
pitrMaxStartTime.longValue())
793+
) {
794+
continue; // This backup is not crucial for PITR of this table
795+
}
796+
797+
// Check if another valid full backup exists for this table
798+
List<BackupInfo> backupHistory = backupSystemTable.getBackupInfos(BackupState.COMPLETE);
799+
boolean hasAnotherValidBackup = backupHistory.stream()
800+
.anyMatch(backup -> !backup.getBackupId().equals(backupId) && isValidPITRBackup(backup,
801+
table, continuousBackupStartTimes, pitrMaxStartTime.longValue()));
802+
803+
if (!hasAnotherValidBackup) {
804+
dependentTables.add(table);
805+
}
806+
}
807+
}
808+
return dependentTables;
809+
}
810+
811+
/**
812+
* Determines if a given backup is a valid candidate for Point-In-Time Recovery (PITR) for a
813+
* specific table. A valid backup ensures that recovery is possible to any point within the PITR
814+
* retention window. A backup qualifies if:
815+
* <ul>
816+
* <li>It is a FULL backup.</li>
817+
* <li>It contains the specified table.</li>
818+
* <li>Its completion timestamp is before the PITR retention window start time.</li>
819+
* <li>Its completion timestamp is on or after the table’s continuous backup start time.</li>
820+
* </ul>
821+
* @param backupInfo The backup information being evaluated.
822+
* @param tableName The table for which PITR validity is being checked.
823+
* @param continuousBackupTables A map of tables to their continuous backup start time.
824+
* @param pitrMaxStartTime The maximum allowed start timestamp for PITR eligibility.
825+
* @return {@code true} if the backup enables recovery to all valid points in time for the
826+
* table; {@code false} otherwise.
827+
*/
828+
private boolean isValidPITRBackup(BackupInfo backupInfo, TableName tableName,
829+
Map<TableName, Long> continuousBackupTables, long pitrMaxStartTime) {
830+
// Only FULL backups are mandatory for PITR
831+
if (!BackupType.FULL.equals(backupInfo.getType())) {
832+
return false;
833+
}
834+
835+
// The backup must include the table to be relevant for PITR
836+
if (!backupInfo.getTableNames().contains(tableName)) {
837+
return false;
838+
}
839+
840+
// The backup must have been completed before the PITR retention window starts,
841+
// otherwise, it won't be helpful in cases where the recovery point is between
842+
// pitrMaxStartTime and the backup completion time.
843+
if (backupInfo.getCompleteTs() > pitrMaxStartTime) {
844+
return false;
845+
}
846+
847+
// Retrieve the table's continuous backup start time
848+
long continuousBackupStartTime = continuousBackupTables.getOrDefault(tableName, 0L);
849+
850+
// The backup must have been started on or after the table’s continuous backup start time,
851+
// otherwise, it won't be helpful in few cases because we wouldn't have the WAL entries
852+
// between the backup start time and the continuous backup start time.
853+
if (backupInfo.getStartTs() < continuousBackupStartTime) {
854+
return false;
855+
}
856+
857+
return true;
858+
}
859+
698860
@Override
699861
protected void printUsage() {
700862
System.out.println(DELETE_CMD_USAGE);
701863
Options options = new Options();
702864
options.addOption(OPTION_KEEP, true, OPTION_KEEP_DESC);
703865
options.addOption(OPTION_LIST, true, OPTION_BACKUP_LIST_DESC);
866+
options.addOption(OPTION_FORCE_DELETE, false, OPTION_FORCE_DELETE_DESC);
704867

705868
HelpFormatter helpFormatter = new HelpFormatter();
706869
helpFormatter.setLeftPadding(2);

hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupDelete.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import static org.junit.Assert.assertEquals;
2121
import static org.junit.Assert.assertTrue;
22+
import static org.junit.Assert.fail;
2223

2324
import java.io.ByteArrayOutputStream;
2425
import java.io.PrintStream;
@@ -32,7 +33,6 @@
3233
import org.apache.hadoop.hbase.util.EnvironmentEdge;
3334
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
3435
import org.apache.hadoop.util.ToolRunner;
35-
import org.junit.Assert;
3636
import org.junit.ClassRule;
3737
import org.junit.Test;
3838
import org.junit.experimental.categories.Category;
@@ -138,7 +138,7 @@ public long currentTime() {
138138
assertTrue(ret == 0);
139139
} catch (Exception e) {
140140
LOG.error("failed", e);
141-
Assert.fail(e.getMessage());
141+
fail(e.getMessage());
142142
}
143143
String output = baos.toString();
144144
LOG.info(baos.toString());
@@ -154,7 +154,7 @@ public long currentTime() {
154154
assertTrue(ret == 0);
155155
} catch (Exception e) {
156156
LOG.error("failed", e);
157-
Assert.fail(e.getMessage());
157+
fail(e.getMessage());
158158
}
159159
output = baos.toString();
160160
LOG.info(baos.toString());

0 commit comments

Comments
 (0)