Skip to content

Commit d4ab9ae

Browse files
lokeshj1703 authored and bshashikant committed
HDDS-1816: ContainerStateMachine should limit number of pending apply transactions. Adds a config, uses snapshot threshold default value. (#1150)
1 parent e5c4131 commit d4ab9ae

File tree

3 files changed

+30
-2
lines changed

3 files changed

+30
-2
lines changed

hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,14 @@ public final class ScmConfigKeys {
9090
"dfs.container.ratis.statemachinedata.sync.retries";
9191
public static final int
9292
DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES_DEFAULT = -1;
93+
public static final String
94+
DFS_CONTAINER_RATIS_STATEMACHINE_MAX_PENDING_APPLY_TRANSACTIONS =
95+
"dfs.container.ratis.statemachine.max.pending.apply-transactions";
96+
// The default maximum number of pending state machine apply transactions
97+
// is kept the same as the default snapshot threshold.
98+
public static final int
99+
DFS_CONTAINER_RATIS_STATEMACHINE_MAX_PENDING_APPLY_TRANSACTIONS_DEFAULT =
100+
100000;
93101
public static final String DFS_CONTAINER_RATIS_LOG_QUEUE_NUM_ELEMENTS =
94102
"dfs.container.ratis.log.queue.num-elements";
95103
public static final int DFS_CONTAINER_RATIS_LOG_QUEUE_NUM_ELEMENTS_DEFAULT =

hadoop-hdds/common/src/main/resources/ozone-default.xml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,15 @@
186186
taken.
187187
</description>
188188
</property>
189+
<property>
190+
<name>dfs.container.ratis.statemachine.max.pending.apply-transactions</name>
191+
<value>10000</value>
192+
<tag>OZONE, RATIS</tag>
193+
<description>Maximum number of pending apply transactions in a data
194+
pipeline. The default value is kept the same as the default snapshot
195+
threshold, dfs.ratis.snapshot.threshold.
196+
</description>
197+
</property>
189198
<property>
190199
<name>dfs.container.ratis.num.write.chunk.threads</name>
191200
<value>60</value>

hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.apache.hadoop.hdds.HddsUtils;
2727
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
2828

29+
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
2930
import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerNotOpenException;
3031
import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
3132
import org.apache.hadoop.ozone.OzoneConfigKeys;
@@ -79,6 +80,7 @@
7980
import java.util.concurrent.ConcurrentHashMap;
8081
import java.util.concurrent.ThreadPoolExecutor;
8182
import java.util.concurrent.ExecutorService;
83+
import java.util.concurrent.Semaphore;
8284
import java.util.concurrent.TimeUnit;
8385
import java.util.concurrent.ExecutionException;
8486
import java.util.stream.Collectors;
@@ -146,6 +148,8 @@ public class ContainerStateMachine extends BaseStateMachine {
146148
private final Cache<Long, ByteString> stateMachineDataCache;
147149
private final boolean isBlockTokenEnabled;
148150
private final TokenVerifier tokenVerifier;
151+
152+
private final Semaphore applyTransactionSemaphore;
149153
/**
150154
* CSM metrics.
151155
*/
@@ -175,6 +179,12 @@ public ContainerStateMachine(RaftGroupId gid, ContainerDispatcher dispatcher,
175179
final int numContainerOpExecutors = conf.getInt(
176180
OzoneConfigKeys.DFS_CONTAINER_RATIS_NUM_CONTAINER_OP_EXECUTORS_KEY,
177181
OzoneConfigKeys.DFS_CONTAINER_RATIS_NUM_CONTAINER_OP_EXECUTORS_DEFAULT);
182+
int maxPendingApplyTransactions = conf.getInt(
183+
ScmConfigKeys.
184+
DFS_CONTAINER_RATIS_STATEMACHINE_MAX_PENDING_APPLY_TRANSACTIONS,
185+
ScmConfigKeys.
186+
DFS_CONTAINER_RATIS_STATEMACHINE_MAX_PENDING_APPLY_TRANSACTIONS_DEFAULT);
187+
applyTransactionSemaphore = new Semaphore(maxPendingApplyTransactions);
178188
this.executors = new ExecutorService[numContainerOpExecutors];
179189
for (int i = 0; i < numContainerOpExecutors; i++) {
180190
final int index = i;
@@ -626,6 +636,7 @@ public CompletableFuture<Message> applyTransaction(TransactionContext trx) {
626636
.setLogIndex(index);
627637

628638
try {
639+
applyTransactionSemaphore.acquire();
629640
metrics.incNumApplyTransactionsOps();
630641
ContainerCommandRequestProto requestProto =
631642
getContainerCommandRequestProto(
@@ -663,9 +674,9 @@ public CompletableFuture<Message> applyTransaction(TransactionContext trx) {
663674
requestProto.getWriteChunk().getChunkData().getLen());
664675
}
665676
updateLastApplied();
666-
});
677+
}).whenComplete((r, t) -> applyTransactionSemaphore.release());
667678
return future;
668-
} catch (IOException e) {
679+
} catch (IOException | InterruptedException e) {
669680
metrics.incNumApplyTransactionsFails();
670681
return completeExceptionally(e);
671682
}

0 commit comments

Comments
 (0)