Skip to content

Commit 5565f2c

Browse files
committed
MAPREDUCE-7198. mapreduce.task.timeout=0 configuration used to disable timeout doesn't work.
1 parent 9c61494 commit 5565f2c

File tree

3 files changed

+46
-3
lines changed

3 files changed

+46
-3
lines changed

hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/TaskHeartbeatHandler.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,8 @@ private void checkRunning(long currentTime) {
192192
(currentTime > (entry.getValue().getLastProgress() + taskTimeOut));
193193
// when container in NM not started in a long time,
194194
// we think the taskAttempt is stuck
195-
boolean taskStuck = (!entry.getValue().isReported()) &&
195+
boolean taskStuck = (taskStuckTimeOut > 0) &&
196+
(!entry.getValue().isReported()) &&
196197
(currentTime >
197198
(entry.getValue().getLastProgress() + taskStuckTimeOut));
198199

@@ -225,7 +226,7 @@ private void checkRecentlyUnregistered(long currentTime) {
225226
}
226227

227228
@VisibleForTesting
228-
ConcurrentMap getRunningAttempts(){
229+
ConcurrentMap<TaskAttemptId, ReportTime> getRunningAttempts(){
229230
return runningAttempts;
230231
}
231232

hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestTaskHeartbeatHandler.java

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import static org.junit.Assert.assertFalse;
2222
import static org.mockito.ArgumentMatchers.any;
2323
import static org.mockito.Mockito.mock;
24+
import static org.mockito.Mockito.never;
2425
import static org.mockito.Mockito.times;
2526
import static org.mockito.Mockito.verify;
2627

@@ -48,7 +49,7 @@
4849

4950
public class TestTaskHeartbeatHandler {
5051

51-
@SuppressWarnings({ "rawtypes", "unchecked" })
52+
@SuppressWarnings("unchecked")
5253
@Test
5354
public void testTaskTimeout() throws InterruptedException {
5455
EventHandler mockHandler = mock(EventHandler.class);
@@ -81,6 +82,46 @@ public void testTaskTimeout() throws InterruptedException {
8182
}
8283
}
8384

85+
@Test
86+
@SuppressWarnings("unchecked")
87+
public void testTaskTimeoutDisable() throws InterruptedException {
88+
EventHandler mockHandler = mock(EventHandler.class);
89+
Clock clock = SystemClock.getInstance();
90+
TaskHeartbeatHandler hb = new TaskHeartbeatHandler(mockHandler, clock, 1);
91+
92+
Configuration conf = new Configuration();
93+
conf.setLong(MRJobConfig.TASK_STUCK_TIMEOUT_MS, 0); // no timeout
94+
conf.setInt(MRJobConfig.TASK_TIMEOUT, 0); // no timeout
95+
// set TASK_PROGRESS_REPORT_INTERVAL to a value smaller than TASK_TIMEOUT
96+
// so that TASK_TIMEOUT is not overridden
97+
conf.setLong(MRJobConfig.TASK_PROGRESS_REPORT_INTERVAL, 0);
98+
conf.setInt(MRJobConfig.TASK_TIMEOUT_CHECK_INTERVAL_MS, 10); //10 ms
99+
100+
hb.init(conf);
101+
hb.start();
102+
try {
103+
ApplicationId appId = ApplicationId.newInstance(0L, 5);
104+
JobId jobId = MRBuilderUtils.newJobId(appId, 4);
105+
TaskId tid = MRBuilderUtils.newTaskId(jobId, 3, TaskType.MAP);
106+
TaskAttemptId taid = MRBuilderUtils.newTaskAttemptId(tid, 2);
107+
hb.register(taid);
108+
109+
ConcurrentMap<TaskAttemptId, TaskHeartbeatHandler.ReportTime>
110+
runningAttempts = hb.getRunningAttempts();
111+
for (Map.Entry<TaskAttemptId, TaskHeartbeatHandler.ReportTime> entry
112+
: runningAttempts.entrySet()) {
113+
assertFalse(entry.getValue().isReported());
114+
}
115+
116+
Thread.sleep(100);
117+
118+
// Timeout is disabled, so the task should not be canceled
119+
verify(mockHandler, never()).handle(any(Event.class));
120+
} finally {
121+
hb.stop();
122+
}
123+
}
124+
84125
@SuppressWarnings("unchecked")
85126
@Test
86127
public void testTaskStuck() throws InterruptedException {

hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,7 @@
282282
<description>The max timeout before receiving remote task's first heartbeat.
283283
This parameter is in order to avoid waiting for the container
284284
to start indefinitely, which made task stuck in the NEW state.
285+
A value of 0 disables the timeout.
285286
</description>
286287
</property>
287288

0 commit comments

Comments
 (0)