Skip to content

Commit accb811

Browse files
committed
YARN-6929. Improved partition algorithm for yarn remote-app-log-dir.
Contributed by Prabhu Joseph
1 parent dead9b4 commit accb811

File tree

10 files changed

+398
-122
lines changed

10 files changed

+398
-122
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1434,13 +1434,20 @@ public static boolean isAclEnabled(Configuration conf) {
14341434
public static final String DEFAULT_NM_REMOTE_APP_LOG_DIR = "/tmp/logs";
14351435

14361436
/**
1437-
* The remote log dir will be created at
1438-
* NM_REMOTE_APP_LOG_DIR/${user}/NM_REMOTE_APP_LOG_DIR_SUFFIX/${appId}
1437+
* The remote log dir will be created at the location below.
1438+
* NM_REMOTE_APP_LOG_DIR/${user}/bucket_{NM_REMOTE_APP_LOG_DIR_SUFFIX}
1439+
* /${bucketDir}/${appId}
14391440
*/
14401441
public static final String NM_REMOTE_APP_LOG_DIR_SUFFIX =
14411442
NM_PREFIX + "remote-app-log-dir-suffix";
14421443
public static final String DEFAULT_NM_REMOTE_APP_LOG_DIR_SUFFIX="logs";
14431444

1445+
/** Whether the older (non-bucketed) app log dir is also read for logs. */
1446+
public static final String NM_REMOTE_APP_LOG_DIR_INCLUDE_OLDER =
1447+
NM_PREFIX + "remote-app-log-dir-include-older";
1448+
public static final boolean DEFAULT_NM_REMOTE_APP_LOG_DIR_INCLUDE_OLDER =
1449+
true;
1450+
14441451
public static final String YARN_LOG_SERVER_URL =
14451452
YARN_PREFIX + "log.server.url";
14461453

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogDeletionService.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ static class LogDeletionTask extends TimerTask {
6767
public LogDeletionTask(Configuration conf, long retentionSecs, ApplicationClientProtocol rmClient) {
6868
this.conf = conf;
6969
this.retentionMillis = retentionSecs * 1000;
70-
this.suffix = LogAggregationUtils.getRemoteNodeLogDirSuffix(conf);
70+
this.suffix = LogAggregationUtils.getBucketSuffix();
7171
this.remoteRootLogDir =
7272
new Path(conf.get(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
7373
YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR));
@@ -82,8 +82,18 @@ public void run() {
8282
FileSystem fs = remoteRootLogDir.getFileSystem(conf);
8383
for(FileStatus userDir : fs.listStatus(remoteRootLogDir)) {
8484
if(userDir.isDirectory()) {
85-
Path userDirPath = new Path(userDir.getPath(), suffix);
86-
deleteOldLogDirsFrom(userDirPath, cutoffMillis, fs, rmClient);
85+
for (FileStatus suffixDir : fs.listStatus(userDir.getPath())) {
86+
Path suffixDirPath = suffixDir.getPath();
87+
if (suffixDir.isDirectory() && suffixDirPath.getName().
88+
startsWith(suffix)) {
89+
for (FileStatus bucketDir : fs.listStatus(suffixDirPath)) {
90+
if (bucketDir.isDirectory()) {
91+
deleteOldLogDirsFrom(bucketDir.getPath(), cutoffMillis,
92+
fs, rmClient);
93+
}
94+
}
95+
}
96+
}
8797
}
8898
}
8999
} catch (Throwable t) {

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogAggregationUtils.java

Lines changed: 167 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
public class LogAggregationUtils {
3939

4040
public static final String TMP_FILE_SUFFIX = ".tmp";
41+
private static final String BUCKET_SUFFIX = "bucket_";
4142

4243
/**
4344
* Constructs the full filename for an application's log file per node.
@@ -64,8 +65,22 @@ public static Path getRemoteNodeLogFileForApp(Path remoteRootLogDir,
6465
*/
6566
public static Path getRemoteAppLogDir(Path remoteRootLogDir,
6667
ApplicationId appId, String user, String suffix) {
67-
return new Path(getRemoteLogSuffixedDir(remoteRootLogDir, user, suffix),
68-
appId.toString());
68+
return new Path(getRemoteBucketDir(remoteRootLogDir, user, suffix,
69+
appId), appId.toString());
70+
}
71+
72+
/**
73+
* Gets the older remote app log dir.
74+
* @param appId the application id
75+
* @param user the application owner
76+
* @param remoteRootLogDir the aggregated log remote root log dir
77+
* @param suffix the log directory suffix
78+
* @return the remote application specific log dir.
79+
*/
80+
public static Path getOlderRemoteAppLogDir(ApplicationId appId,
81+
String user, Path remoteRootLogDir, String suffix) {
82+
return new Path(getOlderRemoteLogSuffixedDir(remoteRootLogDir, user,
83+
suffix), appId.toString());
6984
}
7085

7186
/**
@@ -77,6 +92,19 @@ public static Path getRemoteAppLogDir(Path remoteRootLogDir,
7792
*/
7893
public static Path getRemoteLogSuffixedDir(Path remoteRootLogDir,
7994
String user, String suffix) {
95+
suffix = getBucketSuffix() + suffix;
96+
return new Path(getRemoteLogUserDir(remoteRootLogDir, user), suffix);
97+
}
98+
99+
/**
100+
* Gets the older remote suffixed log dir for the user.
101+
* @param remoteRootLogDir the aggregated log remote root log dir
102+
* @param user the application owner
103+
* @param suffix the log dir suffix
104+
* @return the older remote suffixed log dir.
105+
*/
106+
public static Path getOlderRemoteLogSuffixedDir(Path remoteRootLogDir,
107+
String user, String suffix) {
80108
if (suffix == null || suffix.isEmpty()) {
81109
return getRemoteLogUserDir(remoteRootLogDir, user);
82110
}
@@ -94,6 +122,33 @@ public static Path getRemoteLogUserDir(Path remoteRootLogDir, String user) {
94122
return new Path(remoteRootLogDir, user);
95123
}
96124

125+
/**
126+
* Gets the remote log user's bucket dir.
127+
* @param remoteRootLogDir the aggregated log remote root log dir
128+
* @param user the application owner
129+
* @param suffix the log dir suffix
130+
* @param appId the application id
131+
* @return the remote log per user per bucket dir.
132+
*/
133+
public static Path getRemoteBucketDir(Path remoteRootLogDir, String user,
134+
String suffix, ApplicationId appId) {
135+
int bucket = appId.getId() % 10000;
136+
String bucketDir = String.format("%04d", bucket);
137+
return new Path(getRemoteLogSuffixedDir(remoteRootLogDir,
138+
user, suffix), bucketDir);
139+
}
140+
141+
/**
142+
* Check if older Application Log Directory has to be included.
143+
* @param conf the configuration
144+
* @return true if the older app log dir should be included
145+
*/
146+
public static boolean isOlderPathEnabled(Configuration conf) {
147+
return conf.getBoolean(YarnConfiguration.
148+
NM_REMOTE_APP_LOG_DIR_INCLUDE_OLDER,
149+
YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR_INCLUDE_OLDER);
150+
}
151+
97152
/**
98153
* Returns the suffix component of the log dir.
99154
* @param conf the configuration
@@ -104,6 +159,14 @@ public static String getRemoteNodeLogDirSuffix(Configuration conf) {
104159
YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR_SUFFIX);
105160
}
106161

162+
/**
163+
* Returns the bucket suffix component of the log dir.
164+
* @return the bucket suffix which is prepended to the log dir suffix
165+
*/
166+
public static String getBucketSuffix() {
167+
return BUCKET_SUFFIX;
168+
}
169+
107170

108171
/**
109172
* Converts a nodeId to a form used in the app log file name.
@@ -174,6 +237,24 @@ public static org.apache.hadoop.fs.Path getRemoteAppLogDir(
174237
return remoteAppDir;
175238
}
176239

240+
/**
241+
* Get all available log files under remote app log directory.
242+
* @param conf the configuration
243+
* @param remoteAppLogDir the application log directory
244+
* @param appId the applicationId
245+
* @param appOwner the application owner
246+
* @return the iterator of available log files
247+
* @throws IOException if there is no log file directory
248+
*/
249+
public static RemoteIterator<FileStatus> getNodeFiles(Configuration conf,
250+
Path remoteAppLogDir, ApplicationId appId, String appOwner)
251+
throws IOException {
252+
Path qualifiedLogDir =
253+
FileContext.getFileContext(conf).makeQualified(remoteAppLogDir);
254+
return FileContext.getFileContext(
255+
qualifiedLogDir.toUri(), conf).listStatus(remoteAppLogDir);
256+
}
257+
177258
/**
178259
* Get all available log files under remote app log directory.
179260
* @param conf the configuration
@@ -188,14 +269,58 @@ public static RemoteIterator<FileStatus> getRemoteNodeFileDir(
188269
Configuration conf, ApplicationId appId, String appOwner,
189270
org.apache.hadoop.fs.Path remoteRootLogDir, String suffix)
190271
throws IOException {
272+
RemoteIterator<FileStatus> nodeFilesCur= null;
273+
RemoteIterator<FileStatus> nodeFilesPrev = null;
274+
StringBuilder diagnosticsMsg = new StringBuilder();
275+
276+
// Get Node Files from new app log dir
191277
Path remoteAppLogDir = getRemoteAppLogDir(conf, appId, appOwner,
192278
remoteRootLogDir, suffix);
193-
RemoteIterator<FileStatus> nodeFiles = null;
194-
Path qualifiedLogDir =
195-
FileContext.getFileContext(conf).makeQualified(remoteAppLogDir);
196-
nodeFiles = FileContext.getFileContext(qualifiedLogDir.toUri(),
197-
conf).listStatus(remoteAppLogDir);
198-
return nodeFiles;
279+
try {
280+
nodeFilesCur = getNodeFiles(conf, remoteAppLogDir, appId, appOwner);
281+
} catch (IOException ex) {
282+
diagnosticsMsg.append(ex.getMessage() + "\n");
283+
}
284+
285+
// Get Node Files from old app log dir
286+
if (isOlderPathEnabled(conf)) {
287+
remoteAppLogDir = getOlderRemoteAppLogDir(appId, appOwner,
288+
remoteRootLogDir, suffix);
289+
try {
290+
nodeFilesPrev = getNodeFiles(conf,
291+
remoteAppLogDir, appId, appOwner);
292+
} catch (IOException ex) {
293+
diagnosticsMsg.append(ex.getMessage() + "\n");
294+
}
295+
296+
// Return older files if new app log dir does not exist
297+
if (nodeFilesCur == null) {
298+
return nodeFilesPrev;
299+
} else if (nodeFilesPrev != null) {
300+
// Return both new and old node files combined
301+
RemoteIterator<FileStatus> curDir = nodeFilesCur;
302+
RemoteIterator<FileStatus> prevDir = nodeFilesPrev;
303+
RemoteIterator<FileStatus> nodeFilesCombined = new
304+
RemoteIterator<FileStatus>() {
305+
@Override
306+
public boolean hasNext() throws IOException {
307+
return prevDir.hasNext() || curDir.hasNext();
308+
}
309+
310+
@Override
311+
public FileStatus next() throws IOException {
312+
return prevDir.hasNext() ? prevDir.next() : curDir.next();
313+
}
314+
};
315+
return nodeFilesCombined;
316+
}
317+
}
318+
319+
// The new app log dir could not be read or does not exist
320+
if (nodeFilesCur == null) {
321+
throw new IOException(diagnosticsMsg.toString());
322+
}
323+
return nodeFilesCur;
199324
}
200325

201326
/**
@@ -212,13 +337,39 @@ public static List<FileStatus> getRemoteNodeFileList(
212337
Configuration conf, ApplicationId appId, String appOwner,
213338
org.apache.hadoop.fs.Path remoteRootLogDir, String suffix)
214339
throws IOException {
340+
StringBuilder diagnosticsMsg = new StringBuilder();
215341
Path remoteAppLogDir = getRemoteAppLogDir(conf, appId, appOwner,
216342
remoteRootLogDir, suffix);
217343
List<FileStatus> nodeFiles = new ArrayList<>();
218344
Path qualifiedLogDir =
219345
FileContext.getFileContext(conf).makeQualified(remoteAppLogDir);
220-
nodeFiles.addAll(Arrays.asList(FileContext.getFileContext(
221-
qualifiedLogDir.toUri(), conf).util().listStatus(remoteAppLogDir)));
346+
347+
// Get Node Files from new app log dir
348+
try {
349+
nodeFiles.addAll(Arrays.asList(FileContext.getFileContext(
350+
qualifiedLogDir.toUri(), conf).util().listStatus(remoteAppLogDir)));
351+
} catch (IOException ex) {
352+
diagnosticsMsg.append(ex.getMessage() + "\n");
353+
}
354+
355+
// Get Node Files from old app log dir
356+
if (isOlderPathEnabled(conf)) {
357+
remoteAppLogDir = getOlderRemoteAppLogDir(appId, appOwner,
358+
remoteRootLogDir, suffix);
359+
qualifiedLogDir = FileContext.getFileContext(conf).
360+
makeQualified(remoteAppLogDir);
361+
try {
362+
nodeFiles.addAll(Arrays.asList(FileContext.getFileContext(
363+
qualifiedLogDir.toUri(), conf).util().listStatus(remoteAppLogDir)));
364+
} catch (IOException ex) {
365+
diagnosticsMsg.append(ex.getMessage() + "\n");
366+
}
367+
}
368+
369+
// No node files were found in any of the app log dirs that were listed
370+
if (nodeFiles.isEmpty()) {
371+
throw new IOException(diagnosticsMsg.toString());
372+
}
222373
return nodeFiles;
223374
}
224375

@@ -233,12 +384,11 @@ public static List<FileStatus> getRemoteNodeFileList(
233384
public static RemoteIterator<FileStatus> getRemoteNodeFileDir(
234385
Configuration conf, ApplicationId appId, String appOwner)
235386
throws IOException {
236-
Path remoteAppLogDir = getRemoteAppLogDir(conf, appId, appOwner);
237-
RemoteIterator<FileStatus> nodeFiles = null;
238-
Path qualifiedLogDir =
239-
FileContext.getFileContext(conf).makeQualified(remoteAppLogDir);
240-
nodeFiles = FileContext.getFileContext(qualifiedLogDir.toUri(),
241-
conf).listStatus(remoteAppLogDir);
242-
return nodeFiles;
387+
String suffix = LogAggregationUtils.getRemoteNodeLogDirSuffix(conf);
388+
Path remoteRootLogDir = new Path(conf.get(
389+
YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
390+
YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR));
391+
return getRemoteNodeFileDir(conf, appId, appOwner,
392+
remoteRootLogDir, suffix);
243393
}
244394
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/LogAggregationFileController.java

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,11 @@
3131
import java.util.Collections;
3232
import java.util.Comparator;
3333
import java.util.HashSet;
34+
import java.util.LinkedList;
3435
import java.util.List;
3536
import java.util.Map;
3637
import java.util.Set;
38+
3739
import org.apache.commons.lang3.StringUtils;
3840
import org.apache.hadoop.classification.InterfaceAudience.Public;
3941
import org.apache.hadoop.classification.InterfaceStability.Unstable;
@@ -361,32 +363,25 @@ public Object run() throws Exception {
361363
// unnecessary load on the filesystem from all of the nodes
362364
Path appDir = LogAggregationUtils.getRemoteAppLogDir(
363365
remoteRootLogDir, appId, user, remoteRootLogDirSuffix);
364-
365-
appDir = appDir.makeQualified(remoteFS.getUri(),
366+
Path curDir = appDir.makeQualified(remoteFS.getUri(),
367+
remoteFS.getWorkingDirectory());
368+
Path rootLogDir = remoteRootLogDir.makeQualified(remoteFS.getUri(),
366369
remoteFS.getWorkingDirectory());
367370

368-
if (!checkExists(remoteFS, appDir, APP_DIR_PERMISSIONS)) {
369-
Path suffixDir = LogAggregationUtils.getRemoteLogSuffixedDir(
370-
remoteRootLogDir, user, remoteRootLogDirSuffix);
371-
suffixDir = suffixDir.makeQualified(remoteFS.getUri(),
372-
remoteFS.getWorkingDirectory());
373-
374-
if (!checkExists(remoteFS, suffixDir, APP_DIR_PERMISSIONS)) {
375-
Path userDir = LogAggregationUtils.getRemoteLogUserDir(
376-
remoteRootLogDir, user);
377-
userDir = userDir.makeQualified(remoteFS.getUri(),
378-
remoteFS.getWorkingDirectory());
379-
380-
if (!checkExists(remoteFS, userDir, APP_DIR_PERMISSIONS)) {
381-
createDir(remoteFS, userDir, APP_DIR_PERMISSIONS);
382-
}
371+
LinkedList<Path> pathsToCreate = new LinkedList<>();
383372

384-
createDir(remoteFS, suffixDir, APP_DIR_PERMISSIONS);
373+
while (!curDir.equals(rootLogDir)) {
374+
if (!checkExists(remoteFS, curDir, APP_DIR_PERMISSIONS)) {
375+
pathsToCreate.addFirst(curDir);
376+
curDir = curDir.getParent();
377+
} else {
378+
break;
385379
}
386-
387-
createDir(remoteFS, appDir, APP_DIR_PERMISSIONS);
388380
}
389381

382+
for (Path path : pathsToCreate) {
383+
createDir(remoteFS, path, APP_DIR_PERMISSIONS);
384+
}
390385
} catch (IOException e) {
391386
LOG.error("Failed to setup application log directory for "
392387
+ appId, e);
@@ -411,7 +406,6 @@ protected FileSystem getFileSystem(Configuration conf) throws IOException {
411406

412407
protected void createDir(FileSystem fs, Path path, FsPermission fsPerm)
413408
throws IOException {
414-
415409
if (fsSupportsChmod) {
416410
FsPermission dirPerm = new FsPermission(fsPerm);
417411
fs.mkdirs(path, dirPerm);
@@ -467,6 +461,19 @@ public Path getRemoteAppLogDir(ApplicationId appId, String appOwner)
467461
this.remoteRootLogDir, this.remoteRootLogDirSuffix);
468462
}
469463

464+
/**
465+
* Get the older remote application directory for log aggregation.
466+
* @param appId the Application ID
467+
* @param appOwner the Application Owner
468+
* @return the older remote application directory
469+
* @throws IOException if can not find the remote application directory
470+
*/
471+
public Path getOlderRemoteAppLogDir(ApplicationId appId, String appOwner)
472+
throws IOException {
473+
return LogAggregationUtils.getOlderRemoteAppLogDir(appId, appOwner,
474+
this.remoteRootLogDir, this.remoteRootLogDirSuffix);
475+
}
476+
470477
protected void cleanOldLogs(Path remoteNodeLogFileForApp,
471478
final NodeId nodeId, UserGroupInformation userUgi) {
472479
try {

0 commit comments

Comments
 (0)