|
19 | 19 | package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt; |
20 | 20 |
|
21 | 21 | import java.util.HashMap; |
| 22 | +import java.util.List; |
22 | 23 | import java.util.Map; |
23 | 24 | import java.util.concurrent.ConcurrentHashMap; |
24 | 25 | import java.util.concurrent.atomic.AtomicBoolean; |
|
29 | 30 | import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock; |
30 | 31 |
|
31 | 32 | import org.apache.commons.lang3.time.DateUtils; |
| 33 | +import org.apache.hadoop.util.Time; |
| 34 | +import org.apache.hadoop.yarn.api.records.Container; |
| 35 | +import org.apache.hadoop.yarn.api.records.ExecutionType; |
| 36 | +import org.apache.hadoop.yarn.api.records.ResourceRequest; |
| 37 | +import org.apache.hadoop.yarn.server.resourcemanager.ClusterMetrics; |
32 | 38 | import org.slf4j.Logger; |
33 | 39 | import org.slf4j.LoggerFactory; |
34 | 40 | import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; |
@@ -63,13 +69,18 @@ public class RMAppAttemptMetrics { |
63 | 69 | new int[NodeType.values().length][NodeType.values().length]; |
64 | 70 | private volatile int totalAllocatedContainers; |
65 | 71 |
|
| 72 | + private ConcurrentHashMap<Long, Long> allocationGuaranteedLatencies = null; |
| 73 | + private ConcurrentHashMap<Long, Long> allocationOpportunisticLatencies = null; |
| 74 | + |
66 | 75 | public RMAppAttemptMetrics(ApplicationAttemptId attemptId, |
67 | 76 | RMContext rmContext) { |
68 | 77 | this.attemptId = attemptId; |
69 | 78 | ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); |
70 | 79 | this.readLock = lock.readLock(); |
71 | 80 | this.writeLock = lock.writeLock(); |
72 | 81 | this.rmContext = rmContext; |
| 82 | + this.allocationGuaranteedLatencies = new ConcurrentHashMap<Long, Long>(); |
| 83 | + this.allocationOpportunisticLatencies = new ConcurrentHashMap<Long, Long>(); |
73 | 84 | } |
74 | 85 |
|
75 | 86 | public void updatePreemptionInfo(Resource resource, RMContainer container) { |
@@ -242,4 +253,149 @@ public Resource getApplicationAttemptHeadroom() { |
242 | 253 | public void setApplicationAttemptHeadRoom(Resource headRoom) { |
243 | 254 | this.applicationHeadroom = headRoom; |
244 | 255 | } |
| 256 | + |
| 257 | + /** |
| 258 | + * Add allocationID latency to the application ID with a timestap = |
| 259 | + * CurrentTime (guaranteed) |
| 260 | + * |
| 261 | + * @param allocId the allocation Id to add If the allocation ID is already |
| 262 | + * present (which shouldn't happen) it ignores the entry |
| 263 | + */ |
| 264 | + public void addAllocationGuarLatencyIfNotExists(long allocId) { |
| 265 | + addAllocationGuarLatencyIfNotExists(allocId, System.currentTimeMillis()); |
| 266 | + } |
| 267 | + |
| 268 | + /** |
| 269 | + * Add allocationID latency to the application ID with a specific timestamp |
| 270 | + * (guaranteed) |
| 271 | + * |
| 272 | + * @param allocId allocationId |
| 273 | + * @param timestamp the timestamp to associate |
| 274 | + */ |
| 275 | + public void addAllocationGuarLatencyIfNotExists(long allocId, |
| 276 | + long timestamp) { |
| 277 | + allocationGuaranteedLatencies.putIfAbsent(allocId, timestamp); |
| 278 | + } |
| 279 | + |
| 280 | + /** |
| 281 | + * Add allocationID latency to the application ID with a timestap = |
| 282 | + * CurrentTime (opportunistic) |
| 283 | + * |
| 284 | + * @param allocId the allocation Id to add If the allocation ID is already |
| 285 | + * present (which shouldn't happen) it ignores the entry |
| 286 | + */ |
| 287 | + public void addAllocationOppLatencyIfNotExists(long allocId) { |
| 288 | + this.addAllocationOppLatencyIfNotExists(allocId, |
| 289 | + System.currentTimeMillis()); |
| 290 | + } |
| 291 | + |
| 292 | + /** |
| 293 | + * Add allocationID latency to the application ID with a specific timestamp |
| 294 | + * (opportunistic) |
| 295 | + * |
| 296 | + * @param allocId allocationId |
| 297 | + * @param timestamp the timestamp to associate |
| 298 | + */ |
| 299 | + public void addAllocationOppLatencyIfNotExists(long allocId, long timestamp) { |
| 300 | + allocationOpportunisticLatencies.putIfAbsent(allocId, timestamp); |
| 301 | + } |
| 302 | + |
| 303 | + /** |
| 304 | + * Returns the time associated when the allocation Id was added This method |
| 305 | + * removes the allocation Id from the class (guaranteed) |
| 306 | + * |
| 307 | + * @param allocId the allocation ID to get the associated time |
| 308 | + * @return the timestamp associated with that allocation id as well as stop |
| 309 | + * tracking it |
| 310 | + */ |
| 311 | + public long getAndRemoveGuaAllocationLatencies(long allocId) { |
| 312 | + Long ret = allocationGuaranteedLatencies.remove(new Long(allocId)); |
| 313 | + return ret != null ? ret : 0l; |
| 314 | + } |
| 315 | + |
| 316 | + /** |
| 317 | + * Returns the time associated when the allocation Id was added This method |
| 318 | + * removes the allocation Id from the class (opportunistic) |
| 319 | + * |
| 320 | + * @param allocId the allocation ID to get the associated time |
| 321 | + * @return the timestamp associated with that allocation id as well as stop |
| 322 | + * tracking it |
| 323 | + */ |
| 324 | + public long getAndRemoveOppAllocationLatencies(long allocId) { |
| 325 | + Long ret = allocationOpportunisticLatencies.remove(new Long(allocId)); |
| 326 | + return ret != null ? ret : 0l; |
| 327 | + } |
| 328 | + |
| 329 | + /** |
| 330 | + * Set timestamp for the provided ResourceRequest. It will correctly identify |
| 331 | + * their ExecutionType, provided they have they have allocateId != 0 (DEFAULT) |
| 332 | + * This is used in conjunction with This is used in conjunction with |
| 333 | + * updatePromoteLatencies method method |
| 334 | + * |
| 335 | + * @param requests the ResourceRequests to add. |
| 336 | + */ |
| 337 | + public void setAllocateLatenciesTimestamps(List<ResourceRequest> requests) { |
| 338 | + long now = Time.now(); |
| 339 | + for (ResourceRequest req : requests) { |
| 340 | + if (req.getNumContainers() > 0) { |
| 341 | + // we dont support tracking with negative or zero allocationIds |
| 342 | + long allocationRequestId = req.getAllocationRequestId(); |
| 343 | + if (allocationRequestId > 0) { |
| 344 | + if (req.getExecutionTypeRequest() != null) { |
| 345 | + if (ExecutionType.GUARANTEED |
| 346 | + .equals(req.getExecutionTypeRequest().getExecutionType())) { |
| 347 | + addAllocationGuarLatencyIfNotExists(allocationRequestId, now); |
| 348 | + } else { |
| 349 | + addAllocationOppLatencyIfNotExists(allocationRequestId, now); |
| 350 | + } |
| 351 | + } |
| 352 | + } else { |
| 353 | + LOG.warn(String.format( |
| 354 | + "Can't register allocate latency for %s container " |
| 355 | + + "with negative or zero allocation IDs", |
| 356 | + req.getExecutionTypeRequest().getExecutionType())); |
| 357 | + } |
| 358 | + } |
| 359 | + } |
| 360 | + } |
| 361 | + |
| 362 | + /** |
| 363 | + * Updated the JMX metrics class (ClusterMetrics) with the delta time when |
| 364 | + * these containers where added. It will correctly identify their |
| 365 | + * ExecutionType, provided they have they have allocateId != 0 (DEFAULT) |
| 366 | + * |
| 367 | + * @param response the list of the containers to allocate. |
| 368 | + */ |
| 369 | + public void updateAllocateLatencies(List<Container> response) { |
| 370 | + |
| 371 | + for (Container container : response) { |
| 372 | + long allocationRequestId = container.getAllocationRequestId(); |
| 373 | + // we dont support tracking with negative or zero allocationIds |
| 374 | + if (allocationRequestId > 0) { |
| 375 | + long now = System.currentTimeMillis(); |
| 376 | + long allocIdTime = |
| 377 | + (container.getExecutionType() == ExecutionType.GUARANTEED) ? |
| 378 | + getAndRemoveGuaAllocationLatencies(allocationRequestId) : |
| 379 | + getAndRemoveOppAllocationLatencies(allocationRequestId); |
| 380 | + if (allocIdTime != 0) { |
| 381 | + if (container.getExecutionType() == ExecutionType.GUARANTEED) { |
| 382 | + ClusterMetrics.getMetrics() |
| 383 | + .addAllocateGuarLatencyEntry(now - allocIdTime); |
| 384 | + } else { |
| 385 | + ClusterMetrics.getMetrics() |
| 386 | + .addAllocateOppLatencyEntry(now - allocIdTime); |
| 387 | + } |
| 388 | + } else { |
| 389 | + LOG.error(String.format( |
| 390 | + "Can't register allocate latency for %s container %s; allotTime=%d ", |
| 391 | + container.getExecutionType(), container.getId(), allocIdTime)); |
| 392 | + } |
| 393 | + } else { |
| 394 | + LOG.warn(String.format("Cant register promotion latency " |
| 395 | + + "for container %s. Either allocationID is less than equal to 0 or " |
| 396 | + + "lost the container ID", container.getExecutionType().name(), |
| 397 | + container.getId())); |
| 398 | + } |
| 399 | + } |
| 400 | + } |
245 | 401 | } |
0 commit comments