Skip to content

Commit

Permalink
YARN-11626. Optimize ResourceManager's operations on Zookeeper metada…
Browse files Browse the repository at this point in the history
…ta (apache#6616)

Co-authored-by: wuxiaobao <[email protected]>
  • Loading branch information
XbaoWu and XbaoWu authored Mar 21, 2024
1 parent adab3a2 commit a375ef8
Show file tree
Hide file tree
Showing 3 changed files with 441 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@
<artifactId>mockito-core</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-inline</artifactId>
<version>2.8.9</version>
<scope>test</scope>
</dependency>
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
<dependency>
<groupId>org.apache.hadoop</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -956,7 +956,7 @@ private void handleApplicationAttemptStateOp(
zkAcl, fencingNodePath);
break;
case REMOVE:
zkManager.safeDelete(path, zkAcl, fencingNodePath);
safeDeleteAndCheckNode(path, zkAcl, fencingNodePath);
break;
default:
break;
Expand Down Expand Up @@ -1035,10 +1035,10 @@ private void removeApp(String removeAppId, boolean safeRemove,
for (ApplicationAttemptId attemptId : attempts) {
String attemptRemovePath =
getNodePath(appIdRemovePath, attemptId.toString());
zkManager.safeDelete(attemptRemovePath, zkAcl, fencingNodePath);
safeDeleteAndCheckNode(attemptRemovePath, zkAcl, fencingNodePath);
}
}
zkManager.safeDelete(appIdRemovePath, zkAcl, fencingNodePath);
safeDeleteAndCheckNode(appIdRemovePath, zkAcl, fencingNodePath);
} else {
CuratorFramework curatorFramework = zkManager.getCurator();
curatorFramework.delete().deletingChildrenIfNeeded().
Expand Down Expand Up @@ -1099,7 +1099,7 @@ protected synchronized void removeRMDelegationTokenState(
LOG.debug("Removing RMDelegationToken_{}",
rmDTIdentifier.getSequenceNumber());

zkManager.safeDelete(nodeRemovePath, zkAcl, fencingNodePath);
safeDeleteAndCheckNode(nodeRemovePath, zkAcl, fencingNodePath);

// Check if we should remove the parent app node as well.
checkRemoveParentZnode(nodeRemovePath, splitIndex);
Expand Down Expand Up @@ -1160,7 +1160,7 @@ protected synchronized void removeRMDTMasterKeyState(

LOG.debug("Removing RMDelegationKey_{}", delegationKey.getKeyId());

zkManager.safeDelete(nodeRemovePath, zkAcl, fencingNodePath);
safeDeleteAndCheckNode(nodeRemovePath, zkAcl, fencingNodePath);
}

@Override
Expand Down Expand Up @@ -1200,12 +1200,12 @@ protected synchronized void removeReservationState(String planName,
LOG.debug("Removing reservationallocation {} for plan {}",
reservationIdName, planName);

zkManager.safeDelete(reservationPath, zkAcl, fencingNodePath);
safeDeleteAndCheckNode(reservationPath, zkAcl, fencingNodePath);

List<String> reservationNodes = getChildren(planNodePath);

if (reservationNodes.isEmpty()) {
zkManager.safeDelete(planNodePath, zkAcl, fencingNodePath);
safeDeleteAndCheckNode(planNodePath, zkAcl, fencingNodePath);
}
}

Expand Down Expand Up @@ -1441,6 +1441,29 @@ void delete(final String path) throws Exception {
zkManager.delete(path);
}

/**
 * Deletes a znode through {@code zkManager.safeDelete} and tolerates the
 * case where the znode is already gone.
 *
 * If the delete fails with {@link KeeperException.NoNodeException}, the
 * path is re-checked with {@code exists(path)}:
 * <ul>
 *   <li>if the node is absent, the exception is ignored and an INFO message
 *   is logged, so that a redundant delete does not escalate into a
 *   ResourceManager failover;</li>
 *   <li>if the node is still present, the state is inconsistent and a
 *   {@link KeeperException.NodeExistsException} is thrown, with the
 *   original {@code NoNodeException} attached as its cause.</li>
 * </ul>
 *
 * @param path Path to be deleted.
 * @param fencingACL ACLs passed through to the fenced delete.
 * @param fencingPath Path of the fencing node passed through to the fenced
 *                    delete.
 * @throws KeeperException.NodeExistsException if the delete reported
 *         NoNode but the node still exists.
 * @throws Exception if any other problem occurs while performing deletion.
 */
public void safeDeleteAndCheckNode(String path, List<ACL> fencingACL,
    String fencingPath) throws Exception {
  try {
    zkManager.safeDelete(path, fencingACL, fencingPath);
  } catch (KeeperException.NoNodeException nne) {
    if (exists(path)) {
      // The delete claimed the node was missing, yet it is still there:
      // surface this, keeping the original failure for diagnosis.
      KeeperException.NodeExistsException stillPresent =
          new KeeperException.NodeExistsException(
              "Node " + path + " should not exist");
      stillPresent.initCause(nne);
      throw stillPresent;
    }
    // Node is already gone, which is the desired end state; nothing to do.
    LOG.info("Node {} doesn't exist to delete", path);
  }
}

/**
* Helper class that periodically attempts creating a znode to ensure that
* this RM continues to be the Active.
Expand Down
Loading

0 comments on commit a375ef8

Please sign in to comment.