Browse Source

[fix][worker][bug] master/worker crashes when registry recovers from SUSPENDED to RECONNECTED (#13328)

3.1.3-release
hokie-chan 2 years ago committed by zhuangchong
parent
commit
d27d27f7d7
  1. 28
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/MasterWaitingStrategy.java
  2. 28
      dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/registry/WorkerWaitingStrategy.java

28
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/MasterWaitingStrategy.java

@ -92,18 +92,22 @@ public class MasterWaitingStrategy implements MasterConnectStrategy {
@Override
public void reconnect() {
try {
ServerLifeCycleManager.recoverFromWaiting();
reStartMasterResource();
// reopen the resource
logger.info("Recover from waiting success, the current server status is {}",
ServerLifeCycleManager.getServerStatus());
} catch (Exception e) {
String errorMessage =
String.format("Recover from waiting failed, the current server status is %s, will stop the server",
ServerLifeCycleManager.getServerStatus());
logger.error(errorMessage, e);
registryClient.getStoppable().stop(errorMessage);
if (ServerLifeCycleManager.isRunning()) {
logger.info("no need to reconnect, as the current server status is running");
} else {
try {
ServerLifeCycleManager.recoverFromWaiting();
reStartMasterResource();
logger.info("Recover from waiting success, the current server status is {}",
ServerLifeCycleManager.getServerStatus());
} catch (Exception e) {
String errorMessage =
String.format(
"Recover from waiting failed, the current server status is %s, will stop the server",
ServerLifeCycleManager.getServerStatus());
logger.error(errorMessage, e);
registryClient.getStoppable().stop(errorMessage);
}
}
}

28
dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/registry/WorkerWaitingStrategy.java

@ -92,19 +92,23 @@ public class WorkerWaitingStrategy implements WorkerConnectStrategy {
@Override
public void reconnect() {
try {
ServerLifeCycleManager.recoverFromWaiting();
reStartWorkerResource();
logger.info("Recover from waiting success, the current server status is {}",
ServerLifeCycleManager.getServerStatus());
} catch (Exception e) {
String errorMessage =
String.format("Recover from waiting failed, the current server status is %s, will stop the server",
ServerLifeCycleManager.getServerStatus());
logger.error(errorMessage, e);
registryClient.getStoppable().stop(errorMessage);
if (ServerLifeCycleManager.isRunning()) {
logger.info("no need to reconnect, as the current server status is running");
} else {
try {
ServerLifeCycleManager.recoverFromWaiting();
reStartWorkerResource();
logger.info("Recover from waiting success, the current server status is {}",
ServerLifeCycleManager.getServerStatus());
} catch (Exception e) {
String errorMessage =
String.format(
"Recover from waiting failed, the current server status is %s, will stop the server",
ServerLifeCycleManager.getServerStatus());
logger.error(errorMessage, e);
registryClient.getStoppable().stop(errorMessage);
}
}
}
@Override

Loading…
Cancel
Save