From 785c34bc66b04dbccbd64639850dc44d67957aba Mon Sep 17 00:00:00 2001 From: bao liang <29528966+lenboo@users.noreply.github.com> Date: Tue, 7 Apr 2020 16:23:31 +0800 Subject: [PATCH] fix bug: master fault tolerance error (#2350) (#2375) fix bug: visiting the worker page of the monitoring center causes a null pointer exception (#2349) Co-authored-by: baoliang --- .../dolphinscheduler/server/zk/ZKMasterClient.java | 3 ++- .../service/zk/AbstractZKClient.java | 12 +++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/zk/ZKMasterClient.java b/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/zk/ZKMasterClient.java index 0e9a83944d..727eff6b57 100644 --- a/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/zk/ZKMasterClient.java +++ b/dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/zk/ZKMasterClient.java @@ -24,6 +24,7 @@ import org.apache.dolphinscheduler.common.Constants; import org.apache.dolphinscheduler.common.enums.ExecutionStatus; import org.apache.dolphinscheduler.common.enums.ZKNodeType; import org.apache.dolphinscheduler.common.model.Server; +import org.apache.dolphinscheduler.common.utils.OSUtils; import org.apache.dolphinscheduler.dao.entity.ProcessInstance; import org.apache.dolphinscheduler.dao.entity.TaskInstance; import org.apache.dolphinscheduler.server.builder.TaskExecutionContextBuilder; @@ -72,7 +73,7 @@ public class ZKMasterClient extends AbstractZKClient { this.initSystemZNode(); // check if fault tolerance is required?failure and tolerance - if (getActiveMasterNum() == 1) { + if (getActiveMasterNum() == 1 && checkZKNodeExists(OSUtils.getHost(), ZKNodeType.MASTER)) { failoverWorker(null, true); failoverMaster(null); } diff --git a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/zk/AbstractZKClient.java 
b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/zk/AbstractZKClient.java index 106dbc1c23..c9f58743a1 100644 --- a/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/zk/AbstractZKClient.java +++ b/dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/zk/AbstractZKClient.java @@ -145,8 +145,18 @@ public abstract class AbstractZKClient extends ZookeeperCachedOperator { try { String path = getZNodeParentPath(zkNodeType); List<String> serverList = super.getChildrenKeys(path); + if(zkNodeType == ZKNodeType.WORKER){ + List<String> workerList = new ArrayList<>(); + for(String group : serverList){ + List<String> groupServers = super.getChildrenKeys(path + Constants.SLASH + group); + for(String groupServer : groupServers){ + workerList.add(group + Constants.SLASH + groupServer); + } + } + serverList = workerList; + } for(String server : serverList){ - masterMap.putIfAbsent(server, super.get(path + "/" + server)); + masterMap.putIfAbsent(server, super.get(path + Constants.SLASH + server)); } } catch (Exception e) { logger.error("get server list failed", e);