Browse Source

[Improvement-4624] When the server exists in the dead server list of zk, it needs to stop the service by itself (#4626)

* [Improvement-4624] When the server exists in the dead server list of zk, it needs to stop the service by itself

* [Improvement-4624]fix check style and add MasterRegistryTest

* [Improvement-4624]fix check style and add ZookeeperRegistryCenterTest

* [Improvement-4624]fix check style and add ZookeeperRegistryCenterTest

* [Improvement-4624]add RegisterOperatorTest

* [Improvement-4624]update RegisterOperatorTest

* [Improvement-4624]resolve code smell

* [Improvement-4624]revert LICENSE-@form-create-element-ui
pull/3/MERGE
lgcareer 4 years ago committed by GitHub
parent
commit
43ba29a2dd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 37
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/MasterServer.java
  2. 2
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/LowerWeightHostManager.java
  3. 36
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/registry/MasterRegistry.java
  4. 33
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/registry/HeartBeatTask.java
  5. 4
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/registry/ZookeeperNodeManager.java
  6. 84
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/registry/ZookeeperRegistryCenter.java
  7. 30
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/WorkerServer.java
  8. 26
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/registry/WorkerRegistry.java
  9. 3
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/zk/ZKMasterClient.java
  10. 4
      dolphinscheduler-server/src/test/java/org/apache/dolphinscheduler/server/master/consumer/TaskPriorityQueueConsumerTest.java
  11. 7
      dolphinscheduler-server/src/test/java/org/apache/dolphinscheduler/server/master/registry/MasterRegistryTest.java
  12. 61
      dolphinscheduler-server/src/test/java/org/apache/dolphinscheduler/server/registry/ZookeeperRegistryCenterTest.java
  13. 4
      dolphinscheduler-server/src/test/java/org/apache/dolphinscheduler/server/worker/processor/TaskCallbackServiceTest.java
  14. 44
      dolphinscheduler-server/src/test/java/org/apache/dolphinscheduler/server/worker/registry/WorkerRegistryTest.java
  15. 81
      dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/zk/AbstractZKClient.java
  16. 155
      dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/zk/RegisterOperator.java
  17. 3
      dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/zk/ZookeeperCachedOperator.java
  18. 116
      dolphinscheduler-service/src/test/java/org/apache/dolphinscheduler/service/zk/RegisterOperatorTest.java
  19. 2
      pom.xml

37
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/MasterServer.java

@ -14,9 +14,11 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package org.apache.dolphinscheduler.server.master; package org.apache.dolphinscheduler.server.master;
import org.apache.dolphinscheduler.common.Constants; import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.IStoppable;
import org.apache.dolphinscheduler.common.thread.Stopper; import org.apache.dolphinscheduler.common.thread.Stopper;
import org.apache.dolphinscheduler.remote.NettyRemotingServer; import org.apache.dolphinscheduler.remote.NettyRemotingServer;
import org.apache.dolphinscheduler.remote.command.CommandType; import org.apache.dolphinscheduler.remote.command.CommandType;
@ -25,6 +27,7 @@ import org.apache.dolphinscheduler.server.master.config.MasterConfig;
import org.apache.dolphinscheduler.server.master.processor.TaskAckProcessor; import org.apache.dolphinscheduler.server.master.processor.TaskAckProcessor;
import org.apache.dolphinscheduler.server.master.processor.TaskKillResponseProcessor; import org.apache.dolphinscheduler.server.master.processor.TaskKillResponseProcessor;
import org.apache.dolphinscheduler.server.master.processor.TaskResponseProcessor; import org.apache.dolphinscheduler.server.master.processor.TaskResponseProcessor;
import org.apache.dolphinscheduler.server.master.registry.MasterRegistry;
import org.apache.dolphinscheduler.server.master.runner.MasterSchedulerService; import org.apache.dolphinscheduler.server.master.runner.MasterSchedulerService;
import org.apache.dolphinscheduler.server.worker.WorkerServer; import org.apache.dolphinscheduler.server.worker.WorkerServer;
import org.apache.dolphinscheduler.server.zk.ZKMasterClient; import org.apache.dolphinscheduler.server.zk.ZKMasterClient;
@ -42,13 +45,10 @@ import org.springframework.boot.builder.SpringApplicationBuilder;
import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.FilterType; import org.springframework.context.annotation.FilterType;
@ComponentScan(value = "org.apache.dolphinscheduler", excludeFilters = { @ComponentScan(value = "org.apache.dolphinscheduler", excludeFilters = {
@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, classes = {WorkerServer.class}) @ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, classes = {WorkerServer.class})
}) })
public class MasterServer { public class MasterServer implements IStoppable {
/** /**
* logger of MasterServer * logger of MasterServer
@ -73,6 +73,12 @@ public class MasterServer {
*/ */
private NettyRemotingServer nettyRemotingServer; private NettyRemotingServer nettyRemotingServer;
/**
* master registry
*/
@Autowired
private MasterRegistry masterRegistry;
/** /**
* zk master client * zk master client
*/ */
@ -87,8 +93,9 @@ public class MasterServer {
/** /**
* master server startup * master server startup
* * <p>
* master server not use web service * master server not use web service
*
* @param args arguments * @param args arguments
*/ */
public static void main(String[] args) { public static void main(String[] args) {
@ -101,7 +108,7 @@ public class MasterServer {
*/ */
@PostConstruct @PostConstruct
public void run() { public void run() {
try {
//init remoting server //init remoting server
NettyServerConfig serverConfig = new NettyServerConfig(); NettyServerConfig serverConfig = new NettyServerConfig();
serverConfig.setListenPort(masterConfig.getListenPort()); serverConfig.setListenPort(masterConfig.getListenPort());
@ -111,6 +118,13 @@ public class MasterServer {
this.nettyRemotingServer.registerProcessor(CommandType.TASK_KILL_RESPONSE, new TaskKillResponseProcessor()); this.nettyRemotingServer.registerProcessor(CommandType.TASK_KILL_RESPONSE, new TaskKillResponseProcessor());
this.nettyRemotingServer.start(); this.nettyRemotingServer.start();
this.masterRegistry.getZookeeperRegistryCenter().setStoppable(this);
} catch (Exception e) {
logger.error(e.getMessage(), e);
throw new RuntimeException(e);
}
// self tolerant // self tolerant
this.zkMasterClient.start(); this.zkMasterClient.start();
@ -137,14 +151,17 @@ public class MasterServer {
Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() { Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
@Override @Override
public void run() { public void run() {
if (Stopper.isRunning()) {
close("shutdownHook"); close("shutdownHook");
} }
}
})); }));
} }
/** /**
* gracefully close * gracefully close
*
* @param cause close cause * @param cause close cause
*/ */
public void close(String cause) { public void close(String cause) {
@ -169,6 +186,7 @@ public class MasterServer {
// //
this.masterSchedulerService.close(); this.masterSchedulerService.close();
this.nettyRemotingServer.close(); this.nettyRemotingServer.close();
this.masterRegistry.unRegistry();
this.zkMasterClient.close(); this.zkMasterClient.close();
//close quartz //close quartz
try { try {
@ -177,10 +195,17 @@ public class MasterServer {
} catch (Exception e) { } catch (Exception e) {
logger.warn("Quartz service stopped exception:{}", e.getMessage()); logger.warn("Quartz service stopped exception:{}", e.getMessage());
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("master server stop exception ", e); logger.error("master server stop exception ", e);
} finally {
System.exit(-1); System.exit(-1);
} }
} }
/**
 * Stops this master server; the work is delegated to {@link #close(String)}.
 *
 * @param cause the reason the stop was requested, passed through unchanged
 */
@Override
public void stop(String cause) {
    this.close(cause);
}
} }

2
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/dispatch/host/LowerWeightHostManager.java

@ -149,7 +149,7 @@ public class LowerWeightHostManager extends CommonHostManager {
String workerGroupPath = registryCenter.getWorkerGroupPath(workerGroup); String workerGroupPath = registryCenter.getWorkerGroupPath(workerGroup);
Set<HostWeight> hostWeights = new HashSet<>(nodes.size()); Set<HostWeight> hostWeights = new HashSet<>(nodes.size());
for(String node : nodes){ for(String node : nodes){
String heartbeat = registryCenter.getZookeeperCachedOperator().get(workerGroupPath + "/" + node); String heartbeat = registryCenter.getRegisterOperator().get(workerGroupPath + "/" + node);
if(StringUtils.isNotEmpty(heartbeat) if(StringUtils.isNotEmpty(heartbeat)
&& heartbeat.split(COMMA).length == Constants.HEARTBEAT_FOR_ZOOKEEPER_INFO_LENGTH){ && heartbeat.split(COMMA).length == Constants.HEARTBEAT_FOR_ZOOKEEPER_INFO_LENGTH){
String[] parts = heartbeat.split(COMMA); String[] parts = heartbeat.split(COMMA);

36
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/registry/MasterRegistry.java

@ -17,6 +17,7 @@
package org.apache.dolphinscheduler.server.master.registry; package org.apache.dolphinscheduler.server.master.registry;
import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.utils.DateUtils; import org.apache.dolphinscheduler.common.utils.DateUtils;
import org.apache.dolphinscheduler.common.utils.NetUtils; import org.apache.dolphinscheduler.common.utils.NetUtils;
import org.apache.dolphinscheduler.remote.utils.NamedThreadFactory; import org.apache.dolphinscheduler.remote.utils.NamedThreadFactory;
@ -24,9 +25,7 @@ import org.apache.dolphinscheduler.server.master.config.MasterConfig;
import org.apache.dolphinscheduler.server.registry.HeartBeatTask; import org.apache.dolphinscheduler.server.registry.HeartBeatTask;
import org.apache.dolphinscheduler.server.registry.ZookeeperRegistryCenter; import org.apache.dolphinscheduler.server.registry.ZookeeperRegistryCenter;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.state.ConnectionState; import org.apache.curator.framework.state.ConnectionState;
import org.apache.curator.framework.state.ConnectionStateListener;
import java.util.Date; import java.util.Date;
import java.util.concurrent.Executors; import java.util.concurrent.Executors;
@ -84,18 +83,17 @@ public class MasterRegistry {
public void registry() { public void registry() {
String address = NetUtils.getHost(); String address = NetUtils.getHost();
String localNodePath = getMasterPath(); String localNodePath = getMasterPath();
zookeeperRegistryCenter.getZookeeperCachedOperator().persistEphemeral(localNodePath, ""); zookeeperRegistryCenter.getRegisterOperator().persistEphemeral(localNodePath, "");
zookeeperRegistryCenter.getZookeeperCachedOperator().getZkClient().getConnectionStateListenable().addListener(new ConnectionStateListener() { zookeeperRegistryCenter.getRegisterOperator().getZkClient().getConnectionStateListenable().addListener(
@Override (client, newState) -> {
public void stateChanged(CuratorFramework client, ConnectionState newState) {
if (newState == ConnectionState.LOST) { if (newState == ConnectionState.LOST) {
logger.error("master : {} connection lost from zookeeper", address); logger.error("master : {} connection lost from zookeeper", address);
} else if (newState == ConnectionState.RECONNECTED) { } else if (newState == ConnectionState.RECONNECTED) {
logger.info("master : {} reconnected to zookeeper", address); logger.info("master : {} reconnected to zookeeper", address);
zookeeperRegistryCenter.getZookeeperCachedOperator().persistEphemeral(localNodePath, ""); zookeeperRegistryCenter.getRegisterOperator().persistEphemeral(localNodePath, "");
} else if (newState == ConnectionState.SUSPENDED) { } else if (newState == ConnectionState.SUSPENDED) {
logger.warn("master : {} connection SUSPENDED ", address); logger.warn("master : {} connection SUSPENDED ", address);
} zookeeperRegistryCenter.getRegisterOperator().persistEphemeral(localNodePath, "");
} }
}); });
int masterHeartbeatInterval = masterConfig.getMasterHeartbeatInterval(); int masterHeartbeatInterval = masterConfig.getMasterHeartbeatInterval();
@ -103,11 +101,11 @@ public class MasterRegistry {
masterConfig.getMasterReservedMemory(), masterConfig.getMasterReservedMemory(),
masterConfig.getMasterMaxCpuloadAvg(), masterConfig.getMasterMaxCpuloadAvg(),
Sets.newHashSet(getMasterPath()), Sets.newHashSet(getMasterPath()),
Constants.MASTER_PREFIX,
zookeeperRegistryCenter); zookeeperRegistryCenter);
this.heartBeatExecutor.scheduleAtFixedRate(heartBeatTask, 0, masterHeartbeatInterval, TimeUnit.SECONDS); this.heartBeatExecutor.scheduleAtFixedRate(heartBeatTask, masterHeartbeatInterval, masterHeartbeatInterval, TimeUnit.SECONDS);
logger.info("master node : {} registry to ZK path {} successfully with heartBeatInterval : {}s" logger.info("master node : {} registry to ZK successfully with heartBeatInterval : {}s", address, masterHeartbeatInterval);
, address, localNodePath, masterHeartbeatInterval);
} }
/** /**
@ -116,16 +114,14 @@ public class MasterRegistry {
public void unRegistry() { public void unRegistry() {
String address = getLocalAddress(); String address = getLocalAddress();
String localNodePath = getMasterPath(); String localNodePath = getMasterPath();
heartBeatExecutor.shutdownNow(); zookeeperRegistryCenter.getRegisterOperator().remove(localNodePath);
zookeeperRegistryCenter.getZookeeperCachedOperator().remove(localNodePath); logger.info("master node : {} unRegistry to ZK.", address);
logger.info("master node : {} unRegistry from ZK path {}."
, address, localNodePath);
} }
/** /**
* get master path * get master path
*/ */
private String getMasterPath() { public String getMasterPath() {
String address = getLocalAddress(); String address = getLocalAddress();
return this.zookeeperRegistryCenter.getMasterPath() + "/" + address; return this.zookeeperRegistryCenter.getMasterPath() + "/" + address;
} }
@ -139,4 +135,12 @@ public class MasterRegistry {
} }
/**
 * Exposes the zookeeper registry center used by this master registry.
 *
 * @return the ZookeeperRegistryCenter held by this instance
 */
public ZookeeperRegistryCenter getZookeeperRegistryCenter() {
    return this.zookeeperRegistryCenter;
}
} }

33
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/registry/HeartBeatTask.java

@ -20,6 +20,7 @@ package org.apache.dolphinscheduler.server.registry;
import static org.apache.dolphinscheduler.remote.utils.Constants.COMMA; import static org.apache.dolphinscheduler.remote.utils.Constants.COMMA;
import org.apache.dolphinscheduler.common.Constants; import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.IStoppable;
import org.apache.dolphinscheduler.common.utils.DateUtils; import org.apache.dolphinscheduler.common.utils.DateUtils;
import org.apache.dolphinscheduler.common.utils.OSUtils; import org.apache.dolphinscheduler.common.utils.OSUtils;
@ -29,7 +30,10 @@ import java.util.Set;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
public class HeartBeatTask extends Thread { /**
* Heart beat task
*/
public class HeartBeatTask implements Runnable {
private final Logger logger = LoggerFactory.getLogger(HeartBeatTask.class); private final Logger logger = LoggerFactory.getLogger(HeartBeatTask.class);
@ -37,23 +41,39 @@ public class HeartBeatTask extends Thread {
private double reservedMemory; private double reservedMemory;
private double maxCpuloadAvg; private double maxCpuloadAvg;
private Set<String> heartBeatPaths; private Set<String> heartBeatPaths;
private String serverType;
private ZookeeperRegistryCenter zookeeperRegistryCenter; private ZookeeperRegistryCenter zookeeperRegistryCenter;
/**
* server stop or not
*/
protected IStoppable stoppable = null;
public HeartBeatTask(String startTime, public HeartBeatTask(String startTime,
double reservedMemory, double reservedMemory,
double maxCpuloadAvg, double maxCpuloadAvg,
Set<String> heartBeatPaths, Set<String> heartBeatPaths,
String serverType,
ZookeeperRegistryCenter zookeeperRegistryCenter) { ZookeeperRegistryCenter zookeeperRegistryCenter) {
this.startTime = startTime; this.startTime = startTime;
this.reservedMemory = reservedMemory; this.reservedMemory = reservedMemory;
this.maxCpuloadAvg = maxCpuloadAvg; this.maxCpuloadAvg = maxCpuloadAvg;
this.heartBeatPaths = heartBeatPaths; this.heartBeatPaths = heartBeatPaths;
this.zookeeperRegistryCenter = zookeeperRegistryCenter; this.zookeeperRegistryCenter = zookeeperRegistryCenter;
this.serverType = serverType;
} }
@Override @Override
public void run() { public void run() {
try { try {
// check dead or not in zookeeper
for (String heartBeatPath : heartBeatPaths) {
if (zookeeperRegistryCenter.checkIsDeadServer(heartBeatPath, serverType)) {
zookeeperRegistryCenter.getStoppable().stop("i was judged to death, release resources and stop myself");
return;
}
}
double availablePhysicalMemorySize = OSUtils.availablePhysicalMemorySize(); double availablePhysicalMemorySize = OSUtils.availablePhysicalMemorySize();
double loadAverage = OSUtils.loadAverage(); double loadAverage = OSUtils.loadAverage();
@ -79,10 +99,19 @@ public class HeartBeatTask extends Thread {
builder.append(OSUtils.getProcessID()); builder.append(OSUtils.getProcessID());
for (String heartBeatPath : heartBeatPaths) { for (String heartBeatPath : heartBeatPaths) {
zookeeperRegistryCenter.getZookeeperCachedOperator().update(heartBeatPath, builder.toString()); zookeeperRegistryCenter.getRegisterOperator().update(heartBeatPath, builder.toString());
} }
} catch (Throwable ex) { } catch (Throwable ex) {
logger.error("error write heartbeat info", ex); logger.error("error write heartbeat info", ex);
} }
} }
/**
 * Stores the stoppable handle of the owning server in this task's
 * {@code stoppable} field.
 *
 * NOTE(review): the visible part of {@code run()} triggers the stop through
 * {@code zookeeperRegistryCenter.getStoppable()} and does not read this field;
 * confirm whether this setter is still needed.
 *
 * @param serverStoppable server stoppable interface
 */
public void setStoppable(IStoppable serverStoppable) {
this.stoppable = serverStoppable;
}
} }

4
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/registry/ZookeeperNodeManager.java

@ -93,11 +93,11 @@ public class ZookeeperNodeManager implements InitializingBean {
/** /**
* init MasterNodeListener listener * init MasterNodeListener listener
*/ */
registryCenter.getZookeeperCachedOperator().addListener(new MasterNodeListener()); registryCenter.getRegisterOperator().addListener(new MasterNodeListener());
/** /**
* init WorkerNodeListener listener * init WorkerNodeListener listener
*/ */
registryCenter.getZookeeperCachedOperator().addListener(new WorkerGroupNodeListener()); registryCenter.getRegisterOperator().addListener(new WorkerGroupNodeListener());
} }
/** /**

84
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/registry/ZookeeperRegistryCenter.java

@ -17,17 +17,25 @@
package org.apache.dolphinscheduler.server.registry; package org.apache.dolphinscheduler.server.registry;
import org.apache.dolphinscheduler.service.zk.ZookeeperCachedOperator; import static org.apache.dolphinscheduler.common.Constants.MASTER_PREFIX;
import static org.apache.dolphinscheduler.common.Constants.SINGLE_SLASH;
import static org.apache.dolphinscheduler.common.Constants.UNDERLINE;
import static org.apache.dolphinscheduler.common.Constants.WORKER_PREFIX;
import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.IStoppable;
import org.apache.dolphinscheduler.service.zk.RegisterOperator;
import org.apache.dolphinscheduler.service.zk.ZookeeperConfig; import org.apache.dolphinscheduler.service.zk.ZookeeperConfig;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
/** /**
* zookeeper register center * zookeeper register center
*/ */
@ -38,8 +46,7 @@ public class ZookeeperRegistryCenter implements InitializingBean {
@Autowired @Autowired
protected ZookeeperCachedOperator zookeeperCachedOperator; protected RegisterOperator registerOperator;
@Autowired @Autowired
private ZookeeperConfig zookeeperConfig; private ZookeeperConfig zookeeperConfig;
@ -60,6 +67,8 @@ public class ZookeeperRegistryCenter implements InitializingBean {
public final String EMPTY = ""; public final String EMPTY = "";
private IStoppable stoppable;
@Override @Override
public void afterPropertiesSet() throws Exception { public void afterPropertiesSet() throws Exception {
NODES = zookeeperConfig.getDsRoot() + "/nodes"; NODES = zookeeperConfig.getDsRoot() + "/nodes";
@ -82,23 +91,22 @@ public class ZookeeperRegistryCenter implements InitializingBean {
* init nodes * init nodes
*/ */
private void initNodes() { private void initNodes() {
zookeeperCachedOperator.persist(MASTER_PATH, EMPTY); registerOperator.persist(MASTER_PATH, EMPTY);
zookeeperCachedOperator.persist(WORKER_PATH, EMPTY); registerOperator.persist(WORKER_PATH, EMPTY);
} }
/** /**
* close * close
*/ */
public void close() { public void close() {
if (isStarted.compareAndSet(true, false)) { if (isStarted.compareAndSet(true, false) && registerOperator != null) {
if (zookeeperCachedOperator != null) { registerOperator.close();
zookeeperCachedOperator.close();
}
} }
} }
/** /**
* get master path * get master path
*
* @return master path * @return master path
*/ */
public String getMasterPath() { public String getMasterPath() {
@ -107,6 +115,7 @@ public class ZookeeperRegistryCenter implements InitializingBean {
/** /**
* get worker path * get worker path
*
* @return worker path * @return worker path
*/ */
public String getWorkerPath() { public String getWorkerPath() {
@ -115,6 +124,7 @@ public class ZookeeperRegistryCenter implements InitializingBean {
/** /**
* get master nodes directly * get master nodes directly
*
* @return master nodes * @return master nodes
*/ */
public Set<String> getMasterNodesDirectly() { public Set<String> getMasterNodesDirectly() {
@ -124,6 +134,7 @@ public class ZookeeperRegistryCenter implements InitializingBean {
/** /**
* get worker nodes directly * get worker nodes directly
*
* @return master nodes * @return master nodes
*/ */
public Set<String> getWorkerNodesDirectly() { public Set<String> getWorkerNodesDirectly() {
@ -133,6 +144,7 @@ public class ZookeeperRegistryCenter implements InitializingBean {
/** /**
* get worker group directly * get worker group directly
*
* @return worker group nodes * @return worker group nodes
*/ */
public Set<String> getWorkerGroupDirectly() { public Set<String> getWorkerGroupDirectly() {
@ -142,6 +154,7 @@ public class ZookeeperRegistryCenter implements InitializingBean {
/** /**
* get worker group nodes * get worker group nodes
*
* @param workerGroup * @param workerGroup
* @return * @return
*/ */
@ -152,6 +165,7 @@ public class ZookeeperRegistryCenter implements InitializingBean {
/** /**
* whether worker path * whether worker path
*
* @param path path * @param path path
* @return result * @return result
*/ */
@ -161,6 +175,7 @@ public class ZookeeperRegistryCenter implements InitializingBean {
/** /**
* whether master path * whether master path
*
* @param path path * @param path path
* @return result * @return result
*/ */
@ -170,6 +185,7 @@ public class ZookeeperRegistryCenter implements InitializingBean {
/** /**
* get worker group path * get worker group path
*
* @param workerGroup workerGroup * @param workerGroup workerGroup
* @return worker group path * @return worker group path
*/ */
@ -179,19 +195,53 @@ public class ZookeeperRegistryCenter implements InitializingBean {
/** /**
* get children nodes * get children nodes
*
* @param key key * @param key key
* @return children nodes * @return children nodes
*/ */
public List<String> getChildrenKeys(final String key) { public List<String> getChildrenKeys(final String key) {
return zookeeperCachedOperator.getChildrenKeys(key); return registerOperator.getChildrenKeys(key);
} }
/** /**
* get zookeeperCachedOperator * @return get dead server node parent path
* @return zookeeperCachedOperator
*/ */
public ZookeeperCachedOperator getZookeeperCachedOperator() { public String getDeadZNodeParentPath() {
return zookeeperCachedOperator; return registerOperator.getZookeeperConfig().getDsRoot() + Constants.ZOOKEEPER_DOLPHINSCHEDULER_DEAD_SERVERS;
}
public void setStoppable(IStoppable stoppable) {
this.stoppable = stoppable;
} }
/**
 * Returns the stoppable handle registered through {@code setStoppable}.
 *
 * @return the registered IStoppable, or {@code null} when none has been set
 */
public IStoppable getStoppable() {
    return this.stoppable;
}
/**
 * Decides whether the server registered at the given node has to be treated as dead.
 *
 * <p>The last segment of {@code zNode} is combined with the server-type prefix to
 * build the name of the matching entry below the dead-server parent path.
 *
 * @param zNode registration node path of the server
 * @param serverType master or worker prefix; any value other than the master prefix
 *                   is handled as a worker
 * @return {@code true} if the registration node does not exist or a matching
 *         dead-server node exists, {@code false} otherwise
 * @throws Exception if any of the underlying calls fails
 */
protected boolean checkIsDeadServer(String zNode, String serverType) throws Exception {
    // the trailing path segment identifies the server: ip_sequenceno
    String[] segments = zNode.split("/");
    String serverId = segments[segments.length - 1];

    String prefix;
    if (serverType.equals(MASTER_PREFIX)) {
        prefix = MASTER_PREFIX;
    } else {
        prefix = WORKER_PREFIX;
    }

    String deadServerPath = getDeadZNodeParentPath() + SINGLE_SLASH + prefix + UNDERLINE + serverId;

    // the dead-server node is only looked up while the registration node still exists
    return !registerOperator.isExisted(zNode) || registerOperator.isExisted(deadServerPath);
}
/**
 * Returns the register operator this registry center uses for its zookeeper operations.
 *
 * @return the RegisterOperator held by this instance
 */
public RegisterOperator getRegisterOperator() {
    return this.registerOperator;
}
} }

30
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/WorkerServer.java

@ -18,6 +18,8 @@
package org.apache.dolphinscheduler.server.worker; package org.apache.dolphinscheduler.server.worker;
import org.apache.dolphinscheduler.common.Constants; import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.IStoppable;
import org.apache.dolphinscheduler.common.enums.ZKNodeType;
import org.apache.dolphinscheduler.common.thread.Stopper; import org.apache.dolphinscheduler.common.thread.Stopper;
import org.apache.dolphinscheduler.remote.NettyRemotingServer; import org.apache.dolphinscheduler.remote.NettyRemotingServer;
import org.apache.dolphinscheduler.remote.command.CommandType; import org.apache.dolphinscheduler.remote.command.CommandType;
@ -33,11 +35,15 @@ import org.apache.dolphinscheduler.server.worker.runner.WorkerManagerThread;
import org.apache.dolphinscheduler.service.alert.AlertClientService; import org.apache.dolphinscheduler.service.alert.AlertClientService;
import org.apache.dolphinscheduler.service.bean.SpringApplicationContext; import org.apache.dolphinscheduler.service.bean.SpringApplicationContext;
import java.util.Set;
import javax.annotation.PostConstruct; import javax.annotation.PostConstruct;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.WebApplicationType; import org.springframework.boot.WebApplicationType;
import org.springframework.boot.builder.SpringApplicationBuilder; import org.springframework.boot.builder.SpringApplicationBuilder;
import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.ComponentScan;
@ -46,7 +52,7 @@ import org.springframework.context.annotation.ComponentScan;
* worker server * worker server
*/ */
@ComponentScan("org.apache.dolphinscheduler") @ComponentScan("org.apache.dolphinscheduler")
public class WorkerServer { public class WorkerServer implements IStoppable {
/** /**
* logger * logger
@ -105,24 +111,31 @@ public class WorkerServer {
*/ */
@PostConstruct @PostConstruct
public void run() { public void run() {
try {
logger.info("start worker server..."); logger.info("start worker server...");
//alert-server client registry
alertClientService = new AlertClientService(workerConfig.getAlertListenHost(),Constants.ALERT_RPC_PORT);
//init remoting server //init remoting server
NettyServerConfig serverConfig = new NettyServerConfig(); NettyServerConfig serverConfig = new NettyServerConfig();
serverConfig.setListenPort(workerConfig.getListenPort()); serverConfig.setListenPort(workerConfig.getListenPort());
this.nettyRemotingServer = new NettyRemotingServer(serverConfig); this.nettyRemotingServer = new NettyRemotingServer(serverConfig);
this.nettyRemotingServer.registerProcessor(CommandType.TASK_EXECUTE_REQUEST, new TaskExecuteProcessor(alertClientService)); this.nettyRemotingServer.registerProcessor(CommandType.TASK_EXECUTE_REQUEST, new TaskExecuteProcessor());
this.nettyRemotingServer.registerProcessor(CommandType.TASK_KILL_REQUEST, new TaskKillProcessor()); this.nettyRemotingServer.registerProcessor(CommandType.TASK_KILL_REQUEST, new TaskKillProcessor());
this.nettyRemotingServer.registerProcessor(CommandType.DB_TASK_ACK, new DBTaskAckProcessor()); this.nettyRemotingServer.registerProcessor(CommandType.DB_TASK_ACK, new DBTaskAckProcessor());
this.nettyRemotingServer.registerProcessor(CommandType.DB_TASK_RESPONSE, new DBTaskResponseProcessor()); this.nettyRemotingServer.registerProcessor(CommandType.DB_TASK_RESPONSE, new DBTaskResponseProcessor());
this.nettyRemotingServer.start(); this.nettyRemotingServer.start();
this.workerRegistry.getZookeeperRegistryCenter().setStoppable(this);
Set<String> workerZkPaths = this.workerRegistry.getWorkerZkPaths();
this.workerRegistry.getZookeeperRegistryCenter().getRegisterOperator().handleDeadServer(workerZkPaths, ZKNodeType.WORKER, Constants.DELETE_ZK_OP);
// worker registry // worker registry
this.workerRegistry.registry(); this.workerRegistry.registry();
// retry report task status
this.retryReportTaskStatusThread.start();
} catch (Exception e) {
logger.error(e.getMessage(), e);
throw new RuntimeException(e);
}
// task execute manager // task execute manager
this.workerManagerThread.start(); this.workerManagerThread.start();
@ -135,8 +148,10 @@ public class WorkerServer {
Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() { Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
@Override @Override
public void run() { public void run() {
if (Stopper.isRunning()) {
close("shutdownHook"); close("shutdownHook");
} }
}
})); }));
} }
@ -167,8 +182,13 @@ public class WorkerServer {
} catch (Exception e) { } catch (Exception e) {
logger.error("worker server stop exception ", e); logger.error("worker server stop exception ", e);
} finally {
System.exit(-1); System.exit(-1);
} }
} }
/**
 * Stops this worker server; the work is delegated to {@code close(cause)}.
 *
 * @param cause the reason the stop was requested, passed through unchanged
 */
@Override
public void stop(String cause) {
    this.close(cause);
}
} }

26
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/worker/registry/WorkerRegistry.java

@ -20,6 +20,7 @@ package org.apache.dolphinscheduler.server.worker.registry;
import static org.apache.dolphinscheduler.common.Constants.DEFAULT_WORKER_GROUP; import static org.apache.dolphinscheduler.common.Constants.DEFAULT_WORKER_GROUP;
import static org.apache.dolphinscheduler.common.Constants.SLASH; import static org.apache.dolphinscheduler.common.Constants.SLASH;
import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.utils.DateUtils; import org.apache.dolphinscheduler.common.utils.DateUtils;
import org.apache.dolphinscheduler.common.utils.NetUtils; import org.apache.dolphinscheduler.common.utils.NetUtils;
import org.apache.dolphinscheduler.common.utils.StringUtils; import org.apache.dolphinscheduler.common.utils.StringUtils;
@ -29,9 +30,7 @@ import org.apache.dolphinscheduler.server.registry.HeartBeatTask;
import org.apache.dolphinscheduler.server.registry.ZookeeperRegistryCenter; import org.apache.dolphinscheduler.server.registry.ZookeeperRegistryCenter;
import org.apache.dolphinscheduler.server.worker.config.WorkerConfig; import org.apache.dolphinscheduler.server.worker.config.WorkerConfig;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.state.ConnectionState; import org.apache.curator.framework.state.ConnectionState;
import org.apache.curator.framework.state.ConnectionStateListener;
import java.util.Date; import java.util.Date;
import java.util.Set; import java.util.Set;
@ -89,6 +88,14 @@ public class WorkerRegistry {
this.heartBeatExecutor = Executors.newSingleThreadScheduledExecutor(new NamedThreadFactory("HeartBeatExecutor")); this.heartBeatExecutor = Executors.newSingleThreadScheduledExecutor(new NamedThreadFactory("HeartBeatExecutor"));
} }
/**
* Returns the zookeeper registry center held by this worker registry, so that
* callers can reach its operators without going through this class.
*
* @return the {@code zookeeperRegistryCenter} field of this registry
*/
public ZookeeperRegistryCenter getZookeeperRegistryCenter() {
return zookeeperRegistryCenter;
}
/** /**
* registry * registry
*/ */
@ -98,19 +105,17 @@ public class WorkerRegistry {
int workerHeartbeatInterval = workerConfig.getWorkerHeartbeatInterval(); int workerHeartbeatInterval = workerConfig.getWorkerHeartbeatInterval();
for (String workerZKPath : workerZkPaths) { for (String workerZKPath : workerZkPaths) {
zookeeperRegistryCenter.getZookeeperCachedOperator().persistEphemeral(workerZKPath, ""); zookeeperRegistryCenter.getRegisterOperator().persistEphemeral(workerZKPath, "");
zookeeperRegistryCenter.getZookeeperCachedOperator().getZkClient().getConnectionStateListenable().addListener(new ConnectionStateListener() { zookeeperRegistryCenter.getRegisterOperator().getZkClient().getConnectionStateListenable().addListener(
@Override (client,newState) -> {
public void stateChanged(CuratorFramework client, ConnectionState newState) {
if (newState == ConnectionState.LOST) { if (newState == ConnectionState.LOST) {
logger.error("worker : {} connection lost from zookeeper", address); logger.error("worker : {} connection lost from zookeeper", address);
} else if (newState == ConnectionState.RECONNECTED) { } else if (newState == ConnectionState.RECONNECTED) {
logger.info("worker : {} reconnected to zookeeper", address); logger.info("worker : {} reconnected to zookeeper", address);
zookeeperRegistryCenter.getZookeeperCachedOperator().persistEphemeral(workerZKPath, ""); zookeeperRegistryCenter.getRegisterOperator().persistEphemeral(workerZKPath, "");
} else if (newState == ConnectionState.SUSPENDED) { } else if (newState == ConnectionState.SUSPENDED) {
logger.warn("worker : {} connection SUSPENDED ", address); logger.warn("worker : {} connection SUSPENDED ", address);
} }
}
}); });
logger.info("worker node : {} registry to ZK {} successfully", address, workerZKPath); logger.info("worker node : {} registry to ZK {} successfully", address, workerZKPath);
} }
@ -119,6 +124,7 @@ public class WorkerRegistry {
this.workerConfig.getWorkerReservedMemory(), this.workerConfig.getWorkerReservedMemory(),
this.workerConfig.getWorkerMaxCpuloadAvg(), this.workerConfig.getWorkerMaxCpuloadAvg(),
workerZkPaths, workerZkPaths,
Constants.WORKER_PREFIX,
this.zookeeperRegistryCenter); this.zookeeperRegistryCenter);
this.heartBeatExecutor.scheduleAtFixedRate(heartBeatTask, workerHeartbeatInterval, workerHeartbeatInterval, TimeUnit.SECONDS); this.heartBeatExecutor.scheduleAtFixedRate(heartBeatTask, workerHeartbeatInterval, workerHeartbeatInterval, TimeUnit.SECONDS);
@ -132,7 +138,7 @@ public class WorkerRegistry {
String address = getLocalAddress(); String address = getLocalAddress();
Set<String> workerZkPaths = getWorkerZkPaths(); Set<String> workerZkPaths = getWorkerZkPaths();
for (String workerZkPath : workerZkPaths) { for (String workerZkPath : workerZkPaths) {
zookeeperRegistryCenter.getZookeeperCachedOperator().remove(workerZkPath); zookeeperRegistryCenter.getRegisterOperator().remove(workerZkPath);
logger.info("worker node : {} unRegistry from ZK {}.", address, workerZkPath); logger.info("worker node : {} unRegistry from ZK {}.", address, workerZkPath);
} }
this.heartBeatExecutor.shutdownNow(); this.heartBeatExecutor.shutdownNow();
@ -141,7 +147,7 @@ public class WorkerRegistry {
/** /**
* get worker path * get worker path
*/ */
private Set<String> getWorkerZkPaths() { public Set<String> getWorkerZkPaths() {
Set<String> workerZkPaths = Sets.newHashSet(); Set<String> workerZkPaths = Sets.newHashSet();
String address = getLocalAddress(); String address = getLocalAddress();

3
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/zk/ZKMasterClient.java

@ -85,6 +85,9 @@ public class ZKMasterClient extends AbstractZKClient {
// Master registry // Master registry
masterRegistry.registry(); masterRegistry.registry();
String registPath = this.masterRegistry.getMasterPath();
masterRegistry.getZookeeperRegistryCenter().getRegisterOperator().handleDeadServer(registPath, ZKNodeType.MASTER, Constants.DELETE_ZK_OP);
// init system znode // init system znode
this.initSystemZNode(); this.initSystemZNode();

4
dolphinscheduler-server/src/test/java/org/apache/dolphinscheduler/server/master/consumer/TaskPriorityQueueConsumerTest.java

@ -45,7 +45,7 @@ import org.apache.dolphinscheduler.service.process.ProcessService;
import org.apache.dolphinscheduler.service.queue.TaskPriority; import org.apache.dolphinscheduler.service.queue.TaskPriority;
import org.apache.dolphinscheduler.service.queue.TaskPriorityQueue; import org.apache.dolphinscheduler.service.queue.TaskPriorityQueue;
import org.apache.dolphinscheduler.service.zk.CuratorZookeeperClient; import org.apache.dolphinscheduler.service.zk.CuratorZookeeperClient;
import org.apache.dolphinscheduler.service.zk.ZookeeperCachedOperator; import org.apache.dolphinscheduler.service.zk.RegisterOperator;
import org.apache.dolphinscheduler.service.zk.ZookeeperConfig; import org.apache.dolphinscheduler.service.zk.ZookeeperConfig;
import java.util.ArrayList; import java.util.ArrayList;
@ -67,7 +67,7 @@ import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
@RunWith(SpringJUnit4ClassRunner.class) @RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(classes = {DependencyConfig.class, SpringApplicationContext.class, SpringZKServer.class, CuratorZookeeperClient.class, @ContextConfiguration(classes = {DependencyConfig.class, SpringApplicationContext.class, SpringZKServer.class, CuratorZookeeperClient.class,
NettyExecutorManager.class, ExecutorDispatcher.class, ZookeeperRegistryCenter.class, TaskPriorityQueueConsumer.class, NettyExecutorManager.class, ExecutorDispatcher.class, ZookeeperRegistryCenter.class, TaskPriorityQueueConsumer.class,
ZookeeperNodeManager.class, ZookeeperCachedOperator.class, ZookeeperConfig.class, MasterConfig.class, ZookeeperNodeManager.class, RegisterOperator.class, ZookeeperConfig.class, MasterConfig.class,
CuratorZookeeperClient.class}) CuratorZookeeperClient.class})
public class TaskPriorityQueueConsumerTest { public class TaskPriorityQueueConsumerTest {

7
dolphinscheduler-server/src/test/java/org/apache/dolphinscheduler/server/master/registry/MasterRegistryTest.java

@ -19,7 +19,6 @@ package org.apache.dolphinscheduler.server.master.registry;
import static org.apache.dolphinscheduler.common.Constants.HEARTBEAT_FOR_ZOOKEEPER_INFO_LENGTH; import static org.apache.dolphinscheduler.common.Constants.HEARTBEAT_FOR_ZOOKEEPER_INFO_LENGTH;
import org.apache.dolphinscheduler.common.utils.NetUtils;
import org.apache.dolphinscheduler.remote.utils.Constants; import org.apache.dolphinscheduler.remote.utils.Constants;
import org.apache.dolphinscheduler.server.master.config.MasterConfig; import org.apache.dolphinscheduler.server.master.config.MasterConfig;
import org.apache.dolphinscheduler.server.registry.ZookeeperRegistryCenter; import org.apache.dolphinscheduler.server.registry.ZookeeperRegistryCenter;
@ -60,8 +59,8 @@ public class MasterRegistryTest {
masterRegistry.registry(); masterRegistry.registry();
String masterPath = zookeeperRegistryCenter.getMasterPath(); String masterPath = zookeeperRegistryCenter.getMasterPath();
TimeUnit.SECONDS.sleep(masterConfig.getMasterHeartbeatInterval() + 2); //wait heartbeat info write into zk node TimeUnit.SECONDS.sleep(masterConfig.getMasterHeartbeatInterval() + 2); //wait heartbeat info write into zk node
String masterNodePath = masterPath + "/" + (NetUtils.getAddr(Constants.LOCAL_ADDRESS, masterConfig.getListenPort())); String masterNodePath = masterPath + "/" + (Constants.LOCAL_ADDRESS + ":" + masterConfig.getListenPort());
String heartbeat = zookeeperRegistryCenter.getZookeeperCachedOperator().get(masterNodePath); String heartbeat = zookeeperRegistryCenter.getRegisterOperator().get(masterNodePath);
Assert.assertEquals(HEARTBEAT_FOR_ZOOKEEPER_INFO_LENGTH, heartbeat.split(",").length); Assert.assertEquals(HEARTBEAT_FOR_ZOOKEEPER_INFO_LENGTH, heartbeat.split(",").length);
masterRegistry.unRegistry(); masterRegistry.unRegistry();
} }
@ -73,7 +72,7 @@ public class MasterRegistryTest {
TimeUnit.SECONDS.sleep(masterConfig.getMasterHeartbeatInterval() + 2); //wait heartbeat info write into zk node TimeUnit.SECONDS.sleep(masterConfig.getMasterHeartbeatInterval() + 2); //wait heartbeat info write into zk node
masterRegistry.unRegistry(); masterRegistry.unRegistry();
String masterPath = zookeeperRegistryCenter.getMasterPath(); String masterPath = zookeeperRegistryCenter.getMasterPath();
List<String> childrenKeys = zookeeperRegistryCenter.getZookeeperCachedOperator().getChildrenKeys(masterPath); List<String> childrenKeys = zookeeperRegistryCenter.getRegisterOperator().getChildrenKeys(masterPath);
Assert.assertTrue(childrenKeys.isEmpty()); Assert.assertTrue(childrenKeys.isEmpty());
} }
} }

61
dolphinscheduler-server/src/test/java/org/apache/dolphinscheduler/server/registry/ZookeeperRegistryCenterTest.java

@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.registry;
import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.service.zk.RegisterOperator;
import org.apache.dolphinscheduler.service.zk.ZookeeperConfig;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.junit.MockitoJUnitRunner;
/**
 * Unit test for {@link ZookeeperRegistryCenter}.
 *
 * <p>The registry center is built by Mockito with a mocked {@link RegisterOperator},
 * so no zookeeper server is required.
 */
@RunWith(MockitoJUnitRunner.class)
public class ZookeeperRegistryCenterTest {

    private static final String DS_ROOT = "/dolphinscheduler";

    @InjectMocks
    private ZookeeperRegistryCenter zookeeperRegistryCenter;

    @Mock
    protected RegisterOperator registerOperator;

    @Mock
    private ZookeeperConfig zookeeperConfig;

    /**
     * The dead-server parent path must be the configured root followed by the
     * dead-servers constant.
     */
    @Test
    public void testGetDeadZNodeParentPath() {
        // A real config object (not the mock field) carries the root; it gets its own
        // name so it no longer shadows the zookeeperConfig mock declared on the class.
        ZookeeperConfig configWithRoot = new ZookeeperConfig();
        configWithRoot.setDsRoot(DS_ROOT);
        Mockito.when(registerOperator.getZookeeperConfig()).thenReturn(configWithRoot);

        String deadZNodeParentPath = zookeeperRegistryCenter.getDeadZNodeParentPath();

        // JUnit's contract is assertEquals(expected, actual); the correct order keeps
        // failure messages meaningful.
        Assert.assertEquals(DS_ROOT + Constants.ZOOKEEPER_DOLPHINSCHEDULER_DEAD_SERVERS, deadZNodeParentPath);
    }
}

4
dolphinscheduler-server/src/test/java/org/apache/dolphinscheduler/server/worker/processor/TaskCallbackServiceTest.java

@ -43,7 +43,7 @@ import org.apache.dolphinscheduler.server.worker.runner.WorkerManagerThread;
import org.apache.dolphinscheduler.server.zk.SpringZKServer; import org.apache.dolphinscheduler.server.zk.SpringZKServer;
import org.apache.dolphinscheduler.service.bean.SpringApplicationContext; import org.apache.dolphinscheduler.service.bean.SpringApplicationContext;
import org.apache.dolphinscheduler.service.zk.CuratorZookeeperClient; import org.apache.dolphinscheduler.service.zk.CuratorZookeeperClient;
import org.apache.dolphinscheduler.service.zk.ZookeeperCachedOperator; import org.apache.dolphinscheduler.service.zk.RegisterOperator;
import org.apache.dolphinscheduler.service.zk.ZookeeperConfig; import org.apache.dolphinscheduler.service.zk.ZookeeperConfig;
import java.util.Date; import java.util.Date;
@ -71,7 +71,7 @@ import io.netty.channel.Channel;
ZookeeperRegistryCenter.class, ZookeeperRegistryCenter.class,
MasterConfig.class, MasterConfig.class,
WorkerConfig.class, WorkerConfig.class,
ZookeeperCachedOperator.class, RegisterOperator.class,
ZookeeperConfig.class, ZookeeperConfig.class,
ZookeeperNodeManager.class, ZookeeperNodeManager.class,
TaskCallbackService.class, TaskCallbackService.class,

44
dolphinscheduler-server/src/test/java/org/apache/dolphinscheduler/server/worker/registry/WorkerRegistryTest.java

@ -19,18 +19,20 @@ package org.apache.dolphinscheduler.server.worker.registry;
import static org.apache.dolphinscheduler.common.Constants.DEFAULT_WORKER_GROUP; import static org.apache.dolphinscheduler.common.Constants.DEFAULT_WORKER_GROUP;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Executor;
import org.apache.curator.framework.imps.CuratorFrameworkImpl;
import org.apache.curator.framework.listen.Listenable;
import org.apache.curator.framework.state.ConnectionStateListener;
import org.apache.dolphinscheduler.common.utils.NetUtils; import org.apache.dolphinscheduler.common.utils.NetUtils;
import org.apache.dolphinscheduler.common.utils.StringUtils; import org.apache.dolphinscheduler.common.utils.StringUtils;
import org.apache.dolphinscheduler.server.registry.ZookeeperRegistryCenter; import org.apache.dolphinscheduler.server.registry.ZookeeperRegistryCenter;
import org.apache.dolphinscheduler.server.worker.config.WorkerConfig; import org.apache.dolphinscheduler.server.worker.config.WorkerConfig;
import org.apache.dolphinscheduler.service.zk.ZookeeperCachedOperator; import org.apache.dolphinscheduler.service.zk.RegisterOperator;
import org.apache.curator.framework.imps.CuratorFrameworkImpl;
import org.apache.curator.framework.listen.Listenable;
import org.apache.curator.framework.state.ConnectionStateListener;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Executor;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
@ -61,7 +63,7 @@ public class WorkerRegistryTest {
private ZookeeperRegistryCenter zookeeperRegistryCenter; private ZookeeperRegistryCenter zookeeperRegistryCenter;
@Mock @Mock
private ZookeeperCachedOperator zookeeperCachedOperator; private RegisterOperator registerOperator;
@Mock @Mock
private CuratorFrameworkImpl zkClient; private CuratorFrameworkImpl zkClient;
@ -69,15 +71,21 @@ public class WorkerRegistryTest {
@Mock @Mock
private WorkerConfig workerConfig; private WorkerConfig workerConfig;
private static final Set<String> workerGroups;
static {
workerGroups = Sets.newHashSet(DEFAULT_WORKER_GROUP, TEST_WORKER_GROUP);
}
@Before @Before
public void before() { public void before() {
Set<String> workerGroups = Sets.newHashSet(DEFAULT_WORKER_GROUP, TEST_WORKER_GROUP);
Mockito.when(workerConfig.getWorkerGroups()).thenReturn(workerGroups); Mockito.when(workerConfig.getWorkerGroups()).thenReturn(workerGroups);
Mockito.when(zookeeperRegistryCenter.getWorkerPath()).thenReturn("/dolphinscheduler/nodes/worker"); Mockito.when(zookeeperRegistryCenter.getWorkerPath()).thenReturn("/dolphinscheduler/nodes/worker");
Mockito.when(zookeeperRegistryCenter.getZookeeperCachedOperator()).thenReturn(zookeeperCachedOperator); Mockito.when(zookeeperRegistryCenter.getRegisterOperator()).thenReturn(registerOperator);
Mockito.when(zookeeperRegistryCenter.getZookeeperCachedOperator().getZkClient()).thenReturn(zkClient); Mockito.when(zookeeperRegistryCenter.getRegisterOperator().getZkClient()).thenReturn(zkClient);
Mockito.when(zookeeperRegistryCenter.getZookeeperCachedOperator().getZkClient().getConnectionStateListenable()).thenReturn( Mockito.when(zookeeperRegistryCenter.getRegisterOperator().getZkClient().getConnectionStateListenable()).thenReturn(
new Listenable<ConnectionStateListener>() { new Listenable<ConnectionStateListener>() {
@Override @Override
public void addListener(ConnectionStateListener connectionStateListener) { public void addListener(ConnectionStateListener connectionStateListener) {
@ -114,7 +122,7 @@ public class WorkerRegistryTest {
int i = 0; int i = 0;
for (String workerGroup : workerConfig.getWorkerGroups()) { for (String workerGroup : workerConfig.getWorkerGroups()) {
String workerZkPath = workerPath + "/" + workerGroup.trim() + "/" + (NetUtils.getAddr(workerConfig.getListenPort())); String workerZkPath = workerPath + "/" + workerGroup.trim() + "/" + (NetUtils.getAddr(workerConfig.getListenPort()));
String heartbeat = zookeeperRegistryCenter.getZookeeperCachedOperator().get(workerZkPath); String heartbeat = zookeeperRegistryCenter.getRegisterOperator().get(workerZkPath);
if (0 == i) { if (0 == i) {
Assert.assertTrue(workerZkPath.startsWith("/dolphinscheduler/nodes/worker/test/")); Assert.assertTrue(workerZkPath.startsWith("/dolphinscheduler/nodes/worker/test/"));
} else { } else {
@ -156,7 +164,7 @@ public class WorkerRegistryTest {
for (String workerGroup : workerConfig.getWorkerGroups()) { for (String workerGroup : workerConfig.getWorkerGroups()) {
String workerGroupPath = workerPath + "/" + workerGroup.trim(); String workerGroupPath = workerPath + "/" + workerGroup.trim();
List<String> childrenKeys = zookeeperRegistryCenter.getZookeeperCachedOperator().getChildrenKeys(workerGroupPath); List<String> childrenKeys = zookeeperRegistryCenter.getRegisterOperator().getChildrenKeys(workerGroupPath);
Assert.assertTrue(childrenKeys.isEmpty()); Assert.assertTrue(childrenKeys.isEmpty());
} }
@ -168,4 +176,10 @@ public class WorkerRegistryTest {
workerRegistry.unRegistry(); workerRegistry.unRegistry();
} }
@Test
public void testGetWorkerZkPaths() {
workerRegistry.init();
Assert.assertEquals(workerGroups.size(),workerRegistry.getWorkerZkPaths().size());
}
} }

81
dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/zk/AbstractZKClient.java

@ -17,14 +17,8 @@
package org.apache.dolphinscheduler.service.zk; package org.apache.dolphinscheduler.service.zk;
import static org.apache.dolphinscheduler.common.Constants.ADD_ZK_OP;
import static org.apache.dolphinscheduler.common.Constants.COLON; import static org.apache.dolphinscheduler.common.Constants.COLON;
import static org.apache.dolphinscheduler.common.Constants.DELETE_ZK_OP;
import static org.apache.dolphinscheduler.common.Constants.DIVISION_STRING; import static org.apache.dolphinscheduler.common.Constants.DIVISION_STRING;
import static org.apache.dolphinscheduler.common.Constants.MASTER_PREFIX;
import static org.apache.dolphinscheduler.common.Constants.SINGLE_SLASH;
import static org.apache.dolphinscheduler.common.Constants.UNDERLINE;
import static org.apache.dolphinscheduler.common.Constants.WORKER_PREFIX;
import org.apache.dolphinscheduler.common.Constants; import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.enums.ZKNodeType; import org.apache.dolphinscheduler.common.enums.ZKNodeType;
@ -47,57 +41,10 @@ import org.springframework.stereotype.Component;
* abstract zookeeper client * abstract zookeeper client
*/ */
@Component @Component
public abstract class AbstractZKClient extends ZookeeperCachedOperator { public abstract class AbstractZKClient extends RegisterOperator {
private static final Logger logger = LoggerFactory.getLogger(AbstractZKClient.class); private static final Logger logger = LoggerFactory.getLogger(AbstractZKClient.class);
/**
* remove dead server by host
*
* @param host host
* @param serverType serverType
*/
public void removeDeadServerByHost(String host, String serverType) {
List<String> deadServers = super.getChildrenKeys(getDeadZNodeParentPath());
for (String serverPath : deadServers) {
if (serverPath.startsWith(serverType + UNDERLINE + host)) {
String server = getDeadZNodeParentPath() + SINGLE_SLASH + serverPath;
super.remove(server);
logger.info("{} server {} deleted from zk dead server path success", serverType, host);
}
}
}
/**
* opType(add): if find dead server , then add to zk deadServerPath
* opType(delete): delete path from zk
*
* @param zNode node path
* @param zkNodeType master or worker
* @param opType delete or add
*/
public void handleDeadServer(String zNode, ZKNodeType zkNodeType, String opType) {
String host = getHostByEventDataPath(zNode);
String type = (zkNodeType == ZKNodeType.MASTER) ? MASTER_PREFIX : WORKER_PREFIX;
//check server restart, if restart , dead server path in zk should be delete
if (opType.equals(DELETE_ZK_OP)) {
removeDeadServerByHost(host, type);
} else if (opType.equals(ADD_ZK_OP)) {
String deadServerPath = getDeadZNodeParentPath() + SINGLE_SLASH + type + UNDERLINE + host;
if (!super.isExisted(deadServerPath)) {
//add dead server info to zk dead server path : /dead-servers/
super.persist(deadServerPath, (type + UNDERLINE + host));
logger.info("{} server dead , and {} added to zk dead server path success",
zkNodeType, zNode);
}
}
}
/** /**
* get active master num * get active master num
* *
@ -247,12 +194,6 @@ public abstract class AbstractZKClient extends ZookeeperCachedOperator {
return path; return path;
} }
/**
* @return get dead server node parent path
*/
protected String getDeadZNodeParentPath() {
return getZookeeperConfig().getDsRoot() + Constants.ZOOKEEPER_DOLPHINSCHEDULER_DEAD_SERVERS;
}
/** /**
* @return get master start up lock path * @return get master start up lock path
@ -310,26 +251,6 @@ public abstract class AbstractZKClient extends ZookeeperCachedOperator {
} }
} }
/**
* get host ip, string format: masterParentPath/ip
*
* @param path path
* @return host ip, string format: masterParentPath/ip
*/
protected String getHostByEventDataPath(String path) {
if (StringUtils.isEmpty(path)) {
logger.error("empty path!");
return "";
}
String[] pathArray = path.split(SINGLE_SLASH);
if (pathArray.length < 1) {
logger.error("parse ip error: {}", path);
return "";
}
return pathArray[pathArray.length - 1];
}
@Override @Override
public String toString() { public String toString() {
return "AbstractZKClient{" return "AbstractZKClient{"

155
dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/zk/RegisterOperator.java

@ -0,0 +1,155 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.service.zk;
import static org.apache.dolphinscheduler.common.Constants.ADD_ZK_OP;
import static org.apache.dolphinscheduler.common.Constants.DELETE_ZK_OP;
import static org.apache.dolphinscheduler.common.Constants.MASTER_PREFIX;
import static org.apache.dolphinscheduler.common.Constants.SINGLE_SLASH;
import static org.apache.dolphinscheduler.common.Constants.UNDERLINE;
import static org.apache.dolphinscheduler.common.Constants.WORKER_PREFIX;
import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.enums.ZKNodeType;
import org.apache.dolphinscheduler.common.utils.StringUtils;
import java.util.List;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
/**
 * Zookeeper operator that maintains the dead-server list: child nodes named
 * {@code <type>_<host>} under the dead-server parent path.
 */
@Component
public class RegisterOperator extends ZookeeperCachedOperator {

    private static final Logger logger = LoggerFactory.getLogger(RegisterOperator.class);

    /**
     * @return get dead server node parent path (configured root + dead-servers constant)
     */
    protected String getDeadZNodeParentPath() {
        return getZookeeperConfig().getDsRoot() + Constants.ZOOKEEPER_DOLPHINSCHEDULER_DEAD_SERVERS;
    }

    /**
     * remove dead server by host
     *
     * <p>Every child of the dead-server parent path whose name starts with
     * {@code serverType + UNDERLINE + host} is removed.
     *
     * @param host host
     * @param serverType serverType prefix (master or worker)
     * @throws Exception propagates any failure raised by the underlying zookeeper calls
     */
    public void removeDeadServerByHost(String host, String serverType) throws Exception {
        String deadServerParentPath = getDeadZNodeParentPath();
        String deadNodePrefix = serverType + UNDERLINE + host;
        List<String> deadServers = super.getChildrenKeys(deadServerParentPath);
        for (String serverPath : deadServers) {
            // NOTE(review): this is a prefix match, so host "a:123" also matches a dead
            // node for "a:1234" - confirm whether an exact match is intended.
            if (serverPath.startsWith(deadNodePrefix)) {
                super.remove(deadServerParentPath + SINGLE_SLASH + serverPath);
                logger.info("{} server {} deleted from zk dead server path success", serverType, host);
            }
        }
    }

    /**
     * get host ip, string format: masterParentPath/ip
     *
     * @param path path
     * @return the last segment of {@code path}, or an empty string when the path is
     *         empty or splitting it yields no segments
     */
    protected String getHostByEventDataPath(String path) {
        if (StringUtils.isEmpty(path)) {
            logger.error("empty path!");
            return "";
        }
        String[] pathArray = path.split(SINGLE_SLASH);
        // String.split drops trailing empty strings, so a path made only of separators
        // produces an empty array.
        if (pathArray.length < 1) {
            logger.error("parse ip error: {}", path);
            return "";
        }
        return pathArray[pathArray.length - 1];
    }

    /**
     * opType(add): if find dead server , then add to zk deadServerPath
     * opType(delete): delete path from zk
     * Any other opType (including {@code null}) is ignored.
     *
     * @param zNode node path
     * @param zkNodeType master or worker
     * @param opType delete or add
     * @throws Exception propagates any failure raised by the underlying zookeeper calls
     */
    public void handleDeadServer(String zNode, ZKNodeType zkNodeType, String opType) throws Exception {
        applyDeadServerOp(zNode, zkNodeType, resolveTypePrefix(zkNodeType), opType);
    }

    /**
     * opType(add): if find dead server , then add to zk deadServerPath
     * opType(delete): delete path from zk
     * Any other opType (including {@code null}) is ignored. The operation is applied
     * to every node path in the set.
     *
     * @param zNodeSet node path set
     * @param zkNodeType master or worker
     * @param opType delete or add
     * @throws Exception propagates any failure raised by the underlying zookeeper calls
     */
    public void handleDeadServer(Set<String> zNodeSet, ZKNodeType zkNodeType, String opType) throws Exception {
        String type = resolveTypePrefix(zkNodeType);
        for (String zNode : zNodeSet) {
            applyDeadServerOp(zNode, zkNodeType, type, opType);
        }
    }

    /**
     * Maps the node type to the prefix used in dead-server node names.
     */
    private static String resolveTypePrefix(ZKNodeType zkNodeType) {
        return (zkNodeType == ZKNodeType.MASTER) ? MASTER_PREFIX : WORKER_PREFIX;
    }

    /**
     * Applies one add/delete operation for a single node path. Shared by both
     * handleDeadServer overloads; private so that neither overload depends on the
     * other being overridden.
     */
    private void applyDeadServerOp(String zNode, ZKNodeType zkNodeType, String type, String opType) throws Exception {
        String host = getHostByEventDataPath(zNode);
        // Constant-first equals: a null opType is treated like any other unknown operation.
        if (DELETE_ZK_OP.equals(opType)) {
            //check server restart, if restart , dead server path in zk should be delete
            removeDeadServerByHost(host, type);
        } else if (ADD_ZK_OP.equals(opType)) {
            String deadServerPath = getDeadZNodeParentPath() + SINGLE_SLASH + type + UNDERLINE + host;
            if (!super.isExisted(deadServerPath)) {
                //add dead server info to zk dead server path : /dead-servers/
                super.persist(deadServerPath, (type + UNDERLINE + host));
                logger.info("{} server dead , and {} added to zk dead server path success",
                        zkNodeType, zNode);
            }
        }
    }
}

3
dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/zk/ZookeeperCachedOperator.java

@ -32,6 +32,9 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
/**
* zookeeper cache operator
*/
@Component @Component
public class ZookeeperCachedOperator extends ZookeeperOperator { public class ZookeeperCachedOperator extends ZookeeperOperator {

116
dolphinscheduler-service/src/test/java/org/apache/dolphinscheduler/service/zk/RegisterOperatorTest.java

@ -0,0 +1,116 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.service.zk;
import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.enums.ZKNodeType;
import java.util.concurrent.TimeUnit;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.junit.MockitoJUnitRunner;
/**
* Test for {@link RegisterOperator} that talks to a local zookeeper server.
* The server is started on port 2185 before each test and stopped afterwards;
* the operator is pointed at 127.0.0.1:2185 through a mocked ZookeeperConfig.
*/
// The Silent runner is used; NOTE(review): presumably so that stubs which a given
// test does not consume are not reported as failures - confirm against Mockito docs.
@RunWith(MockitoJUnitRunner.Silent.class)
public class RegisterOperatorTest {
// Shared across tests; created lazily inside the background thread in before().
private static ZKServer zkServer;
@InjectMocks
private RegisterOperator registerOperator;
@Mock
private ZookeeperConfig zookeeperConfig;
private static final String DS_ROOT = "/dolphinscheduler";
private static final String MASTER_NODE = "127.0.0.1:5678";
/**
* Starts the local zookeeper server on port 2185 in a background thread,
* so this method returns without waiting for the server to be ready.
*/
@Before
public void before() {
new Thread(() -> {
if (zkServer == null) {
zkServer = new ZKServer();
}
zkServer.startLocalZkServer(2185);
}).start();
}
/**
* Stubs the zookeeper configuration, initializes the operator via
* afterPropertiesSet() and checks that a zk client was created.
* Also called by the other tests as their setup step.
*/
@Test
public void testAfterPropertiesSet() throws Exception {
// NOTE(review): presumably this wait gives the background server thread time to
// come up before the client connects - confirm; it makes every test take 10s+.
TimeUnit.SECONDS.sleep(10);
Mockito.when(zookeeperConfig.getServerList()).thenReturn("127.0.0.1:2185");
Mockito.when(zookeeperConfig.getBaseSleepTimeMs()).thenReturn(100);
Mockito.when(zookeeperConfig.getMaxRetries()).thenReturn(10);
Mockito.when(zookeeperConfig.getMaxSleepMs()).thenReturn(30000);
Mockito.when(zookeeperConfig.getSessionTimeoutMs()).thenReturn(60000);
Mockito.when(zookeeperConfig.getConnectionTimeoutMs()).thenReturn(30000);
Mockito.when(zookeeperConfig.getDigest()).thenReturn("");
Mockito.when(zookeeperConfig.getDsRoot()).thenReturn(DS_ROOT);
Mockito.when(zookeeperConfig.getMaxWaitTime()).thenReturn(30000);
registerOperator.afterPropertiesSet();
Assert.assertNotNull(registerOperator.getZkClient());
}
/**
* Stops the local zookeeper server if one was created.
*/
@After
public void after() {
if (zkServer != null) {
zkServer.stop();
}
}
/**
* The dead-server parent path is the configured root plus the dead-servers constant.
*/
@Test
public void testGetDeadZNodeParentPath() throws Exception {
// Reuses the initialization test as setup (connects the operator to the server).
testAfterPropertiesSet();
String path = registerOperator.getDeadZNodeParentPath();
Assert.assertEquals(DS_ROOT + Constants.ZOOKEEPER_DOLPHINSCHEDULER_DEAD_SERVERS, path);
}
/**
* Adding a master as dead creates a child named "<MASTER_PREFIX>_<host>" under
* the dead-server parent path.
*/
@Test
public void testHandleDeadServer() throws Exception {
testAfterPropertiesSet();
registerOperator.handleDeadServer(MASTER_NODE, ZKNodeType.MASTER,Constants.ADD_ZK_OP);
String path = registerOperator.getDeadZNodeParentPath();
Assert.assertTrue(registerOperator.getChildrenKeys(path).contains(String.format("%s_%s",Constants.MASTER_PREFIX,MASTER_NODE)));
}
/**
* After a master is added to the dead-server list, removing it by host
* deletes the corresponding child node.
*/
@Test
public void testRemoveDeadServerByHost() throws Exception {
testAfterPropertiesSet();
String path = registerOperator.getDeadZNodeParentPath();
registerOperator.handleDeadServer(MASTER_NODE, ZKNodeType.MASTER,Constants.ADD_ZK_OP);
Assert.assertTrue(registerOperator.getChildrenKeys(path).contains(String.format("%s_%s",Constants.MASTER_PREFIX,MASTER_NODE)));
registerOperator.removeDeadServerByHost(MASTER_NODE,Constants.MASTER_PREFIX);
Assert.assertFalse(registerOperator.getChildrenKeys(path).contains(String.format("%s_%s",Constants.MASTER_PREFIX,MASTER_NODE)));
}
}

2
pom.xml

@ -930,6 +930,7 @@
<include>**/server/master/processor/TaskKillResponseProcessorTest.java</include> <include>**/server/master/processor/TaskKillResponseProcessorTest.java</include>
<include>**/server/master/processor/queue/TaskResponseServiceTest.java</include> <include>**/server/master/processor/queue/TaskResponseServiceTest.java</include>
<include>**/server/register/ZookeeperNodeManagerTest.java</include> <include>**/server/register/ZookeeperNodeManagerTest.java</include>
<include>**/server/register/ZookeeperRegistryCenterTest.java</include>
<include>**/server/utils/DataxUtilsTest.java</include> <include>**/server/utils/DataxUtilsTest.java</include>
<include>**/server/utils/ExecutionContextTestUtils.java</include> <include>**/server/utils/ExecutionContextTestUtils.java</include>
<include>**/server/utils/HostTest.java</include> <include>**/server/utils/HostTest.java</include>
@ -961,6 +962,7 @@
<include>**/service/zk/DefaultEnsembleProviderTest.java</include> <include>**/service/zk/DefaultEnsembleProviderTest.java</include>
<include>**/service/zk/ZKServerTest.java</include> <include>**/service/zk/ZKServerTest.java</include>
<include>**/service/zk/CuratorZookeeperClientTest.java</include> <include>**/service/zk/CuratorZookeeperClientTest.java</include>
<include>**/service/zk/RegisterOperatorTest.java</include>
<include>**/service/queue/TaskUpdateQueueTest.java</include> <include>**/service/queue/TaskUpdateQueueTest.java</include>
<include>**/service/queue/PeerTaskInstancePriorityQueueTest.java</include> <include>**/service/queue/PeerTaskInstancePriorityQueueTest.java</include>

Loading…
Cancel
Save