Browse Source

Merge pull request #342 from qiaozhanwei/branch-1.0.2

master startup lock dev
pull/2/head
乔占卫 5 years ago committed by GitHub
parent
commit
7f4c49364b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 5
      escheduler-common/src/main/java/cn/escheduler/common/Constants.java
  2. 1
      escheduler-common/src/main/resources/zookeeper.properties
  3. 56
      escheduler-server/src/main/java/cn/escheduler/server/zk/ZKMasterClient.java

5
escheduler-common/src/main/java/cn/escheduler/common/Constants.java

@ -162,6 +162,11 @@ public final class Constants {
*/ */
public static final String ZOOKEEPER_ESCHEDULER_LOCK_FAILOVER_WORKERS = "zookeeper.escheduler.lock.failover.workers"; public static final String ZOOKEEPER_ESCHEDULER_LOCK_FAILOVER_WORKERS = "zookeeper.escheduler.lock.failover.workers";
/**
* MasterServer startup failover running and fault tolerance process
*/
public static final String ZOOKEEPER_ESCHEDULER_LOCK_FAILOVER_STARTUP_MASTERS = "zookeeper.escheduler.lock.failover.startup.masters";
/** /**
* need send warn times when master server or worker server failover * need send warn times when master server or worker server failover
*/ */

1
escheduler-common/src/main/resources/zookeeper.properties

@ -16,6 +16,7 @@ zookeeper.escheduler.lock.workers=/escheduler/lock/workers
#escheduler failover directory #escheduler failover directory
zookeeper.escheduler.lock.failover.masters=/escheduler/lock/failover/masters zookeeper.escheduler.lock.failover.masters=/escheduler/lock/failover/masters
zookeeper.escheduler.lock.failover.workers=/escheduler/lock/failover/workers zookeeper.escheduler.lock.failover.workers=/escheduler/lock/failover/workers
zookeeper.escheduler.lock.failover.startup.masters=/escheduler/lock/failover/startup-masters
#escheduler failover directory #escheduler failover directory
zookeeper.session.timeout=300 zookeeper.session.timeout=300

56
escheduler-server/src/main/java/cn/escheduler/server/zk/ZKMasterClient.java

@ -31,6 +31,7 @@ import cn.escheduler.dao.model.TaskInstance;
import cn.escheduler.server.ResInfo; import cn.escheduler.server.ResInfo;
import cn.escheduler.server.utils.ProcessUtils; import cn.escheduler.server.utils.ProcessUtils;
import org.apache.curator.framework.CuratorFramework; import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.imps.CuratorFrameworkState;
import org.apache.curator.framework.recipes.cache.PathChildrenCache; import org.apache.curator.framework.recipes.cache.PathChildrenCache;
import org.apache.curator.framework.recipes.cache.PathChildrenCacheEvent; import org.apache.curator.framework.recipes.cache.PathChildrenCacheEvent;
import org.apache.curator.framework.recipes.cache.PathChildrenCacheListener; import org.apache.curator.framework.recipes.cache.PathChildrenCacheListener;
@ -111,21 +112,46 @@ public class ZKMasterClient extends AbstractZKClient {
// init dao // init dao
this.initDao(); this.initDao();
// init system znode InterProcessMutex mutex = null;
this.initSystemZNode(); try {
// create distributed lock with the root node path of the lock space as /escheduler/lock/failover/startup-masters
String znodeLock = getMasterStartUpLockPath();
mutex = new InterProcessMutex(zkClient, znodeLock);
mutex.acquire();
// init system znode
this.initSystemZNode();
// monitor master
this.listenerMaster();
// monitor master // monitor worker
this.listenerMaster(); this.listenerWorker();
// monitor worker // register master
this.listenerWorker(); this.registMaster();
// register master // check if fault tolerance is required,failure and tolerance
this.registMaster(); if (getActiveMasterNum() == 1) {
processDao.selfFaultTolerant(ExecutionStatus.RUNNING_EXEUTION.ordinal(),ExecutionStatus.NEED_FAULT_TOLERANCE.ordinal());
}
}catch (Exception e){
logger.error("master start up exception : " + e.getMessage(),e);
}finally {
if (mutex != null){
try {
mutex.release();
} catch (Exception e) {
if(e.getMessage().equals("instance must be started before calling this method")){
logger.warn("lock release");
}else{
logger.error("lock release failed : " + e.getMessage(),e);
}
// check if fault tolerance is required,failure and tolerance }
if (getActiveMasterNum() == 1) { }
processDao.selfFaultTolerant(ExecutionStatus.RUNNING_EXEUTION.ordinal(),ExecutionStatus.NEED_FAULT_TOLERANCE.ordinal());
} }
} }
@ -417,6 +443,14 @@ public class ZKMasterClient extends AbstractZKClient {
return conf.getString(Constants.ZOOKEEPER_ESCHEDULER_LOCK_MASTERS); return conf.getString(Constants.ZOOKEEPER_ESCHEDULER_LOCK_MASTERS);
} }
/**
* get master start up lock path
* Looks up the value stored in conf under the key
* Constants.ZOOKEEPER_ESCHEDULER_LOCK_FAILOVER_STARTUP_MASTERS.
* @return the configured master start up lock path, as returned by conf.getString
*/
public String getMasterStartUpLockPath(){
return conf.getString(Constants.ZOOKEEPER_ESCHEDULER_LOCK_FAILOVER_STARTUP_MASTERS);
}
/** /**
* get master failover lock path * get master failover lock path
* @return * @return

Loading…
Cancel
Save