分布式调度框架。
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

257 lines
9.9 KiB

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.runner;
import com.alibaba.fastjson.JSON;
import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.enums.ExecutionStatus;
import org.apache.dolphinscheduler.common.enums.TaskTimeoutStrategy;
import org.apache.dolphinscheduler.common.model.TaskNode;
import org.apache.dolphinscheduler.common.task.TaskTimeoutParameter;
import org.apache.dolphinscheduler.common.thread.Stopper;
import org.apache.dolphinscheduler.common.utils.CollectionUtils;
import org.apache.dolphinscheduler.dao.entity.ProcessDefinition;
import org.apache.dolphinscheduler.dao.entity.TaskInstance;
import org.apache.dolphinscheduler.remote.command.TaskKillRequestCommand;
import org.apache.dolphinscheduler.remote.utils.Host;
import org.apache.dolphinscheduler.server.master.cache.TaskInstanceCacheManager;
import org.apache.dolphinscheduler.server.master.cache.impl.TaskInstanceCacheManagerImpl;
import org.apache.dolphinscheduler.server.master.dispatch.context.ExecutionContext;
import org.apache.dolphinscheduler.server.master.dispatch.enums.ExecutorType;
import org.apache.dolphinscheduler.server.master.dispatch.executor.NettyExecutorManager;
import org.apache.dolphinscheduler.server.registry.ZookeeperRegistryCenter;
import org.apache.dolphinscheduler.service.bean.SpringApplicationContext;
import org.springframework.beans.factory.annotation.Autowired;
import java.util.Date;
import java.util.Set;
/**
* master task exec thread
*/
public class MasterTaskExecThread extends MasterBaseTaskExecThread {
/**
* taskInstance state manager
*/
private TaskInstanceCacheManager taskInstanceCacheManager;
private NettyExecutorManager nettyExecutorManager;
/**
* zookeeper register center
*/
private ZookeeperRegistryCenter zookeeperRegistryCenter;
/**
* constructor of MasterTaskExecThread
* @param taskInstance task instance
*/
public MasterTaskExecThread(TaskInstance taskInstance){
super(taskInstance);
this.taskInstanceCacheManager = SpringApplicationContext.getBean(TaskInstanceCacheManagerImpl.class);
this.nettyExecutorManager = SpringApplicationContext.getBean(NettyExecutorManager.class);
this.zookeeperRegistryCenter = SpringApplicationContext.getBean(ZookeeperRegistryCenter.class);
}
/**
* get task instance
* @return TaskInstance
*/
@Override
public TaskInstance getTaskInstance(){
return this.taskInstance;
}
/**
* whether already Killed,default false
*/
private boolean alreadyKilled = false;
/**
* submit task instance and wait complete
*
* @return true is task quit is true
*/
@Override
public Boolean submitWaitComplete() {
Boolean result = false;
this.taskInstance = submit();
if(this.taskInstance == null){
logger.error("submit task instance to mysql and queue failed , please check and fix it");
return result;
}
if(!this.taskInstance.getState().typeIsFinished()) {
result = waitTaskQuit();
}
taskInstance.setEndTime(new Date());
processService.updateTaskInstance(taskInstance);
logger.info("task :{} id:{}, process id:{}, exec thread completed ",
this.taskInstance.getName(),taskInstance.getId(), processInstance.getId() );
return result;
}
/**
* polling db
*
* wait task quit
* @return true if task quit success
*/
public Boolean waitTaskQuit(){
// query new state
taskInstance = processService.findTaskInstanceById(taskInstance.getId());
logger.info("wait task: process id: {}, task id:{}, task name:{} complete",
this.taskInstance.getProcessInstanceId(), this.taskInstance.getId(), this.taskInstance.getName());
// task time out
boolean checkTimeout = false;
TaskTimeoutParameter taskTimeoutParameter = getTaskTimeoutParameter();
if(taskTimeoutParameter.getEnable()){
TaskTimeoutStrategy strategy = taskTimeoutParameter.getStrategy();
if(strategy == TaskTimeoutStrategy.WARN || strategy == TaskTimeoutStrategy.WARNFAILED){
checkTimeout = true;
}
}
while (Stopper.isRunning()){
try {
if(this.processInstance == null){
logger.error("process instance not exists , master task exec thread exit");
return true;
}
// task instance add queue , waiting worker to kill
if(this.cancel || this.processInstance.getState() == ExecutionStatus.READY_STOP){
cancelTaskInstance();
}
// task instance finished
if (taskInstance.getState().typeIsFinished()){
// if task is final result , then remove taskInstance from cache
taskInstanceCacheManager.removeByTaskInstanceId(taskInstance.getId());
break;
}
if(checkTimeout){
long remainTime = getRemaintime(taskTimeoutParameter.getInterval() * 60L);
if (remainTime < 0) {
logger.warn("task id: {} execution time out",taskInstance.getId());
// process define
ProcessDefinition processDefine = processService.findProcessDefineById(processInstance.getProcessDefinitionId());
// send warn mail
alertDao.sendTaskTimeoutAlert(processInstance.getWarningGroupId(),processDefine.getReceivers(),
processDefine.getReceiversCc(), processInstance.getId(), processInstance.getName(),
taskInstance.getId(),taskInstance.getName());
checkTimeout = false;
}
}
// updateProcessInstance task instance
taskInstance = processService.findTaskInstanceById(taskInstance.getId());
processInstance = processService.findProcessInstanceById(processInstance.getId());
Thread.sleep(Constants.SLEEP_TIME_MILLIS);
} catch (Exception e) {
logger.error("exception",e);
if (processInstance != null) {
logger.error("wait task quit failed, instance id:{}, task id:{}",
processInstance.getId(), taskInstance.getId());
}
}
}
return true;
}
/**
* task instance add queue , waiting worker to kill
*/
private void cancelTaskInstance() throws Exception{
if(alreadyKilled){
return ;
}
alreadyKilled = true;
String taskInstanceWorkerGroup = taskInstance.getWorkerGroup();
// not exists
if (!existsValidWorkerGroup(taskInstanceWorkerGroup)){
taskInstance.setState(ExecutionStatus.KILL);
taskInstance.setEndTime(new Date());
processService.updateTaskInstance(taskInstance);
return;
}
TaskKillRequestCommand killCommand = new TaskKillRequestCommand();
killCommand.setTaskInstanceId(taskInstance.getId());
ExecutionContext executionContext = new ExecutionContext(killCommand.convert2Command(), ExecutorType.WORKER);
Host host = Host.of(taskInstance.getHost());
executionContext.setHost(host);
nettyExecutorManager.executeDirectly(executionContext);
logger.info("master kill taskInstance name :{} taskInstance id:{}",
taskInstance.getName(), taskInstance.getId() );
}
/**
* whether exists valid worker group
* @param taskInstanceWorkerGroup taskInstanceWorkerGroup
* @return whether exists
*/
public Boolean existsValidWorkerGroup(String taskInstanceWorkerGroup){
Set<String> workerGroups = zookeeperRegistryCenter.getWorkerGroupDirectly();
// not worker group
if (CollectionUtils.isEmpty(workerGroups)){
return false;
}
// has worker group , but not taskInstance assigned worker group
if (!workerGroups.contains(taskInstanceWorkerGroup)){
return false;
}
Set<String> workers = zookeeperRegistryCenter.getWorkerGroupNodesDirectly(taskInstanceWorkerGroup);
if (CollectionUtils.isEmpty(workers)) {
return false;
}
return true;
}
/**
* get task timeout parameter
* @return TaskTimeoutParameter
*/
private TaskTimeoutParameter getTaskTimeoutParameter(){
String taskJson = taskInstance.getTaskJson();
TaskNode taskNode = JSON.parseObject(taskJson, TaskNode.class);
return taskNode.getTaskTimeoutParameter();
}
/**
* get remain time?s?
*
* @return remain time
*/
private long getRemaintime(long timeoutSeconds) {
Date startTime = taskInstance.getStartTime();
long usedTime = (System.currentTimeMillis() - startTime.getTime()) / 1000;
return timeoutSeconds - usedTime;
}
}