|
|
|
/*
|
|
|
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
|
* contributor license agreements. See the NOTICE file distributed with
|
|
|
|
* this work for additional information regarding copyright ownership.
|
|
|
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
|
* (the "License"); you may not use this file except in compliance with
|
|
|
|
* the License. You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
package org.apache.dolphinscheduler.server.master.runner;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import com.alibaba.fastjson.JSON;
|
|
|
|
|
|
|
|
import org.apache.dolphinscheduler.common.Constants;
|
|
|
|
import org.apache.dolphinscheduler.common.enums.ExecutionStatus;
|
|
|
|
import org.apache.dolphinscheduler.common.enums.TaskTimeoutStrategy;
|
|
|
|
import org.apache.dolphinscheduler.common.model.TaskNode;
|
|
|
|
import org.apache.dolphinscheduler.common.task.TaskTimeoutParameter;
|
|
|
|
import org.apache.dolphinscheduler.common.thread.Stopper;
|
no valid worker group,master can kill task directly (#2541)
* dispatch task fail will set task status failed
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* #2486 bug fix
* host and workergroup compatible
* EnterpriseWeChatUtils modify
* EnterpriseWeChatUtils modify
* EnterpriseWeChatUtils modify
* #2499 bug fix
* add comment
* revert comment
* revert comment
* #2499 buf fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
Co-authored-by: qiaozhanwei <qiaozhanwei@analysys.com.cn>
4 years ago
|
|
|
import org.apache.dolphinscheduler.common.utils.CollectionUtils;
|
|
|
|
import org.apache.dolphinscheduler.dao.entity.ProcessDefinition;
|
|
|
|
import org.apache.dolphinscheduler.dao.entity.TaskInstance;
|
|
|
|
import org.apache.dolphinscheduler.remote.command.TaskKillRequestCommand;
|
|
|
|
import org.apache.dolphinscheduler.remote.utils.Host;
|
|
|
|
import org.apache.dolphinscheduler.server.master.cache.TaskInstanceCacheManager;
|
|
|
|
import org.apache.dolphinscheduler.server.master.cache.impl.TaskInstanceCacheManagerImpl;
|
|
|
|
import org.apache.dolphinscheduler.server.master.dispatch.context.ExecutionContext;
|
|
|
|
import org.apache.dolphinscheduler.server.master.dispatch.enums.ExecutorType;
|
|
|
|
import org.apache.dolphinscheduler.server.master.dispatch.executor.NettyExecutorManager;
|
no valid worker group,master can kill task directly (#2541)
* dispatch task fail will set task status failed
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* #2486 bug fix
* host and workergroup compatible
* EnterpriseWeChatUtils modify
* EnterpriseWeChatUtils modify
* EnterpriseWeChatUtils modify
* #2499 bug fix
* add comment
* revert comment
* revert comment
* #2499 buf fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
Co-authored-by: qiaozhanwei <qiaozhanwei@analysys.com.cn>
4 years ago
|
|
|
import org.apache.dolphinscheduler.server.registry.ZookeeperRegistryCenter;
|
|
|
|
import org.apache.dolphinscheduler.service.bean.SpringApplicationContext;
|
no valid worker group,master can kill task directly (#2541)
* dispatch task fail will set task status failed
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* #2486 bug fix
* host and workergroup compatible
* EnterpriseWeChatUtils modify
* EnterpriseWeChatUtils modify
* EnterpriseWeChatUtils modify
* #2499 bug fix
* add comment
* revert comment
* revert comment
* #2499 buf fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
Co-authored-by: qiaozhanwei <qiaozhanwei@analysys.com.cn>
4 years ago
|
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
|
|
|
|
|
|
import java.util.Date;
|
no valid worker group,master can kill task directly (#2541)
* dispatch task fail will set task status failed
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* #2486 bug fix
* host and workergroup compatible
* EnterpriseWeChatUtils modify
* EnterpriseWeChatUtils modify
* EnterpriseWeChatUtils modify
* #2499 bug fix
* add comment
* revert comment
* revert comment
* #2499 buf fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
Co-authored-by: qiaozhanwei <qiaozhanwei@analysys.com.cn>
4 years ago
|
|
|
import java.util.Set;
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* master task exec thread
|
|
|
|
*/
|
|
|
|
public class MasterTaskExecThread extends MasterBaseTaskExecThread {
|
|
|
|
|
|
|
|
/**
|
|
|
|
* taskInstance state manager
|
|
|
|
*/
|
|
|
|
private TaskInstanceCacheManager taskInstanceCacheManager;
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * netty executor manager, resolved from the Spring application context in the constructor.
 * NOTE(review): presumably used to send commands (e.g. task kill) to workers - confirm against usages.
 */
private NettyExecutorManager nettyExecutorManager;
|
|
|
|
|
no valid worker group,master can kill task directly (#2541)
* dispatch task fail will set task status failed
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* #2486 bug fix
* host and workergroup compatible
* EnterpriseWeChatUtils modify
* EnterpriseWeChatUtils modify
* EnterpriseWeChatUtils modify
* #2499 bug fix
* add comment
* revert comment
* revert comment
* #2499 buf fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
Co-authored-by: qiaozhanwei <qiaozhanwei@analysys.com.cn>
4 years ago
|
|
|
|
|
|
|
/**
|
|
|
|
* zookeeper register center
|
|
|
|
*/
|
|
|
|
private ZookeeperRegistryCenter zookeeperRegistryCenter;
|
|
|
|
|
|
|
|
/**
 * Creates the exec thread for one task instance and resolves the collaborating
 * beans (task instance cache manager, netty executor manager, zookeeper registry
 * center) from the Spring application context.
 *
 * @param taskInstance task instance handled by this thread
 */
public MasterTaskExecThread(TaskInstance taskInstance) {
    super(taskInstance);
    taskInstanceCacheManager = SpringApplicationContext.getBean(TaskInstanceCacheManagerImpl.class);
    nettyExecutorManager = SpringApplicationContext.getBean(NettyExecutorManager.class);
    zookeeperRegistryCenter = SpringApplicationContext.getBean(ZookeeperRegistryCenter.class);
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* get task instance
|
|
|
|
* @return TaskInstance
|
|
|
|
*/
|
|
|
|
@Override
|
|
|
|
public TaskInstance getTaskInstance(){
|
|
|
|
return this.taskInstance;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* whether already Killed,default false
|
|
|
|
*/
|
|
|
|
private boolean alreadyKilled = false;
|
|
|
|
|
|
|
|
/**
|
1,ZookeeperRegister use common.properties zookeeperRoot path 2,api start exclude org.apache.dolphinscheduler.server.* (#2307)
* 1, master persistent task
2. extract master and worker communication model
* 1, master persistent task
2. extract master and worker communication model
* 1, master persistent task
2. extract master and worker communication model
* add license
* modify javadoc error
* TaskExecutionContext create modify
* buildAckCommand taskInstanceId not set modify
* java doc error modify
* add comment
* ExecutorManager interface add generic type
* add TaskInstanceCacheManager receive Worker report result
* TaskInstance setExecutePath
* add TaskInstanceCacheManager to receive Worker Task result report
* TaskInstanceCacheManager add remove method
* add license
* add dispatcht task method
* AbstractCommandExecutor remove db access
* AbstractCommandExecutor remove db access
* AbstractCommandExecutor remove db access
* AbstractCommandExecutor remove db access
* AbstractCommandExecutor remove db access
* AbstractCommandExecutor remove db access
* AbstractCommandExecutor remove db access
* taskInstanceCache is null ,need load from db
* taskInstanceCache is null ,need load from db
* taskInstanceCache is null ,need load from db
* 1,worker TaskPros use TaskExecutionContext replase
2,Master kill Task , KillTaskProcessor modify
* worker remove db
* ShellTask modify
* master persistence processId and appIds
* master persistence processId and appIds
* master add kill task logic
* master add kill task logic
* master add kill task logic
* javadoc error modify
* remove chinese log
* executeDirectly method add Override
* remote module modify
* TaskKillResponseProcessor command type modify
* create buildKillCommand
* host add host:port format
* host add host:port format
* TaskAckProcessor modify
* TaskAckProcessor modify
* task prioriry refator
* remove ITaskQueue
* task prioriry refator
* remove ITaskQueue
* TaskPriority refactor
* remove logs
* WorkerServer refactor
* MasterSchedulerService modify
* WorkerConfig listen port modify
* modify master and worker listen port
* cancelTaskInstance set TaskExecutionContext host,logPath,executePath
* cancelTaskInstance set TaskExecutionContext host,logPath,executePath
* Encapsulate the parameters required by sqltask
* 1,Encapsulate the parameters required by sqltask
2,SQLTask optimization
* AbstractTask modify
* ProcedureTask optimization
* MasterSchedulerService modify
* TaskUpdateQueueConsumer modify
* test
* DataxTask process run debug
* DataxTask process run debug
* add protobuf dependency,MR、Spark task etc need this
* TaskUpdateQueueConsumer modify
* TaskExecutionContextBuilder set TaskInstance workgroup
* WorkerGroupService queryAllGroup modify
query available work group
* 1,get workergroup from zk modify
2,SpringConnectionFactory repeat load modify
* master and worker register ip use OSUtils.getHost()
* ProcessInstance host set ip:port format
* worker fault tolerance modify
* Constants and .env modify
* master fault tolerant bug modify
* UT add pom.xml
* timing online modify
* when taskResponse is faster than taskAck to db,task state will error
add async queue and new a thread reslove this problem
* TaskExecutionContext set host
* 1,TaskManager refactor
2, api start load server dolphinschedule-daemon.sh modify
* 1,TaskManager refactor
2, api start load server dolphinschedule-daemon.sh modify
* add UT in pom.xml
* revert dolphinscheduler-daemon.sh
* ZookeeperRegister use common.properties zookeeperRoot path
* api start exclude org.apache.dolphinscheduler.server.*
* ZookeeperRegister use common.properties zookeeperRoot path
* 1,api start load server filter
2,SHELL task exitStatusCode modify
* java doc error modify
* java doc error modify
* remove todo
Co-authored-by: qiaozhanwei <qiaozhanwei@analysys.com.cn>
4 years ago
|
|
|
* submit task instance and wait complete
|
|
|
|
*
|
|
|
|
* @return true is task quit is true
|
|
|
|
*/
|
|
|
|
@Override
|
|
|
|
public Boolean submitWaitComplete() {
|
|
|
|
Boolean result = false;
|
|
|
|
this.taskInstance = submit();
|
|
|
|
if(this.taskInstance == null){
|
|
|
|
logger.error("submit task instance to mysql and queue failed , please check and fix it");
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
if(!this.taskInstance.getState().typeIsFinished()) {
|
|
|
|
result = waitTaskQuit();
|
|
|
|
}
|
|
|
|
taskInstance.setEndTime(new Date());
|
|
|
|
processService.updateTaskInstance(taskInstance);
|
|
|
|
logger.info("task :{} id:{}, process id:{}, exec thread completed ",
|
|
|
|
this.taskInstance.getName(),taskInstance.getId(), processInstance.getId() );
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
1,ZookeeperRegister use common.properties zookeeperRoot path 2,api start exclude org.apache.dolphinscheduler.server.* (#2307)
* 1, master persistent task
2. extract master and worker communication model
* 1, master persistent task
2. extract master and worker communication model
* 1, master persistent task
2. extract master and worker communication model
* add license
* modify javadoc error
* TaskExecutionContext create modify
* buildAckCommand taskInstanceId not set modify
* java doc error modify
* add comment
* ExecutorManager interface add generic type
* add TaskInstanceCacheManager receive Worker report result
* TaskInstance setExecutePath
* add TaskInstanceCacheManager to receive Worker Task result report
* TaskInstanceCacheManager add remove method
* add license
* add dispatcht task method
* AbstractCommandExecutor remove db access
* AbstractCommandExecutor remove db access
* AbstractCommandExecutor remove db access
* AbstractCommandExecutor remove db access
* AbstractCommandExecutor remove db access
* AbstractCommandExecutor remove db access
* AbstractCommandExecutor remove db access
* taskInstanceCache is null ,need load from db
* taskInstanceCache is null ,need load from db
* taskInstanceCache is null ,need load from db
* 1,worker TaskPros use TaskExecutionContext replase
2,Master kill Task , KillTaskProcessor modify
* worker remove db
* ShellTask modify
* master persistence processId and appIds
* master persistence processId and appIds
* master add kill task logic
* master add kill task logic
* master add kill task logic
* javadoc error modify
* remove chinese log
* executeDirectly method add Override
* remote module modify
* TaskKillResponseProcessor command type modify
* create buildKillCommand
* host add host:port format
* host add host:port format
* TaskAckProcessor modify
* TaskAckProcessor modify
* task prioriry refator
* remove ITaskQueue
* task prioriry refator
* remove ITaskQueue
* TaskPriority refactor
* remove logs
* WorkerServer refactor
* MasterSchedulerService modify
* WorkerConfig listen port modify
* modify master and worker listen port
* cancelTaskInstance set TaskExecutionContext host,logPath,executePath
* cancelTaskInstance set TaskExecutionContext host,logPath,executePath
* Encapsulate the parameters required by sqltask
* 1,Encapsulate the parameters required by sqltask
2,SQLTask optimization
* AbstractTask modify
* ProcedureTask optimization
* MasterSchedulerService modify
* TaskUpdateQueueConsumer modify
* test
* DataxTask process run debug
* DataxTask process run debug
* add protobuf dependency,MR、Spark task etc need this
* TaskUpdateQueueConsumer modify
* TaskExecutionContextBuilder set TaskInstance workgroup
* WorkerGroupService queryAllGroup modify
query available work group
* 1,get workergroup from zk modify
2,SpringConnectionFactory repeat load modify
* master and worker register ip use OSUtils.getHost()
* ProcessInstance host set ip:port format
* worker fault tolerance modify
* Constants and .env modify
* master fault tolerant bug modify
* UT add pom.xml
* timing online modify
* when taskResponse is faster than taskAck to db,task state will error
add async queue and new a thread reslove this problem
* TaskExecutionContext set host
* 1,TaskManager refactor
2, api start load server dolphinschedule-daemon.sh modify
* 1,TaskManager refactor
2, api start load server dolphinschedule-daemon.sh modify
* add UT in pom.xml
* revert dolphinscheduler-daemon.sh
* ZookeeperRegister use common.properties zookeeperRoot path
* api start exclude org.apache.dolphinscheduler.server.*
* ZookeeperRegister use common.properties zookeeperRoot path
* 1,api start load server filter
2,SHELL task exitStatusCode modify
* java doc error modify
* java doc error modify
* remove todo
Co-authored-by: qiaozhanwei <qiaozhanwei@analysys.com.cn>
4 years ago
|
|
|
* polling db
|
|
|
|
*
|
|
|
|
* wait task quit
|
|
|
|
* @return true if task quit success
|
|
|
|
*/
|
|
|
|
public Boolean waitTaskQuit(){
|
|
|
|
// query new state
|
|
|
|
taskInstance = processService.findTaskInstanceById(taskInstance.getId());
|
|
|
|
logger.info("wait task: process id: {}, task id:{}, task name:{} complete",
|
|
|
|
this.taskInstance.getProcessInstanceId(), this.taskInstance.getId(), this.taskInstance.getName());
|
|
|
|
// task time out
|
|
|
|
boolean checkTimeout = false;
|
|
|
|
TaskTimeoutParameter taskTimeoutParameter = getTaskTimeoutParameter();
|
|
|
|
if(taskTimeoutParameter.getEnable()){
|
|
|
|
TaskTimeoutStrategy strategy = taskTimeoutParameter.getStrategy();
|
|
|
|
if(strategy == TaskTimeoutStrategy.WARN || strategy == TaskTimeoutStrategy.WARNFAILED){
|
|
|
|
checkTimeout = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
while (Stopper.isRunning()){
|
|
|
|
try {
|
|
|
|
if(this.processInstance == null){
|
|
|
|
logger.error("process instance not exists , master task exec thread exit");
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
// task instance add queue , waiting worker to kill
|
|
|
|
if(this.cancel || this.processInstance.getState() == ExecutionStatus.READY_STOP){
|
|
|
|
cancelTaskInstance();
|
|
|
|
}
|
|
|
|
// task instance finished
|
|
|
|
if (taskInstance.getState().typeIsFinished()){
|
|
|
|
// if task is final result , then remove taskInstance from cache
|
|
|
|
taskInstanceCacheManager.removeByTaskInstanceId(taskInstance.getId());
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if(checkTimeout){
|
|
|
|
long remainTime = getRemaintime(taskTimeoutParameter.getInterval() * 60L);
|
|
|
|
if (remainTime < 0) {
|
|
|
|
logger.warn("task id: {} execution time out",taskInstance.getId());
|
|
|
|
// process define
|
|
|
|
ProcessDefinition processDefine = processService.findProcessDefineById(processInstance.getProcessDefinitionId());
|
|
|
|
// send warn mail
|
|
|
|
alertDao.sendTaskTimeoutAlert(processInstance.getWarningGroupId(),processDefine.getReceivers(),
|
|
|
|
processDefine.getReceiversCc(), processInstance.getId(), processInstance.getName(),
|
|
|
|
taskInstance.getId(),taskInstance.getName());
|
|
|
|
checkTimeout = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// updateProcessInstance task instance
|
|
|
|
taskInstance = processService.findTaskInstanceById(taskInstance.getId());
|
|
|
|
processInstance = processService.findProcessInstanceById(processInstance.getId());
|
|
|
|
Thread.sleep(Constants.SLEEP_TIME_MILLIS);
|
|
|
|
} catch (Exception e) {
|
|
|
|
logger.error("exception",e);
|
|
|
|
if (processInstance != null) {
|
|
|
|
logger.error("wait task quit failed, instance id:{}, task id:{}",
|
|
|
|
processInstance.getId(), taskInstance.getId());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* task instance add queue , waiting worker to kill
|
|
|
|
*/
|
|
|
|
private void cancelTaskInstance() throws Exception{
|
|
|
|
if(alreadyKilled){
|
|
|
|
return ;
|
|
|
|
}
|
|
|
|
alreadyKilled = true;
|
|
|
|
|
no valid worker group,master can kill task directly (#2541)
* dispatch task fail will set task status failed
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* #2486 bug fix
* host and workergroup compatible
* EnterpriseWeChatUtils modify
* EnterpriseWeChatUtils modify
* EnterpriseWeChatUtils modify
* #2499 bug fix
* add comment
* revert comment
* revert comment
* #2499 buf fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
Co-authored-by: qiaozhanwei <qiaozhanwei@analysys.com.cn>
4 years ago
|
|
|
String taskInstanceWorkerGroup = taskInstance.getWorkerGroup();
|
|
|
|
|
|
|
|
// not exists
|
|
|
|
if (!existsValidWorkerGroup(taskInstanceWorkerGroup)){
|
|
|
|
taskInstance.setState(ExecutionStatus.KILL);
|
|
|
|
taskInstance.setEndTime(new Date());
|
|
|
|
processService.updateTaskInstance(taskInstance);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
TaskKillRequestCommand killCommand = new TaskKillRequestCommand();
|
|
|
|
killCommand.setTaskInstanceId(taskInstance.getId());
|
|
|
|
|
|
|
|
ExecutionContext executionContext = new ExecutionContext(killCommand.convert2Command(), ExecutorType.WORKER);
|
|
|
|
|
|
|
|
Host host = Host.of(taskInstance.getHost());
|
|
|
|
executionContext.setHost(host);
|
|
|
|
|
|
|
|
nettyExecutorManager.executeDirectly(executionContext);
|
|
|
|
|
no valid worker group,master can kill task directly (#2541)
* dispatch task fail will set task status failed
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* #2486 bug fix
* host and workergroup compatible
* EnterpriseWeChatUtils modify
* EnterpriseWeChatUtils modify
* EnterpriseWeChatUtils modify
* #2499 bug fix
* add comment
* revert comment
* revert comment
* #2499 buf fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
Co-authored-by: qiaozhanwei <qiaozhanwei@analysys.com.cn>
4 years ago
|
|
|
logger.info("master kill taskInstance name :{} taskInstance id:{}",
|
|
|
|
taskInstance.getName(), taskInstance.getId() );
|
|
|
|
}
|
|
|
|
|
no valid worker group,master can kill task directly (#2541)
* dispatch task fail will set task status failed
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,no worker condition , master will while ture wait for worker startup
2,worker response task status sync wait for result
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* 1,task status statistics and process status statistics bug fix (#2357)
2,worker group bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* send mail error, #2466 bug fix
* #2486 bug fix
* host and workergroup compatible
* EnterpriseWeChatUtils modify
* EnterpriseWeChatUtils modify
* EnterpriseWeChatUtils modify
* #2499 bug fix
* add comment
* revert comment
* revert comment
* #2499 buf fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* #2499 bug fix
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
* no valid worker group,master can kill task directly
Co-authored-by: qiaozhanwei <qiaozhanwei@analysys.com.cn>
4 years ago
|
|
|
/**
|
|
|
|
* whether exists valid worker group
|
|
|
|
* @param taskInstanceWorkerGroup taskInstanceWorkerGroup
|
|
|
|
* @return whether exists
|
|
|
|
*/
|
|
|
|
public Boolean existsValidWorkerGroup(String taskInstanceWorkerGroup){
|
|
|
|
Set<String> workerGroups = zookeeperRegistryCenter.getWorkerGroupDirectly();
|
|
|
|
// not worker group
|
|
|
|
if (CollectionUtils.isEmpty(workerGroups)){
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// has worker group , but not taskInstance assigned worker group
|
|
|
|
if (!workerGroups.contains(taskInstanceWorkerGroup)){
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
Set<String> workers = zookeeperRegistryCenter.getWorkerGroupNodesDirectly(taskInstanceWorkerGroup);
|
|
|
|
if (CollectionUtils.isEmpty(workers)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* get task timeout parameter
|
|
|
|
* @return TaskTimeoutParameter
|
|
|
|
*/
|
|
|
|
private TaskTimeoutParameter getTaskTimeoutParameter(){
|
|
|
|
String taskJson = taskInstance.getTaskJson();
|
|
|
|
TaskNode taskNode = JSON.parseObject(taskJson, TaskNode.class);
|
|
|
|
return taskNode.getTaskTimeoutParameter();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* get remain time?s?
|
|
|
|
*
|
|
|
|
* @return remain time
|
|
|
|
*/
|
|
|
|
private long getRemaintime(long timeoutSeconds) {
|
|
|
|
Date startTime = taskInstance.getStartTime();
|
|
|
|
long usedTime = (System.currentTimeMillis() - startTime.getTime()) / 1000;
|
|
|
|
return timeoutSeconds - usedTime;
|
|
|
|
}
|
|
|
|
}
|