|
|
|
@ -19,14 +19,17 @@ package org.apache.dolphinscheduler.server.master.consumer;
|
|
|
|
|
|
|
|
|
|
import org.apache.dolphinscheduler.common.constants.Constants; |
|
|
|
|
import org.apache.dolphinscheduler.common.enums.Flag; |
|
|
|
|
import org.apache.dolphinscheduler.common.enums.TaskEventType; |
|
|
|
|
import org.apache.dolphinscheduler.common.lifecycle.ServerLifeCycleManager; |
|
|
|
|
import org.apache.dolphinscheduler.common.thread.BaseDaemonThread; |
|
|
|
|
import org.apache.dolphinscheduler.common.thread.ThreadUtils; |
|
|
|
|
import org.apache.dolphinscheduler.common.utils.DateUtils; |
|
|
|
|
import org.apache.dolphinscheduler.dao.entity.TaskInstance; |
|
|
|
|
import org.apache.dolphinscheduler.dao.repository.TaskInstanceDao; |
|
|
|
|
import org.apache.dolphinscheduler.dao.utils.TaskCacheUtils; |
|
|
|
|
import org.apache.dolphinscheduler.plugin.storage.api.StorageOperate; |
|
|
|
|
import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContext; |
|
|
|
|
import org.apache.dolphinscheduler.plugin.task.api.enums.TaskExecutionStatus; |
|
|
|
|
import org.apache.dolphinscheduler.remote.command.Command; |
|
|
|
|
import org.apache.dolphinscheduler.remote.command.TaskDispatchCommand; |
|
|
|
|
import org.apache.dolphinscheduler.server.master.cache.ProcessInstanceExecCacheManager; |
|
|
|
@ -35,6 +38,7 @@ import org.apache.dolphinscheduler.server.master.dispatch.ExecutorDispatcher;
|
|
|
|
|
import org.apache.dolphinscheduler.server.master.dispatch.context.ExecutionContext; |
|
|
|
|
import org.apache.dolphinscheduler.server.master.dispatch.enums.ExecutorType; |
|
|
|
|
import org.apache.dolphinscheduler.server.master.dispatch.exceptions.ExecuteException; |
|
|
|
|
import org.apache.dolphinscheduler.server.master.dispatch.exceptions.WorkerGroupNotFoundException; |
|
|
|
|
import org.apache.dolphinscheduler.server.master.metrics.TaskMetrics; |
|
|
|
|
import org.apache.dolphinscheduler.server.master.processor.queue.TaskEvent; |
|
|
|
|
import org.apache.dolphinscheduler.server.master.processor.queue.TaskEventService; |
|
|
|
@ -47,6 +51,7 @@ import org.apache.commons.collections4.CollectionUtils;
|
|
|
|
|
|
|
|
|
|
import java.util.ArrayList; |
|
|
|
|
import java.util.Collections; |
|
|
|
|
import java.util.Date; |
|
|
|
|
import java.util.List; |
|
|
|
|
import java.util.Objects; |
|
|
|
|
import java.util.Optional; |
|
|
|
@ -171,8 +176,17 @@ public class TaskPriorityQueueConsumer extends BaseDaemonThread {
|
|
|
|
|
|
|
|
|
|
consumerThreadPoolExecutor.submit(() -> { |
|
|
|
|
try { |
|
|
|
|
boolean dispatchResult = this.dispatchTask(taskPriority); |
|
|
|
|
if (!dispatchResult) { |
|
|
|
|
try { |
|
|
|
|
this.dispatchTask(taskPriority); |
|
|
|
|
} catch (WorkerGroupNotFoundException e) { |
|
|
|
|
// If the worker group not found, will not try to dispatch again.
|
|
|
|
|
// The task instance will be failed
|
|
|
|
|
// todo:
|
|
|
|
|
addDispatchFailedEvent(taskPriority); |
|
|
|
|
} catch (ExecuteException e) { |
|
|
|
|
failedDispatchTasks.add(taskPriority); |
|
|
|
|
} catch (Exception e) { |
|
|
|
|
logger.error("Dispatch task error, meet an unknown exception", e); |
|
|
|
|
failedDispatchTasks.add(taskPriority); |
|
|
|
|
} |
|
|
|
|
} finally { |
|
|
|
@ -193,60 +207,50 @@ public class TaskPriorityQueueConsumer extends BaseDaemonThread {
|
|
|
|
|
* @param taskPriority taskPriority |
|
|
|
|
* @return dispatch result, return true if dispatch success, return false if dispatch failed. |
|
|
|
|
*/ |
|
|
|
|
protected boolean dispatchTask(TaskPriority taskPriority) { |
|
|
|
|
protected void dispatchTask(TaskPriority taskPriority) throws ExecuteException { |
|
|
|
|
TaskMetrics.incTaskDispatch(); |
|
|
|
|
boolean result = false; |
|
|
|
|
try { |
|
|
|
|
WorkflowExecuteRunnable workflowExecuteRunnable = |
|
|
|
|
processInstanceExecCacheManager.getByProcessInstanceId(taskPriority.getProcessInstanceId()); |
|
|
|
|
if (workflowExecuteRunnable == null) { |
|
|
|
|
logger.error("Cannot find the related processInstance of the task, taskPriority: {}", taskPriority); |
|
|
|
|
return true; |
|
|
|
|
} |
|
|
|
|
Optional<TaskInstance> taskInstanceOptional = |
|
|
|
|
workflowExecuteRunnable.getTaskInstance(taskPriority.getTaskId()); |
|
|
|
|
if (!taskInstanceOptional.isPresent()) { |
|
|
|
|
logger.error("Cannot find the task instance from related processInstance, taskPriority: {}", |
|
|
|
|
taskPriority); |
|
|
|
|
// we return true, so that we will drop this task.
|
|
|
|
|
return true; |
|
|
|
|
} |
|
|
|
|
TaskInstance taskInstance = taskInstanceOptional.get(); |
|
|
|
|
TaskExecutionContext context = taskPriority.getTaskExecutionContext(); |
|
|
|
|
ExecutionContext executionContext = |
|
|
|
|
new ExecutionContext(toCommand(context), ExecutorType.WORKER, context.getWorkerGroup(), |
|
|
|
|
taskInstance); |
|
|
|
|
|
|
|
|
|
if (isTaskNeedToCheck(taskPriority)) { |
|
|
|
|
if (taskInstanceIsFinalState(taskPriority.getTaskId())) { |
|
|
|
|
// when task finish, ignore this task, there is no need to dispatch anymore
|
|
|
|
|
logger.info("Task {} is already finished, no need to dispatch, task instance id: {}", |
|
|
|
|
taskInstance.getName(), taskInstance.getId()); |
|
|
|
|
return true; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// check task is cache execution, and decide whether to dispatch
|
|
|
|
|
if (checkIsCacheExecution(taskInstance, context)) { |
|
|
|
|
return true; |
|
|
|
|
WorkflowExecuteRunnable workflowExecuteRunnable = |
|
|
|
|
processInstanceExecCacheManager.getByProcessInstanceId(taskPriority.getProcessInstanceId()); |
|
|
|
|
if (workflowExecuteRunnable == null) { |
|
|
|
|
logger.error("Cannot find the related processInstance of the task, taskPriority: {}", taskPriority); |
|
|
|
|
return; |
|
|
|
|
} |
|
|
|
|
Optional<TaskInstance> taskInstanceOptional = |
|
|
|
|
workflowExecuteRunnable.getTaskInstance(taskPriority.getTaskId()); |
|
|
|
|
if (!taskInstanceOptional.isPresent()) { |
|
|
|
|
logger.error("Cannot find the task instance from related processInstance, taskPriority: {}", |
|
|
|
|
taskPriority); |
|
|
|
|
// we return true, so that we will drop this task.
|
|
|
|
|
return; |
|
|
|
|
} |
|
|
|
|
TaskInstance taskInstance = taskInstanceOptional.get(); |
|
|
|
|
TaskExecutionContext context = taskPriority.getTaskExecutionContext(); |
|
|
|
|
ExecutionContext executionContext = ExecutionContext.builder() |
|
|
|
|
.taskInstance(taskInstance) |
|
|
|
|
.workerGroup(context.getWorkerGroup()) |
|
|
|
|
.executorType(ExecutorType.WORKER) |
|
|
|
|
.command(toCommand(context)) |
|
|
|
|
.build(); |
|
|
|
|
|
|
|
|
|
if (isTaskNeedToCheck(taskPriority)) { |
|
|
|
|
if (taskInstanceIsFinalState(taskPriority.getTaskId())) { |
|
|
|
|
// when task finish, ignore this task, there is no need to dispatch anymore
|
|
|
|
|
logger.info("Task {} is already finished, no need to dispatch, task instance id: {}", |
|
|
|
|
taskInstance.getName(), taskInstance.getId()); |
|
|
|
|
return; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
result = dispatcher.dispatch(executionContext); |
|
|
|
|
|
|
|
|
|
if (result) { |
|
|
|
|
logger.info("Master success dispatch task to worker, taskInstanceId: {}, worker: {}", |
|
|
|
|
taskPriority.getTaskId(), |
|
|
|
|
executionContext.getHost()); |
|
|
|
|
addDispatchEvent(context, executionContext); |
|
|
|
|
} else { |
|
|
|
|
logger.info("Master failed to dispatch task to worker, taskInstanceId: {}, worker: {}", |
|
|
|
|
taskPriority.getTaskId(), |
|
|
|
|
executionContext.getHost()); |
|
|
|
|
} |
|
|
|
|
} catch (RuntimeException | ExecuteException e) { |
|
|
|
|
logger.error("Master dispatch task to worker error, taskPriority: {}", taskPriority, e); |
|
|
|
|
// check task is cache execution, and decide whether to dispatch
|
|
|
|
|
if (checkIsCacheExecution(taskInstance, context)) { |
|
|
|
|
return; |
|
|
|
|
} |
|
|
|
|
return result; |
|
|
|
|
|
|
|
|
|
dispatcher.dispatch(executionContext); |
|
|
|
|
logger.info("Master success dispatch task to worker, taskInstanceId: {}, worker: {}", |
|
|
|
|
taskPriority.getTaskId(), |
|
|
|
|
executionContext.getHost()); |
|
|
|
|
addDispatchEvent(context, executionContext); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/** |
|
|
|
@ -258,6 +262,24 @@ public class TaskPriorityQueueConsumer extends BaseDaemonThread {
|
|
|
|
|
taskEventService.addEvent(taskEvent); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
private void addDispatchFailedEvent(TaskPriority taskPriority) { |
|
|
|
|
TaskExecutionContext taskExecutionContext = taskPriority.getTaskExecutionContext(); |
|
|
|
|
TaskEvent taskEvent = TaskEvent.builder() |
|
|
|
|
.processInstanceId(taskPriority.getProcessInstanceId()) |
|
|
|
|
.taskInstanceId(taskPriority.getTaskId()) |
|
|
|
|
.state(TaskExecutionStatus.FAILURE) |
|
|
|
|
.logPath(taskExecutionContext.getLogPath()) |
|
|
|
|
.executePath(taskExecutionContext.getExecutePath()) |
|
|
|
|
.appIds(taskExecutionContext.getAppIds()) |
|
|
|
|
.processId(taskExecutionContext.getProcessId()) |
|
|
|
|
.varPool(taskExecutionContext.getVarPool()) |
|
|
|
|
.startTime(DateUtils.timeStampToDate(taskExecutionContext.getStartTime())) |
|
|
|
|
.endTime(new Date()) |
|
|
|
|
.event(TaskEventType.RESULT) |
|
|
|
|
.build(); |
|
|
|
|
taskEventService.addEvent(taskEvent); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
private Command toCommand(TaskExecutionContext taskExecutionContext) { |
|
|
|
|
// todo: we didn't set the host here, since right now we didn't need to retry this message.
|
|
|
|
|
TaskDispatchCommand requestCommand = new TaskDispatchCommand(taskExecutionContext, |
|
|
|
|