Browse Source

[Bug-9501][Worker] fix kill task error before running (#9509)

3.0.0/version-upgrade
caishunfeng 3 years ago committed by GitHub
parent
commit
66d148872d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 4
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/builder/TaskExecutionContextBuilder.java
  2. 15
      dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/processor/TaskExecuteProcessor.java
  3. 34
      dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/processor/TaskKillProcessor.java
  4. 16
      dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/WorkerManagerThread.java

4
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/builder/TaskExecutionContextBuilder.java

@ -19,7 +19,6 @@ package org.apache.dolphinscheduler.server.builder;
import static org.apache.dolphinscheduler.common.Constants.SEC_2_MINUTES_TIME_UNIT; import static org.apache.dolphinscheduler.common.Constants.SEC_2_MINUTES_TIME_UNIT;
import org.apache.dolphinscheduler.plugin.task.api.enums.TaskTimeoutStrategy;
import org.apache.dolphinscheduler.common.enums.TimeoutFlag; import org.apache.dolphinscheduler.common.enums.TimeoutFlag;
import org.apache.dolphinscheduler.dao.entity.ProcessDefinition; import org.apache.dolphinscheduler.dao.entity.ProcessDefinition;
import org.apache.dolphinscheduler.dao.entity.ProcessInstance; import org.apache.dolphinscheduler.dao.entity.ProcessInstance;
@ -27,6 +26,8 @@ import org.apache.dolphinscheduler.dao.entity.TaskDefinition;
import org.apache.dolphinscheduler.dao.entity.TaskInstance; import org.apache.dolphinscheduler.dao.entity.TaskInstance;
import org.apache.dolphinscheduler.plugin.task.api.DataQualityTaskExecutionContext; import org.apache.dolphinscheduler.plugin.task.api.DataQualityTaskExecutionContext;
import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContext; import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContext;
import org.apache.dolphinscheduler.plugin.task.api.enums.ExecutionStatus;
import org.apache.dolphinscheduler.plugin.task.api.enums.TaskTimeoutStrategy;
import org.apache.dolphinscheduler.plugin.task.api.parameters.resource.ResourceParametersHelper; import org.apache.dolphinscheduler.plugin.task.api.parameters.resource.ResourceParametersHelper;
/** /**
@ -60,6 +61,7 @@ public class TaskExecutionContextBuilder {
taskExecutionContext.setDelayTime(taskInstance.getDelayTime()); taskExecutionContext.setDelayTime(taskInstance.getDelayTime());
taskExecutionContext.setVarPool(taskInstance.getVarPool()); taskExecutionContext.setVarPool(taskInstance.getVarPool());
taskExecutionContext.setDryRun(taskInstance.getDryRun()); taskExecutionContext.setDryRun(taskInstance.getDryRun());
taskExecutionContext.setCurrentExecutionStatus(ExecutionStatus.SUBMITTED_SUCCESS);
return this; return this;
} }

15
dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/processor/TaskExecuteProcessor.java

@ -85,17 +85,6 @@ public class TaskExecuteProcessor implements NettyRequestProcessor {
@Autowired @Autowired
private WorkerManagerThread workerManager; private WorkerManagerThread workerManager;
/**
* Pre-cache task to avoid extreme situations when kill task. There is no such task in the cache
*
* @param taskExecutionContext task
*/
private void setTaskCache(TaskExecutionContext taskExecutionContext) {
TaskExecutionContext preTaskCache = new TaskExecutionContext();
preTaskCache.setTaskInstanceId(taskExecutionContext.getTaskInstanceId());
TaskExecutionContextCacheManager.cacheTaskExecutionContext(preTaskCache);
}
@Override @Override
public void process(Channel channel, Command command) { public void process(Channel channel, Command command) {
Preconditions.checkArgument(CommandType.TASK_EXECUTE_REQUEST == command.getType(), Preconditions.checkArgument(CommandType.TASK_EXECUTE_REQUEST == command.getType(),
@ -118,7 +107,9 @@ public class TaskExecuteProcessor implements NettyRequestProcessor {
return; return;
} }
setTaskCache(taskExecutionContext); // set cache, it will be used when kill task
TaskExecutionContextCacheManager.cacheTaskExecutionContext(taskExecutionContext);
// todo custom logger // todo custom logger
taskExecutionContext.setHost(NetUtils.getAddr(workerConfig.getListenPort())); taskExecutionContext.setHost(NetUtils.getAddr(workerConfig.getListenPort()));

34
dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/processor/TaskKillProcessor.java

@ -86,16 +86,26 @@ public class TaskKillProcessor implements NettyRequestProcessor {
} }
logger.info("task kill command : {}", killCommand); logger.info("task kill command : {}", killCommand);
Pair<Boolean, List<String>> result = doKill(killCommand); int taskInstanceId = killCommand.getTaskInstanceId();
TaskExecutionContext taskExecutionContext = TaskExecutionContextCacheManager.getByTaskInstanceId(taskInstanceId);
taskCallbackService.addRemoteChannel(killCommand.getTaskInstanceId(),
new NettyRemoteChannel(channel, command.getOpaque()));
TaskExecutionContext taskExecutionContext = TaskExecutionContextCacheManager.getByTaskInstanceId(killCommand.getTaskInstanceId());
if (taskExecutionContext == null) { if (taskExecutionContext == null) {
logger.error("taskRequest cache is null, taskInstanceId: {}", killCommand.getTaskInstanceId()); logger.error("taskRequest cache is null, taskInstanceId: {}", killCommand.getTaskInstanceId());
return; return;
} }
Integer processId = taskExecutionContext.getProcessId();
if (processId.equals(0)) {
workerManager.killTaskBeforeExecuteByInstanceId(taskInstanceId);
TaskExecutionContextCacheManager.removeByTaskInstanceId(taskInstanceId);
logger.info("the task has not been executed and has been cancelled, task id:{}", taskInstanceId);
return;
}
Pair<Boolean, List<String>> result = doKill(taskExecutionContext);
taskCallbackService.addRemoteChannel(killCommand.getTaskInstanceId(),
new NettyRemoteChannel(channel, command.getOpaque()));
taskExecutionContext.setCurrentExecutionStatus(result.getLeft() ? ExecutionStatus.SUCCESS : ExecutionStatus.FAILURE); taskExecutionContext.setCurrentExecutionStatus(result.getLeft() ? ExecutionStatus.SUCCESS : ExecutionStatus.FAILURE);
taskExecutionContext.setAppIds(String.join(TaskConstants.COMMA, result.getRight())); taskExecutionContext.setAppIds(String.join(TaskConstants.COMMA, result.getRight()));
@ -110,21 +120,11 @@ public class TaskKillProcessor implements NettyRequestProcessor {
* *
* @return kill result * @return kill result
*/ */
private Pair<Boolean, List<String>> doKill(TaskKillRequestCommand killCommand) { private Pair<Boolean, List<String>> doKill(TaskExecutionContext taskExecutionContext) {
boolean processFlag = true; boolean processFlag = true;
List<String> appIds = Collections.emptyList(); List<String> appIds = Collections.emptyList();
int taskInstanceId = killCommand.getTaskInstanceId();
TaskExecutionContext taskExecutionContext = TaskExecutionContextCacheManager.getByTaskInstanceId(taskInstanceId);
try { try {
Integer processId = taskExecutionContext.getProcessId();
if (processId.equals(0)) {
workerManager.killTaskBeforeExecuteByInstanceId(taskInstanceId);
TaskExecutionContextCacheManager.removeByTaskInstanceId(taskInstanceId);
logger.info("the task has not been executed and has been cancelled, task id:{}", taskInstanceId);
return Pair.of(true, appIds);
}
String pidsStr = ProcessUtils.getPidsStr(taskExecutionContext.getProcessId()); String pidsStr = ProcessUtils.getPidsStr(taskExecutionContext.getProcessId());
if (!StringUtils.isEmpty(pidsStr)) { if (!StringUtils.isEmpty(pidsStr)) {
String cmd = String.format("kill -9 %s", pidsStr); String cmd = String.format("kill -9 %s", pidsStr);

16
dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/WorkerManagerThread.java

@ -17,26 +17,24 @@
package org.apache.dolphinscheduler.server.worker.runner; package org.apache.dolphinscheduler.server.worker.runner;
import org.apache.dolphinscheduler.common.enums.Event;
import org.apache.dolphinscheduler.common.storage.StorageOperate; import org.apache.dolphinscheduler.common.storage.StorageOperate;
import org.apache.dolphinscheduler.common.thread.Stopper; import org.apache.dolphinscheduler.common.thread.Stopper;
import org.apache.dolphinscheduler.common.thread.ThreadUtils; import org.apache.dolphinscheduler.common.thread.ThreadUtils;
import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContext; import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContext;
import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContextCacheManager; import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContextCacheManager;
import org.apache.dolphinscheduler.plugin.task.api.enums.ExecutionStatus; import org.apache.dolphinscheduler.plugin.task.api.enums.ExecutionStatus;
import org.apache.dolphinscheduler.remote.command.TaskExecuteResponseCommand;
import org.apache.dolphinscheduler.server.worker.cache.ResponseCache;
import org.apache.dolphinscheduler.server.worker.config.WorkerConfig; import org.apache.dolphinscheduler.server.worker.config.WorkerConfig;
import org.apache.dolphinscheduler.server.worker.processor.TaskCallbackService; import org.apache.dolphinscheduler.server.worker.processor.TaskCallbackService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.util.concurrent.DelayQueue; import java.util.concurrent.DelayQueue;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.ThreadPoolExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
/** /**
* Manage tasks * Manage tasks
*/ */
@ -105,9 +103,7 @@ public class WorkerManagerThread implements Runnable {
if (taskExecutionContext == null) { if (taskExecutionContext == null) {
return; return;
} }
TaskExecuteResponseCommand responseCommand = new TaskExecuteResponseCommand(taskExecutionContext.getTaskInstanceId(), taskExecutionContext.getProcessInstanceId()); taskExecutionContext.setCurrentExecutionStatus(ExecutionStatus.KILL);
responseCommand.setStatus(ExecutionStatus.KILL.getCode());
ResponseCache.get().cache(taskExecutionContext.getTaskInstanceId(), responseCommand.convert2Command(), Event.RESULT);
taskCallbackService.sendTaskExecuteResponseCommand(taskExecutionContext); taskCallbackService.sendTaskExecuteResponseCommand(taskExecutionContext);
} }

Loading…
Cancel
Save