Browse Source

Optimize master log, use MDC to inject workflow instance id and task instance id in log (#10516)

* Optimize master log, add workflow instance id and task instance id in log

* Use MDC to set the workflow info in log4j

* Add workflowInstanceId and taskInstanceId in MDC
3.1.0-release
Wenjun Ruan 2 years ago committed by GitHub
parent
commit
db595b3eff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 23
      dolphinscheduler-alert/dolphinscheduler-alert-server/src/main/java/org/apache/dolphinscheduler/alert/AlertServer.java
  2. 6
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java
  3. 34
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/thread/Stopper.java
  4. 10
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/thread/ThreadUtils.java
  5. 34
      dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/LoggerUtils.java
  6. 23
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/MasterServer.java
  7. 10
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/processor/StateEventProcessor.java
  8. 8
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/processor/TaskEventProcessor.java
  9. 10
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/processor/TaskExecuteResponseProcessor.java
  10. 3
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/processor/TaskKillResponseProcessor.java
  11. 8
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/processor/TaskRecallProcessor.java
  12. 21
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/processor/queue/StateEventResponseService.java
  13. 3
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/processor/queue/TaskExecuteRunnable.java
  14. 8
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/processor/queue/TaskExecuteThreadPool.java
  15. 13
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/MasterRegistryClient.java
  16. 16
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/EventExecuteService.java
  17. 5
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/FailoverExecuteThread.java
  18. 46
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/MasterSchedulerService.java
  19. 112
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/StateWheelExecuteThread.java
  20. 11
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteRunnable.java
  21. 27
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteThreadPool.java
  22. 9
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/task/TaskProcessorFactory.java
  23. 24
      dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/service/FailoverService.java
  24. 4
      dolphinscheduler-master/src/main/resources/logback-spring.xml
  25. 1
      dolphinscheduler-scheduler-plugin/dolphinscheduler-scheduler-quartz/src/main/java/org/apache/dolphinscheduler/scheduler/quartz/QuartzScheduler.java
  26. 21
      dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/alert/AlertClientService.java
  27. 5
      dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/task/TaskPluginManager.java
  28. 4
      dolphinscheduler-standalone-server/src/main/resources/logback-spring.xml
  29. 15
      dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/ProcessUtils.java
  30. 25
      dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/TaskPluginException.java
  31. 111
      dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/WorkerServer.java
  32. 97
      dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/prc/WorkerRpcServer.java
  33. 14
      dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/processor/TaskExecuteProcessor.java
  34. 7
      dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/processor/TaskExecuteRunningAckProcessor.java
  35. 4
      dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/RetryReportTaskStatusThread.java
  36. 19
      dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/TaskExecuteThread.java
  37. 3
      dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/WorkerManagerThread.java
  38. 4
      dolphinscheduler-worker/src/main/resources/logback-spring.xml
  39. 20
      dolphinscheduler-worker/src/test/java/org/apache/dolphinscheduler/server/worker/processor/TaskExecuteProcessorTest.java
  40. 21
      dolphinscheduler-worker/src/test/java/org/apache/dolphinscheduler/server/worker/runner/TaskExecuteThreadTest.java

23
dolphinscheduler-alert/dolphinscheduler-alert-server/src/main/java/org/apache/dolphinscheduler/alert/AlertServer.java

@ -48,7 +48,10 @@ public class AlertServer implements Closeable {
private final AlertConfig alertConfig; private final AlertConfig alertConfig;
private NettyRemotingServer nettyRemotingServer; private NettyRemotingServer nettyRemotingServer;
public AlertServer(PluginDao pluginDao, AlertSenderService alertSenderService, AlertRequestProcessor alertRequestProcessor, AlertConfig alertConfig) { public AlertServer(PluginDao pluginDao,
AlertSenderService alertSenderService,
AlertRequestProcessor alertRequestProcessor,
AlertConfig alertConfig) {
this.pluginDao = pluginDao; this.pluginDao = pluginDao;
this.alertSenderService = alertSenderService; this.alertSenderService = alertSenderService;
this.alertRequestProcessor = alertRequestProcessor; this.alertRequestProcessor = alertRequestProcessor;
@ -67,11 +70,12 @@ public class AlertServer implements Closeable {
@EventListener @EventListener
public void run(ApplicationReadyEvent readyEvent) { public void run(ApplicationReadyEvent readyEvent) {
logger.info("alert server starting..."); logger.info("Alert server is staring ...");
checkTable(); checkTable();
startServer(); startServer();
alertSenderService.start(); alertSenderService.start();
logger.info("Alert server is started ...");
} }
@Override @Override
@ -88,24 +92,23 @@ public class AlertServer implements Closeable {
public void destroy(String cause) { public void destroy(String cause) {
try { try {
// set stop signal is true
// execute only once // execute only once
if (Stopper.isStopped()) { if (!Stopper.stop()) {
logger.warn("AlterServer is already stopped");
return; return;
} }
logger.info("alert server is stopping ..., cause : {}", cause); logger.info("Alert server is stopping, cause: {}", cause);
// set stop signal is true
Stopper.stop();
// thread sleep 3 seconds for thread quietly stop // thread sleep 3 seconds for thread quietly stop
ThreadUtils.sleep(3000L); ThreadUtils.sleep(Constants.SERVER_CLOSE_WAIT_TIME.toMillis());
// close // close
this.nettyRemotingServer.close(); this.nettyRemotingServer.close();
logger.info("Alter server stopped, cause: {}", cause);
} catch (Exception e) { } catch (Exception e) {
logger.error("alert server stop exception ", e); logger.error("Alert server stop failed, cause: {}", cause, e);
} }
} }

6
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java

@ -22,6 +22,7 @@ import org.apache.dolphinscheduler.plugin.task.api.enums.ExecutionStatus;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.SystemUtils; import org.apache.commons.lang3.SystemUtils;
import java.time.Duration;
import java.util.regex.Pattern; import java.util.regex.Pattern;
/** /**
@ -380,6 +381,8 @@ public final class Constants {
*/ */
public static final long SLEEP_TIME_MILLIS_SHORT = 100L; public static final long SLEEP_TIME_MILLIS_SHORT = 100L;
public static final Duration SERVER_CLOSE_WAIT_TIME = Duration.ofSeconds(3);
/** /**
* one second mils * one second mils
*/ */
@ -620,6 +623,9 @@ public final class Constants {
*/ */
public static final String LOGIN_USER_KEY_TAB_PATH = "login.user.keytab.path"; public static final String LOGIN_USER_KEY_TAB_PATH = "login.user.keytab.path";
public static final String WORKFLOW_INSTANCE_ID_MDC_KEY = "workflowInstanceId";
public static final String TASK_INSTANCE_ID_MDC_KEY = "taskInstanceId";
/** /**
* task log info format * task log info format
*/ */

34
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/thread/Stopper.java

@ -19,22 +19,40 @@ package org.apache.dolphinscheduler.common.thread;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import lombok.experimental.UtilityClass;
/** /**
* if the process closes, a signal is placed as true, and all threads get this flag to stop working * If the process closes, a signal is placed as true, and all threads get this flag to stop working.
*/ */
@UtilityClass
public class Stopper { public class Stopper {
private static AtomicBoolean signal = new AtomicBoolean(false); private static final AtomicBoolean stoppedSignal = new AtomicBoolean(false);
public static final boolean isStopped() { /**
return signal.get(); * Return the flag if the Server is stopped.
*
* @return True, if the server is stopped; False, the server is still running.
*/
public static boolean isStopped() {
return stoppedSignal.get();
} }
public static final boolean isRunning() { /**
return !signal.get(); * Return the flag if the Server is stopped.
*
* @return True, if the server is running, False, the server is stopped.
*/
public static boolean isRunning() {
return !stoppedSignal.get();
} }
public static final void stop() { /**
signal.set(true); * Stop the server
*
* @return True, if the server stopped success. False, if the server is already stopped.
*/
public static boolean stop() {
return stoppedSignal.compareAndSet(false, true);
} }
} }

10
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/thread/ThreadUtils.java

@ -23,12 +23,13 @@ import java.util.concurrent.ThreadFactory;
import com.google.common.util.concurrent.ThreadFactoryBuilder; import com.google.common.util.concurrent.ThreadFactoryBuilder;
/** import lombok.experimental.UtilityClass;
* thread utils
*/ @UtilityClass
public class ThreadUtils { public class ThreadUtils {
/** /**
* Wrapper over newDaemonFixedThreadExecutor. * Wrapper over newDaemonFixedThreadExecutor.
*
* @param threadName threadName * @param threadName threadName
* @param threadsNum threadsNum * @param threadsNum threadsNum
* @return ExecutorService * @return ExecutorService
@ -41,6 +42,9 @@ public class ThreadUtils {
return Executors.newFixedThreadPool(threadsNum, threadFactory); return Executors.newFixedThreadPool(threadsNum, threadFactory);
} }
/**
* Sleep in given mills, this is not accuracy.
*/
public static void sleep(final long millis) { public static void sleep(final long millis) {
try { try {
Thread.sleep(millis); Thread.sleep(millis);

34
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/LoggerUtils.java

@ -32,16 +32,16 @@ import java.util.regex.Pattern;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.slf4j.MDC;
import lombok.experimental.UtilityClass;
/** /**
* logger utils * logger utils
*/ */
@UtilityClass
public class LoggerUtils { public class LoggerUtils {
private LoggerUtils() {
throw new UnsupportedOperationException("Construct LoggerUtils");
}
private static final Logger logger = LoggerFactory.getLogger(LoggerUtils.class); private static final Logger logger = LoggerFactory.getLogger(LoggerUtils.class);
/** /**
@ -109,4 +109,30 @@ public class LoggerUtils {
} }
return ""; return "";
} }
public static void setWorkflowAndTaskInstanceIDMDC(int workflowInstanceId, int taskInstanceId) {
setWorkflowInstanceIdMDC(workflowInstanceId);
setTaskInstanceIdMDC(taskInstanceId);
}
public static void setWorkflowInstanceIdMDC(int workflowInstanceId) {
MDC.put(Constants.WORKFLOW_INSTANCE_ID_MDC_KEY, String.valueOf(workflowInstanceId));
}
public static void setTaskInstanceIdMDC(int taskInstanceId) {
MDC.put(Constants.TASK_INSTANCE_ID_MDC_KEY, String.valueOf(taskInstanceId));
}
public static void removeWorkflowAndTaskInstanceIdMDC() {
removeWorkflowInstanceIdMDC();
removeTaskInstanceIdMDC();
}
public static void removeWorkflowInstanceIdMDC() {
MDC.remove(Constants.WORKFLOW_INSTANCE_ID_MDC_KEY);
}
public static void removeTaskInstanceIdMDC() {
MDC.remove(Constants.TASK_INSTANCE_ID_MDC_KEY);
}
} }

23
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/MasterServer.java

@ -20,6 +20,7 @@ package org.apache.dolphinscheduler.server.master;
import org.apache.dolphinscheduler.common.Constants; import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.IStoppable; import org.apache.dolphinscheduler.common.IStoppable;
import org.apache.dolphinscheduler.common.thread.Stopper; import org.apache.dolphinscheduler.common.thread.Stopper;
import org.apache.dolphinscheduler.common.thread.ThreadUtils;
import org.apache.dolphinscheduler.scheduler.api.SchedulerApi; import org.apache.dolphinscheduler.scheduler.api.SchedulerApi;
import org.apache.dolphinscheduler.server.master.registry.MasterRegistryClient; import org.apache.dolphinscheduler.server.master.registry.MasterRegistryClient;
import org.apache.dolphinscheduler.server.master.rpc.MasterRPCServer; import org.apache.dolphinscheduler.server.master.rpc.MasterRPCServer;
@ -103,7 +104,7 @@ public class MasterServer implements IStoppable {
Runtime.getRuntime().addShutdownHook(new Thread(() -> { Runtime.getRuntime().addShutdownHook(new Thread(() -> {
if (Stopper.isRunning()) { if (Stopper.isRunning()) {
close("shutdownHook"); close("MasterServer shutdownHook");
} }
})); }));
} }
@ -116,23 +117,17 @@ public class MasterServer implements IStoppable {
public void close(String cause) { public void close(String cause) {
try { try {
// set stop signal is true
// execute only once // execute only once
if (Stopper.isStopped()) { if (!Stopper.stop()) {
logger.warn("MasterServer has been stopped ..., current cause: {}", cause); logger.warn("MasterServer is already stopped, current cause: {}", cause);
return; return;
} }
logger.info("master server is stopping ..., cause : {}", cause); logger.info("Master server is stopping, current cause : {}", cause);
// set stop signal is true
Stopper.stop();
try {
// thread sleep 3 seconds for thread quietly stop // thread sleep 3 seconds for thread quietly stop
Thread.sleep(3000L); ThreadUtils.sleep(Constants.SERVER_CLOSE_WAIT_TIME.toMillis());
} catch (Exception e) {
logger.warn("thread sleep exception ", e);
}
// close // close
this.schedulerApi.close(); this.schedulerApi.close();
this.masterSchedulerService.close(); this.masterSchedulerService.close();
@ -142,9 +137,9 @@ public class MasterServer implements IStoppable {
// like ServerNodeManager,HostManager,TaskResponseService,CuratorZookeeperClient,etc // like ServerNodeManager,HostManager,TaskResponseService,CuratorZookeeperClient,etc
springApplicationContext.close(); springApplicationContext.close();
logger.info("MasterServer stopped..."); logger.info("MasterServer stopped, current cause: {}", cause);
} catch (Exception e) { } catch (Exception e) {
logger.error("master server stop exception ", e); logger.error("MasterServer stop failed, current cause: {}", cause, e);
} }
} }

10
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/processor/StateEventProcessor.java

@ -20,6 +20,7 @@ package org.apache.dolphinscheduler.server.master.processor;
import org.apache.dolphinscheduler.common.enums.StateEvent; import org.apache.dolphinscheduler.common.enums.StateEvent;
import org.apache.dolphinscheduler.common.enums.StateEventType; import org.apache.dolphinscheduler.common.enums.StateEventType;
import org.apache.dolphinscheduler.common.utils.JSONUtils; import org.apache.dolphinscheduler.common.utils.JSONUtils;
import org.apache.dolphinscheduler.common.utils.LoggerUtils;
import org.apache.dolphinscheduler.plugin.task.api.enums.ExecutionStatus; import org.apache.dolphinscheduler.plugin.task.api.enums.ExecutionStatus;
import org.apache.dolphinscheduler.remote.command.Command; import org.apache.dolphinscheduler.remote.command.Command;
import org.apache.dolphinscheduler.remote.command.CommandType; import org.apache.dolphinscheduler.remote.command.CommandType;
@ -64,8 +65,15 @@ public class StateEventProcessor implements NettyRequestProcessor {
StateEventType type = stateEvent.getTaskInstanceId() == 0 ? StateEventType.PROCESS_STATE_CHANGE : StateEventType.TASK_STATE_CHANGE; StateEventType type = stateEvent.getTaskInstanceId() == 0 ? StateEventType.PROCESS_STATE_CHANGE : StateEventType.TASK_STATE_CHANGE;
stateEvent.setType(type); stateEvent.setType(type);
logger.info("received command : {}", stateEvent); try {
LoggerUtils.setWorkflowAndTaskInstanceIDMDC(stateEvent.getProcessInstanceId(), stateEvent.getTaskInstanceId());
logger.info("Received state event change command, event: {}", stateEvent);
stateEventResponseService.addResponse(stateEvent); stateEventResponseService.addResponse(stateEvent);
}finally {
LoggerUtils.removeWorkflowAndTaskInstanceIdMDC();
}
} }
} }

8
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/processor/TaskEventProcessor.java

@ -20,6 +20,7 @@ package org.apache.dolphinscheduler.server.master.processor;
import org.apache.dolphinscheduler.common.enums.StateEvent; import org.apache.dolphinscheduler.common.enums.StateEvent;
import org.apache.dolphinscheduler.common.enums.StateEventType; import org.apache.dolphinscheduler.common.enums.StateEventType;
import org.apache.dolphinscheduler.common.utils.JSONUtils; import org.apache.dolphinscheduler.common.utils.JSONUtils;
import org.apache.dolphinscheduler.common.utils.LoggerUtils;
import org.apache.dolphinscheduler.remote.command.Command; import org.apache.dolphinscheduler.remote.command.Command;
import org.apache.dolphinscheduler.remote.command.CommandType; import org.apache.dolphinscheduler.remote.command.CommandType;
import org.apache.dolphinscheduler.remote.command.TaskEventChangeCommand; import org.apache.dolphinscheduler.remote.command.TaskEventChangeCommand;
@ -58,8 +59,13 @@ public class TaskEventProcessor implements NettyRequestProcessor {
stateEvent.setProcessInstanceId(taskEventChangeCommand.getProcessInstanceId()); stateEvent.setProcessInstanceId(taskEventChangeCommand.getProcessInstanceId());
stateEvent.setTaskInstanceId(taskEventChangeCommand.getTaskInstanceId()); stateEvent.setTaskInstanceId(taskEventChangeCommand.getTaskInstanceId());
stateEvent.setType(StateEventType.WAIT_TASK_GROUP); stateEvent.setType(StateEventType.WAIT_TASK_GROUP);
logger.info("received command : {}", stateEvent); try {
LoggerUtils.setWorkflowAndTaskInstanceIDMDC(stateEvent.getProcessInstanceId(), stateEvent.getTaskInstanceId());
logger.info("Received task event change command, event: {}", stateEvent);
stateEventResponseService.addEvent2WorkflowExecute(stateEvent); stateEventResponseService.addEvent2WorkflowExecute(stateEvent);
} finally {
LoggerUtils.removeWorkflowAndTaskInstanceIdMDC();
}
} }
} }

10
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/processor/TaskExecuteResponseProcessor.java

@ -18,6 +18,7 @@
package org.apache.dolphinscheduler.server.master.processor; package org.apache.dolphinscheduler.server.master.processor;
import org.apache.dolphinscheduler.common.utils.JSONUtils; import org.apache.dolphinscheduler.common.utils.JSONUtils;
import org.apache.dolphinscheduler.common.utils.LoggerUtils;
import org.apache.dolphinscheduler.remote.command.Command; import org.apache.dolphinscheduler.remote.command.Command;
import org.apache.dolphinscheduler.remote.command.CommandType; import org.apache.dolphinscheduler.remote.command.CommandType;
import org.apache.dolphinscheduler.remote.command.TaskExecuteResponseCommand; import org.apache.dolphinscheduler.remote.command.TaskExecuteResponseCommand;
@ -57,9 +58,14 @@ public class TaskExecuteResponseProcessor implements NettyRequestProcessor {
Preconditions.checkArgument(CommandType.TASK_EXECUTE_RESPONSE == command.getType(), String.format("invalid command type : %s", command.getType())); Preconditions.checkArgument(CommandType.TASK_EXECUTE_RESPONSE == command.getType(), String.format("invalid command type : %s", command.getType()));
TaskExecuteResponseCommand taskExecuteResponseCommand = JSONUtils.parseObject(command.getBody(), TaskExecuteResponseCommand.class); TaskExecuteResponseCommand taskExecuteResponseCommand = JSONUtils.parseObject(command.getBody(), TaskExecuteResponseCommand.class);
logger.info("received command : {}", taskExecuteResponseCommand);
TaskEvent taskResponseEvent = TaskEvent.newResultEvent(taskExecuteResponseCommand, channel); TaskEvent taskResponseEvent = TaskEvent.newResultEvent(taskExecuteResponseCommand, channel);
try {
LoggerUtils.setWorkflowAndTaskInstanceIDMDC(taskResponseEvent.getProcessInstanceId(), taskResponseEvent.getTaskInstanceId());
logger.info("Received task execute response, event: {}", taskResponseEvent);
taskEventService.addEvent(taskResponseEvent); taskEventService.addEvent(taskResponseEvent);
} finally {
LoggerUtils.removeWorkflowAndTaskInstanceIdMDC();
}
} }
} }

3
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/processor/TaskKillResponseProcessor.java

@ -51,7 +51,8 @@ public class TaskKillResponseProcessor implements NettyRequestProcessor {
Preconditions.checkArgument(CommandType.TASK_KILL_RESPONSE == command.getType(), String.format("invalid command type : %s", command.getType())); Preconditions.checkArgument(CommandType.TASK_KILL_RESPONSE == command.getType(), String.format("invalid command type : %s", command.getType()));
TaskKillResponseCommand responseCommand = JSONUtils.parseObject(command.getBody(), TaskKillResponseCommand.class); TaskKillResponseCommand responseCommand = JSONUtils.parseObject(command.getBody(), TaskKillResponseCommand.class);
logger.info("received task kill response command : {}", responseCommand); logger.info("[TaskInstance-{}] Received task kill response command : {}",
responseCommand.getTaskInstanceId(), responseCommand);
} }
} }

8
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/processor/TaskRecallProcessor.java

@ -18,6 +18,7 @@
package org.apache.dolphinscheduler.server.master.processor; package org.apache.dolphinscheduler.server.master.processor;
import org.apache.dolphinscheduler.common.utils.JSONUtils; import org.apache.dolphinscheduler.common.utils.JSONUtils;
import org.apache.dolphinscheduler.common.utils.LoggerUtils;
import org.apache.dolphinscheduler.remote.command.Command; import org.apache.dolphinscheduler.remote.command.Command;
import org.apache.dolphinscheduler.remote.command.CommandType; import org.apache.dolphinscheduler.remote.command.CommandType;
import org.apache.dolphinscheduler.remote.command.TaskRecallCommand; import org.apache.dolphinscheduler.remote.command.TaskRecallCommand;
@ -55,8 +56,13 @@ public class TaskRecallProcessor implements NettyRequestProcessor {
public void process(Channel channel, Command command) { public void process(Channel channel, Command command) {
Preconditions.checkArgument(CommandType.TASK_RECALL == command.getType(), String.format("invalid command type : %s", command.getType())); Preconditions.checkArgument(CommandType.TASK_RECALL == command.getType(), String.format("invalid command type : %s", command.getType()));
TaskRecallCommand recallCommand = JSONUtils.parseObject(command.getBody(), TaskRecallCommand.class); TaskRecallCommand recallCommand = JSONUtils.parseObject(command.getBody(), TaskRecallCommand.class);
logger.info("taskRecallCommand : {}", recallCommand);
TaskEvent taskEvent = TaskEvent.newRecallEvent(recallCommand, channel); TaskEvent taskEvent = TaskEvent.newRecallEvent(recallCommand, channel);
try {
LoggerUtils.setWorkflowAndTaskInstanceIDMDC(recallCommand.getProcessInstanceId(), recallCommand.getTaskInstanceId());
logger.info("Receive task recall command: {}", recallCommand);
taskEventService.addEvent(taskEvent); taskEventService.addEvent(taskEvent);
} finally {
LoggerUtils.removeWorkflowInstanceIdMDC();
}
} }
} }

21
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/processor/queue/StateEventResponseService.java

@ -20,6 +20,7 @@ package org.apache.dolphinscheduler.server.master.processor.queue;
import org.apache.dolphinscheduler.common.enums.StateEvent; import org.apache.dolphinscheduler.common.enums.StateEvent;
import org.apache.dolphinscheduler.common.thread.BaseDaemonThread; import org.apache.dolphinscheduler.common.thread.BaseDaemonThread;
import org.apache.dolphinscheduler.common.thread.Stopper; import org.apache.dolphinscheduler.common.thread.Stopper;
import org.apache.dolphinscheduler.common.utils.LoggerUtils;
import org.apache.dolphinscheduler.plugin.task.api.enums.ExecutionStatus; import org.apache.dolphinscheduler.plugin.task.api.enums.ExecutionStatus;
import org.apache.dolphinscheduler.remote.command.StateEventResponseCommand; import org.apache.dolphinscheduler.remote.command.StateEventResponseCommand;
import org.apache.dolphinscheduler.server.master.cache.ProcessInstanceExecCacheManager; import org.apache.dolphinscheduler.server.master.cache.ProcessInstanceExecCacheManager;
@ -81,7 +82,13 @@ public class StateEventResponseService {
List<StateEvent> remainEvents = new ArrayList<>(eventQueue.size()); List<StateEvent> remainEvents = new ArrayList<>(eventQueue.size());
eventQueue.drainTo(remainEvents); eventQueue.drainTo(remainEvents);
for (StateEvent event : remainEvents) { for (StateEvent event : remainEvents) {
try {
LoggerUtils.setWorkflowAndTaskInstanceIDMDC(event.getProcessInstanceId(), event.getTaskInstanceId());
this.persist(event); this.persist(event);
} finally {
LoggerUtils.removeWorkflowAndTaskInstanceIdMDC();
}
} }
} }
} }
@ -93,7 +100,7 @@ public class StateEventResponseService {
try { try {
eventQueue.put(stateEvent); eventQueue.put(stateEvent);
} catch (InterruptedException e) { } catch (InterruptedException e) {
logger.error("put state event : {} error :{}", stateEvent, e); logger.error("Put state event : {} error", stateEvent, e);
Thread.currentThread().interrupt(); Thread.currentThread().interrupt();
} }
} }
@ -109,18 +116,22 @@ public class StateEventResponseService {
@Override @Override
public void run() { public void run() {
logger.info("State event loop service started");
while (Stopper.isRunning()) { while (Stopper.isRunning()) {
try { try {
// if not task , blocking here // if not task , blocking here
StateEvent stateEvent = eventQueue.take(); StateEvent stateEvent = eventQueue.take();
LoggerUtils.setWorkflowAndTaskInstanceIDMDC(stateEvent.getProcessInstanceId(), stateEvent.getTaskInstanceId());
persist(stateEvent); persist(stateEvent);
} catch (InterruptedException e) { } catch (InterruptedException e) {
logger.warn("State event loop service interrupted, will stop this loop", e);
Thread.currentThread().interrupt(); Thread.currentThread().interrupt();
break; break;
} finally {
LoggerUtils.removeWorkflowAndTaskInstanceIdMDC();
} }
} }
logger.info("StateEventResponseWorker stopped"); logger.info("State event loop service stopped");
} }
} }
@ -135,6 +146,8 @@ public class StateEventResponseService {
private void persist(StateEvent stateEvent) { private void persist(StateEvent stateEvent) {
try { try {
if (!this.processInstanceExecCacheManager.contains(stateEvent.getProcessInstanceId())) { if (!this.processInstanceExecCacheManager.contains(stateEvent.getProcessInstanceId())) {
logger.warn("Persist event into workflow execute thread error, "
+ "cannot find the workflow instance from cache manager, event: {}", stateEvent);
writeResponse(stateEvent, ExecutionStatus.FAILURE); writeResponse(stateEvent, ExecutionStatus.FAILURE);
return; return;
} }
@ -152,7 +165,7 @@ public class StateEventResponseService {
workflowExecuteThreadPool.submitStateEvent(stateEvent); workflowExecuteThreadPool.submitStateEvent(stateEvent);
writeResponse(stateEvent, ExecutionStatus.SUCCESS); writeResponse(stateEvent, ExecutionStatus.SUCCESS);
} catch (Exception e) { } catch (Exception e) {
logger.error("persist event queue error, event: {}", stateEvent, e); logger.error("Persist event queue error, event: {}", stateEvent, e);
} }
} }

3
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/processor/queue/TaskExecuteRunnable.java

@ -20,6 +20,7 @@ package org.apache.dolphinscheduler.server.master.processor.queue;
import org.apache.dolphinscheduler.common.enums.Event; import org.apache.dolphinscheduler.common.enums.Event;
import org.apache.dolphinscheduler.common.enums.StateEvent; import org.apache.dolphinscheduler.common.enums.StateEvent;
import org.apache.dolphinscheduler.common.enums.StateEventType; import org.apache.dolphinscheduler.common.enums.StateEventType;
import org.apache.dolphinscheduler.common.utils.LoggerUtils;
import org.apache.dolphinscheduler.dao.entity.TaskInstance; import org.apache.dolphinscheduler.dao.entity.TaskInstance;
import org.apache.dolphinscheduler.plugin.task.api.enums.ExecutionStatus; import org.apache.dolphinscheduler.plugin.task.api.enums.ExecutionStatus;
import org.apache.dolphinscheduler.remote.command.TaskExecuteResponseAckCommand; import org.apache.dolphinscheduler.remote.command.TaskExecuteResponseAckCommand;
@ -72,11 +73,13 @@ public class TaskExecuteRunnable implements Runnable {
while (!this.events.isEmpty()) { while (!this.events.isEmpty()) {
TaskEvent event = this.events.peek(); TaskEvent event = this.events.peek();
try { try {
LoggerUtils.setWorkflowAndTaskInstanceIDMDC(event.getProcessInstanceId(), event.getTaskInstanceId());
persist(event); persist(event);
} catch (Exception e) { } catch (Exception e) {
logger.error("persist error, event:{}, error: {}", event, e); logger.error("persist error, event:{}, error: {}", event, e);
} finally { } finally {
this.events.remove(event); this.events.remove(event);
LoggerUtils.removeWorkflowAndTaskInstanceIdMDC();
} }
} }
} }

8
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/processor/queue/TaskExecuteThreadPool.java

@ -115,10 +115,10 @@ public class TaskExecuteThreadPool extends ThreadPoolTaskExecutor {
@Override @Override
public void onFailure(Throwable ex) { public void onFailure(Throwable ex) {
Integer processInstanceId = taskExecuteThread.getProcessInstanceId(); Integer processInstanceId = taskExecuteThread.getProcessInstanceId();
logger.error("persist event failed processInstanceId: {}", processInstanceId, ex); logger.error("[WorkflowInstance-{}] persist event failed", processInstanceId, ex);
if (!processInstanceExecCacheManager.contains(processInstanceId)) { if (!processInstanceExecCacheManager.contains(processInstanceId)) {
taskExecuteThreadMap.remove(processInstanceId); taskExecuteThreadMap.remove(processInstanceId);
logger.info("Cannot find processInstance from cacheManager, remove process instance from threadMap: {}", logger.info("[WorkflowInstance-{}] Cannot find processInstance from cacheManager, remove process instance from threadMap",
processInstanceId); processInstanceId);
} }
multiThreadFilterMap.remove(taskExecuteThread.getKey()); multiThreadFilterMap.remove(taskExecuteThread.getKey());
@ -127,10 +127,10 @@ public class TaskExecuteThreadPool extends ThreadPoolTaskExecutor {
@Override @Override
public void onSuccess(Object result) { public void onSuccess(Object result) {
Integer processInstanceId = taskExecuteThread.getProcessInstanceId(); Integer processInstanceId = taskExecuteThread.getProcessInstanceId();
logger.info("persist events succeeded, processInstanceId: {}", processInstanceId); logger.info("[WorkflowInstance-{}] persist events succeeded", processInstanceId);
if (!processInstanceExecCacheManager.contains(processInstanceId)) { if (!processInstanceExecCacheManager.contains(processInstanceId)) {
taskExecuteThreadMap.remove(processInstanceId); taskExecuteThreadMap.remove(processInstanceId);
logger.info("Cannot find processInstance from cacheManager, remove process instance from threadMap: {}", logger.info("[WorkflowInstance-{}] Cannot find processInstance from cacheManager, remove process instance from threadMap",
processInstanceId); processInstanceId);
} }
multiThreadFilterMap.remove(taskExecuteThread.getKey()); multiThreadFilterMap.remove(taskExecuteThread.getKey());

13
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/MasterRegistryClient.java

@ -98,7 +98,6 @@ public class MasterRegistryClient {
registryClient.addConnectionStateListener(new MasterConnectionStateListener(getCurrentNodePath(), registryClient)); registryClient.addConnectionStateListener(new MasterConnectionStateListener(getCurrentNodePath(), registryClient));
registryClient.subscribe(REGISTRY_DOLPHINSCHEDULER_NODE, new MasterRegistryDataListener()); registryClient.subscribe(REGISTRY_DOLPHINSCHEDULER_NODE, new MasterRegistryDataListener());
} catch (Exception e) { } catch (Exception e) {
logger.error("master start up exception", e);
throw new RegistryException("Master registry client start up error", e); throw new RegistryException("Master registry client start up error", e);
} }
} }
@ -185,7 +184,7 @@ public class MasterRegistryClient {
* Registry the current master server itself to registry. * Registry the current master server itself to registry.
*/ */
void registry() { void registry() {
logger.info("master node : {} registering to registry center...", masterAddress); logger.info("Master node : {} registering to registry center", masterAddress);
String localNodePath = getCurrentNodePath(); String localNodePath = getCurrentNodePath();
int masterHeartbeatInterval = masterConfig.getHeartbeatInterval(); int masterHeartbeatInterval = masterConfig.getHeartbeatInterval();
HeartBeatTask heartBeatTask = new HeartBeatTask(startupTime, HeartBeatTask heartBeatTask = new HeartBeatTask(startupTime,
@ -200,7 +199,7 @@ public class MasterRegistryClient {
registryClient.persistEphemeral(localNodePath, heartBeatTask.getHeartBeatInfo()); registryClient.persistEphemeral(localNodePath, heartBeatTask.getHeartBeatInfo());
while (!registryClient.checkNodeExists(NetUtils.getHost(), NodeType.MASTER)) { while (!registryClient.checkNodeExists(NetUtils.getHost(), NodeType.MASTER)) {
logger.warn("The current master server node:{} cannot find in registry....", NetUtils.getHost()); logger.warn("The current master server node:{} cannot find in registry", NetUtils.getHost());
ThreadUtils.sleep(SLEEP_TIME_MILLIS); ThreadUtils.sleep(SLEEP_TIME_MILLIS);
} }
@ -211,7 +210,7 @@ public class MasterRegistryClient {
registryClient.handleDeadServer(Collections.singleton(localNodePath), NodeType.MASTER, Constants.DELETE_OP); registryClient.handleDeadServer(Collections.singleton(localNodePath), NodeType.MASTER, Constants.DELETE_OP);
this.heartBeatExecutor.scheduleAtFixedRate(heartBeatTask, 0L, masterHeartbeatInterval, TimeUnit.SECONDS); this.heartBeatExecutor.scheduleAtFixedRate(heartBeatTask, 0L, masterHeartbeatInterval, TimeUnit.SECONDS);
logger.info("master node : {} registry to ZK successfully with heartBeatInterval : {}s", masterAddress, masterHeartbeatInterval); logger.info("Master node : {} registered to registry center successfully with heartBeatInterval : {}s", masterAddress, masterHeartbeatInterval);
} }
@ -220,12 +219,12 @@ public class MasterRegistryClient {
String address = getLocalAddress(); String address = getLocalAddress();
String localNodePath = getCurrentNodePath(); String localNodePath = getCurrentNodePath();
registryClient.remove(localNodePath); registryClient.remove(localNodePath);
logger.info("master node : {} unRegistry to register center.", address); logger.info("Master node : {} unRegistry to register center.", address);
heartBeatExecutor.shutdown(); heartBeatExecutor.shutdown();
logger.info("heartbeat executor shutdown"); logger.info("MasterServer heartbeat executor shutdown");
registryClient.close(); registryClient.close();
} catch (Exception e) { } catch (Exception e) {
logger.error("remove registry path exception ", e); logger.error("MasterServer remove registry path exception ", e);
} }
} }

16
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/EventExecuteService.java

@ -20,6 +20,7 @@ package org.apache.dolphinscheduler.server.master.runner;
import org.apache.dolphinscheduler.common.Constants; import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.thread.BaseDaemonThread; import org.apache.dolphinscheduler.common.thread.BaseDaemonThread;
import org.apache.dolphinscheduler.common.thread.Stopper; import org.apache.dolphinscheduler.common.thread.Stopper;
import org.apache.dolphinscheduler.common.utils.LoggerUtils;
import org.apache.dolphinscheduler.server.master.cache.ProcessInstanceExecCacheManager; import org.apache.dolphinscheduler.server.master.cache.ProcessInstanceExecCacheManager;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
@ -49,25 +50,36 @@ public class EventExecuteService extends BaseDaemonThread {
@Override @Override
public synchronized void start() { public synchronized void start() {
logger.info("Master Event execute service starting");
super.start(); super.start();
logger.info("Master Event execute service started");
} }
@Override @Override
public void run() { public void run() {
logger.info("Event service started");
while (Stopper.isRunning()) { while (Stopper.isRunning()) {
try { try {
eventHandler(); eventHandler();
TimeUnit.MILLISECONDS.sleep(Constants.SLEEP_TIME_MILLIS_SHORT); TimeUnit.MILLISECONDS.sleep(Constants.SLEEP_TIME_MILLIS_SHORT);
} catch (InterruptedException interruptedException) {
logger.warn("Master event service interrupted, will exit this loop", interruptedException);
Thread.currentThread().interrupt();
break;
} catch (Exception e) { } catch (Exception e) {
logger.error("Event service thread error", e); logger.error("Master event execute service error", e);
} }
} }
} }
private void eventHandler() { private void eventHandler() {
for (WorkflowExecuteRunnable workflowExecuteThread : this.processInstanceExecCacheManager.getAll()) { for (WorkflowExecuteRunnable workflowExecuteThread : this.processInstanceExecCacheManager.getAll()) {
try {
LoggerUtils.setWorkflowInstanceIdMDC(workflowExecuteThread.getProcessInstance().getId());
workflowExecuteThreadPool.executeEvent(workflowExecuteThread); workflowExecuteThreadPool.executeEvent(workflowExecuteThread);
} finally {
LoggerUtils.removeWorkflowInstanceIdMDC();
}
} }
} }
} }

5
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/FailoverExecuteThread.java

@ -49,7 +49,9 @@ public class FailoverExecuteThread extends BaseDaemonThread {
@Override @Override
public synchronized void start() { public synchronized void start() {
logger.info("Master failover thread staring");
super.start(); super.start();
logger.info("Master failover thread stared");
} }
@Override @Override
@ -57,14 +59,13 @@ public class FailoverExecuteThread extends BaseDaemonThread {
// when startup, wait 10s for ready // when startup, wait 10s for ready
ThreadUtils.sleep(Constants.SLEEP_TIME_MILLIS * 10); ThreadUtils.sleep(Constants.SLEEP_TIME_MILLIS * 10);
logger.info("failover execute thread started");
while (Stopper.isRunning()) { while (Stopper.isRunning()) {
try { try {
// todo: DO we need to schedule a task to do this kind of check // todo: DO we need to schedule a task to do this kind of check
// This kind of check may only need to be executed when a master server start // This kind of check may only need to be executed when a master server start
failoverService.checkMasterFailover(); failoverService.checkMasterFailover();
} catch (Exception e) { } catch (Exception e) {
logger.error("failover execute error", e); logger.error("Master failover thread execute error", e);
} finally { } finally {
ThreadUtils.sleep(Constants.SLEEP_TIME_MILLIS * masterConfig.getFailoverInterval() * 60); ThreadUtils.sleep(Constants.SLEEP_TIME_MILLIS * masterConfig.getFailoverInterval() * 60);
} }

46
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/MasterSchedulerService.java

@ -22,6 +22,7 @@ import org.apache.dolphinscheduler.common.enums.SlotCheckState;
import org.apache.dolphinscheduler.common.thread.BaseDaemonThread; import org.apache.dolphinscheduler.common.thread.BaseDaemonThread;
import org.apache.dolphinscheduler.common.thread.Stopper; import org.apache.dolphinscheduler.common.thread.Stopper;
import org.apache.dolphinscheduler.common.thread.ThreadUtils; import org.apache.dolphinscheduler.common.thread.ThreadUtils;
import org.apache.dolphinscheduler.common.utils.LoggerUtils;
import org.apache.dolphinscheduler.common.utils.NetUtils; import org.apache.dolphinscheduler.common.utils.NetUtils;
import org.apache.dolphinscheduler.common.utils.OSUtils; import org.apache.dolphinscheduler.common.utils.OSUtils;
import org.apache.dolphinscheduler.dao.entity.Command; import org.apache.dolphinscheduler.dao.entity.Command;
@ -111,20 +112,23 @@ public class MasterSchedulerService extends BaseDaemonThread {
* constructor of MasterSchedulerService * constructor of MasterSchedulerService
*/ */
public void init() { public void init() {
this.masterPrepareExecService = (ThreadPoolExecutor) ThreadUtils.newDaemonFixedThreadExecutor("Master-Pre-Exec-Thread", masterConfig.getPreExecThreads()); this.masterPrepareExecService = (ThreadPoolExecutor) ThreadUtils.newDaemonFixedThreadExecutor("MasterPreExecThread", masterConfig.getPreExecThreads());
NettyClientConfig clientConfig = new NettyClientConfig(); NettyClientConfig clientConfig = new NettyClientConfig();
this.nettyRemotingClient = new NettyRemotingClient(clientConfig); this.nettyRemotingClient = new NettyRemotingClient(clientConfig);
} }
@Override @Override
public synchronized void start() { public synchronized void start() {
logger.info("Master schedule service starting..");
this.stateWheelExecuteThread.start(); this.stateWheelExecuteThread.start();
super.start(); super.start();
logger.info("Master schedule service started...");
} }
public void close() { public void close() {
logger.info("Master schedule service stopping...");
nettyRemotingClient.close(); nettyRemotingClient.close();
logger.info("master schedule service stopped..."); logger.info("Master schedule service stopped...");
} }
/** /**
@ -132,7 +136,6 @@ public class MasterSchedulerService extends BaseDaemonThread {
*/ */
@Override @Override
public void run() { public void run() {
logger.info("master scheduler started");
while (Stopper.isRunning()) { while (Stopper.isRunning()) {
try { try {
boolean runCheckFlag = OSUtils.checkResource(masterConfig.getMaxCpuLoadAvg(), masterConfig.getReservedMemory()); boolean runCheckFlag = OSUtils.checkResource(masterConfig.getMaxCpuLoadAvg(), masterConfig.getReservedMemory());
@ -142,8 +145,12 @@ public class MasterSchedulerService extends BaseDaemonThread {
continue; continue;
} }
scheduleProcess(); scheduleProcess();
} catch (InterruptedException interruptedException) {
logger.warn("Master schedule service interrupted, close the loop", interruptedException);
Thread.currentThread().interrupt();
break;
} catch (Exception e) { } catch (Exception e) {
logger.error("master scheduler thread error", e); logger.error("Master schedule service loop command error", e);
} }
} }
} }
@ -152,7 +159,7 @@ public class MasterSchedulerService extends BaseDaemonThread {
* 1. get command by slot * 1. get command by slot
* 2. donot handle command if slot is empty * 2. donot handle command if slot is empty
*/ */
private void scheduleProcess() throws Exception { private void scheduleProcess() throws InterruptedException {
List<Command> commands = findCommands(); List<Command> commands = findCommands();
if (CollectionUtils.isEmpty(commands)) { if (CollectionUtils.isEmpty(commands)) {
//indicate that no command ,sleep for 1s //indicate that no command ,sleep for 1s
@ -167,7 +174,9 @@ public class MasterSchedulerService extends BaseDaemonThread {
MasterServerMetrics.incMasterConsumeCommand(commands.size()); MasterServerMetrics.incMasterConsumeCommand(commands.size());
for (ProcessInstance processInstance : processInstances) { for (ProcessInstance processInstance : processInstances) {
try {
LoggerUtils.setWorkflowInstanceIdMDC(processInstance.getId());
logger.info("Master schedule service starting workflow instance");
WorkflowExecuteRunnable workflowExecuteRunnable = new WorkflowExecuteRunnable( WorkflowExecuteRunnable workflowExecuteRunnable = new WorkflowExecuteRunnable(
processInstance processInstance
, processService , processService
@ -181,10 +190,16 @@ public class MasterSchedulerService extends BaseDaemonThread {
stateWheelExecuteThread.addProcess4TimeoutCheck(processInstance); stateWheelExecuteThread.addProcess4TimeoutCheck(processInstance);
} }
workflowExecuteThreadPool.startWorkflow(workflowExecuteRunnable); workflowExecuteThreadPool.startWorkflow(workflowExecuteRunnable);
logger.info("Master schedule service started workflow instance");
} finally {
LoggerUtils.removeWorkflowInstanceIdMDC();
}
} }
} }
private List<ProcessInstance> command2ProcessInstance(List<Command> commands) { private List<ProcessInstance> command2ProcessInstance(List<Command> commands) throws InterruptedException {
logger.info("Master schedule service transforming command to ProcessInstance, commandSize: {}", commands.size());
List<ProcessInstance> processInstances = Collections.synchronizedList(new ArrayList<>(commands.size())); List<ProcessInstance> processInstances = Collections.synchronizedList(new ArrayList<>(commands.size()));
CountDownLatch latch = new CountDownLatch(commands.size()); CountDownLatch latch = new CountDownLatch(commands.size());
for (final Command command : commands) { for (final Command command : commands) {
@ -193,7 +208,7 @@ public class MasterSchedulerService extends BaseDaemonThread {
// slot check again // slot check again
SlotCheckState slotCheckState = slotCheck(command); SlotCheckState slotCheckState = slotCheck(command);
if (slotCheckState.equals(SlotCheckState.CHANGE) || slotCheckState.equals(SlotCheckState.INJECT)) { if (slotCheckState.equals(SlotCheckState.CHANGE) || slotCheckState.equals(SlotCheckState.INJECT)) {
logger.info("handle command {} skip, slot check state: {}", command.getId(), slotCheckState); logger.info("Master handle command {} skip, slot check state: {}", command.getId(), slotCheckState);
return; return;
} }
ProcessInstance processInstance = processService.handleCommand(logger, ProcessInstance processInstance = processService.handleCommand(logger,
@ -201,10 +216,10 @@ public class MasterSchedulerService extends BaseDaemonThread {
command); command);
if (processInstance != null) { if (processInstance != null) {
processInstances.add(processInstance); processInstances.add(processInstance);
logger.info("handle command {} end, create process instance {}", command.getId(), processInstance.getId()); logger.info("Master handle command {} end, create process instance {}", command.getId(), processInstance.getId());
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("handle command {} error ", command.getId(), e); logger.error("Master handle command {} error ", command.getId(), e);
processService.moveToErrorCommand(command, e.toString()); processService.moveToErrorCommand(command, e.toString());
} finally { } finally {
latch.countDown(); latch.countDown();
@ -212,13 +227,10 @@ public class MasterSchedulerService extends BaseDaemonThread {
}); });
} }
try {
// make sure to finish handling command each time before next scan // make sure to finish handling command each time before next scan
latch.await(); latch.await();
} catch (InterruptedException e) { logger.info("Master schedule service transformed command to ProcessInstance, commandSize: {}, processInstanceSize: {}",
logger.error("countDownLatch await error ", e); commands.size(), processInstances.size());
}
return processInstances; return processInstances;
} }
@ -231,6 +243,10 @@ public class MasterSchedulerService extends BaseDaemonThread {
int masterCount = ServerNodeManager.getMasterSize(); int masterCount = ServerNodeManager.getMasterSize();
if (masterCount > 0) { if (masterCount > 0) {
result = processService.findCommandPageBySlot(pageSize, pageNumber, masterCount, thisMasterSlot); result = processService.findCommandPageBySlot(pageSize, pageNumber, masterCount, thisMasterSlot);
if (CollectionUtils.isNotEmpty(result)) {
logger.info("Master schedule service loop command success, command size: {}, current slot: {}, total slot size: {}",
result.size(), thisMasterSlot, masterCount);
}
} }
} }
return result; return result;

112
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/StateWheelExecuteThread.java

@ -24,6 +24,7 @@ import org.apache.dolphinscheduler.common.enums.TimeoutFlag;
import org.apache.dolphinscheduler.common.thread.BaseDaemonThread; import org.apache.dolphinscheduler.common.thread.BaseDaemonThread;
import org.apache.dolphinscheduler.common.thread.Stopper; import org.apache.dolphinscheduler.common.thread.Stopper;
import org.apache.dolphinscheduler.common.utils.DateUtils; import org.apache.dolphinscheduler.common.utils.DateUtils;
import org.apache.dolphinscheduler.common.utils.LoggerUtils;
import org.apache.dolphinscheduler.dao.entity.ProcessInstance; import org.apache.dolphinscheduler.dao.entity.ProcessInstance;
import org.apache.dolphinscheduler.dao.entity.TaskDefinition; import org.apache.dolphinscheduler.dao.entity.TaskDefinition;
import org.apache.dolphinscheduler.dao.entity.TaskInstance; import org.apache.dolphinscheduler.dao.entity.TaskInstance;
@ -43,6 +44,8 @@ import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import lombok.NonNull;
/** /**
* Check thread * Check thread
* 1. timeout task check * 1. timeout task check
@ -110,10 +113,16 @@ public class StateWheelExecuteThread extends BaseDaemonThread {
public void addProcess4TimeoutCheck(ProcessInstance processInstance) { public void addProcess4TimeoutCheck(ProcessInstance processInstance) {
processInstanceTimeoutCheckList.add(processInstance.getId()); processInstanceTimeoutCheckList.add(processInstance.getId());
logger.info("Success add workflow instance into timeout check list");
} }
public void removeProcess4TimeoutCheck(ProcessInstance processInstance) { public void removeProcess4TimeoutCheck(ProcessInstance processInstance) {
processInstanceTimeoutCheckList.remove(processInstance.getId()); boolean removeFlag = processInstanceTimeoutCheckList.remove(processInstance.getId());
if (removeFlag) {
logger.info("Success remove workflow instance from timeout check list");
} else {
logger.warn("Failed to remove workflow instance from timeout check list");
}
} }
private void checkProcess4Timeout() { private void checkProcess4Timeout() {
@ -121,106 +130,95 @@ public class StateWheelExecuteThread extends BaseDaemonThread {
return; return;
} }
for (Integer processInstanceId : processInstanceTimeoutCheckList) { for (Integer processInstanceId : processInstanceTimeoutCheckList) {
if (processInstanceId == null) {
continue;
}
WorkflowExecuteRunnable workflowExecuteThread = processInstanceExecCacheManager.getByProcessInstanceId(processInstanceId); WorkflowExecuteRunnable workflowExecuteThread = processInstanceExecCacheManager.getByProcessInstanceId(processInstanceId);
if (workflowExecuteThread == null) { if (workflowExecuteThread == null) {
logger.warn("can not find workflowExecuteThread, this check event will remove, processInstanceId:{}", processInstanceId); logger.warn("Check workflow timeout failed, can not find workflowExecuteThread from cache manager, will remove this workflowInstance from check list");
processInstanceTimeoutCheckList.remove(processInstanceId); processInstanceTimeoutCheckList.remove(processInstanceId);
continue; continue;
} }
ProcessInstance processInstance = workflowExecuteThread.getProcessInstance(); ProcessInstance processInstance = workflowExecuteThread.getProcessInstance();
if (processInstance == null) { if (processInstance == null) {
logger.warn("Check workflow timeout failed, the workflowInstance is null");
continue; continue;
} }
long timeRemain = DateUtils.getRemainTime(processInstance.getStartTime(), (long) processInstance.getTimeout() * Constants.SEC_2_MINUTES_TIME_UNIT); long timeRemain = DateUtils.getRemainTime(processInstance.getStartTime(), (long) processInstance.getTimeout() * Constants.SEC_2_MINUTES_TIME_UNIT);
if (timeRemain < 0) { if (timeRemain < 0) {
logger.info("Workflow instance timeout, adding timeout event");
addProcessTimeoutEvent(processInstance); addProcessTimeoutEvent(processInstance);
processInstanceTimeoutCheckList.remove(processInstance.getId()); processInstanceTimeoutCheckList.remove(processInstance.getId());
logger.info("Workflow instance timeout, added timeout event");
} }
} }
} }
public void addTask4TimeoutCheck(ProcessInstance processInstance, TaskInstance taskInstance) { public void addTask4TimeoutCheck(@NonNull ProcessInstance processInstance, @NonNull TaskInstance taskInstance) {
TaskInstanceKey taskInstanceKey = TaskInstanceKey.getTaskInstanceKey(processInstance, taskInstance); TaskInstanceKey taskInstanceKey = TaskInstanceKey.getTaskInstanceKey(processInstance, taskInstance);
if (taskInstanceKey == null) { logger.info("Adding task instance into timeout check list");
logger.error("taskInstanceKey is null");
return;
}
if (taskInstanceTimeoutCheckList.contains(taskInstanceKey)) { if (taskInstanceTimeoutCheckList.contains(taskInstanceKey)) {
logger.warn("Task instance is already in timeout check list");
return; return;
} }
TaskDefinition taskDefinition = taskInstance.getTaskDefine(); TaskDefinition taskDefinition = taskInstance.getTaskDefine();
if (taskDefinition == null) { if (taskDefinition == null) {
logger.error("taskDefinition is null, taskId:{}", taskInstance.getId()); logger.error("Failed to add task instance into timeout check list, taskDefinition is null");
return; return;
} }
if (TimeoutFlag.OPEN == taskDefinition.getTimeoutFlag()) { if (TimeoutFlag.OPEN == taskDefinition.getTimeoutFlag()) {
taskInstanceTimeoutCheckList.add(taskInstanceKey); taskInstanceTimeoutCheckList.add(taskInstanceKey);
logger.info("Timeout flag is open, added task instance into timeout check list");
} }
if (taskInstance.isDependTask() || taskInstance.isSubProcess()) { if (taskInstance.isDependTask() || taskInstance.isSubProcess()) {
taskInstanceTimeoutCheckList.add(taskInstanceKey); taskInstanceTimeoutCheckList.add(taskInstanceKey);
logger.info("task instance is dependTask orSubProcess, added task instance into timeout check list");
} }
} }
public void removeTask4TimeoutCheck(ProcessInstance processInstance, TaskInstance taskInstance) { public void removeTask4TimeoutCheck(@NonNull ProcessInstance processInstance, @NonNull TaskInstance taskInstance) {
TaskInstanceKey taskInstanceKey = TaskInstanceKey.getTaskInstanceKey(processInstance, taskInstance); TaskInstanceKey taskInstanceKey = TaskInstanceKey.getTaskInstanceKey(processInstance, taskInstance);
if (taskInstanceKey == null) {
logger.error("taskInstanceKey is null");
return;
}
taskInstanceTimeoutCheckList.remove(taskInstanceKey); taskInstanceTimeoutCheckList.remove(taskInstanceKey);
logger.info("remove task instance from timeout check list");
} }
public void addTask4RetryCheck(ProcessInstance processInstance, TaskInstance taskInstance) { public void addTask4RetryCheck(@NonNull ProcessInstance processInstance, @NonNull TaskInstance taskInstance) {
logger.info("Adding task instance into retry check list");
TaskInstanceKey taskInstanceKey = TaskInstanceKey.getTaskInstanceKey(processInstance, taskInstance); TaskInstanceKey taskInstanceKey = TaskInstanceKey.getTaskInstanceKey(processInstance, taskInstance);
if (taskInstanceKey == null) {
logger.error("taskInstanceKey is null");
return;
}
if (taskInstanceRetryCheckList.contains(taskInstanceKey)) { if (taskInstanceRetryCheckList.contains(taskInstanceKey)) {
logger.warn("Task instance is already in retry check list");
return; return;
} }
TaskDefinition taskDefinition = taskInstance.getTaskDefine(); TaskDefinition taskDefinition = taskInstance.getTaskDefine();
if (taskDefinition == null) { if (taskDefinition == null) {
logger.error("taskDefinition is null, taskId:{}", taskInstance.getId()); logger.error("Add task instance into retry check list error, taskDefinition is null");
return; return;
} }
logger.debug("addTask4RetryCheck, taskCode:{}, processInstanceId:{}", taskInstance.getTaskCode(), taskInstance.getProcessInstanceId());
taskInstanceRetryCheckList.add(taskInstanceKey); taskInstanceRetryCheckList.add(taskInstanceKey);
logger.info("[WorkflowInstance-{}][TaskInstance-{}] Added task instance into retry check list",
processInstance.getId(), taskInstance.getId());
} }
public void removeTask4RetryCheck(ProcessInstance processInstance, TaskInstance taskInstance) { public void removeTask4RetryCheck(@NonNull ProcessInstance processInstance, @NonNull TaskInstance taskInstance) {
TaskInstanceKey taskInstanceKey = TaskInstanceKey.getTaskInstanceKey(processInstance, taskInstance); TaskInstanceKey taskInstanceKey = TaskInstanceKey.getTaskInstanceKey(processInstance, taskInstance);
if (taskInstanceKey == null) {
logger.error("taskInstanceKey is null");
return;
}
taskInstanceRetryCheckList.remove(taskInstanceKey); taskInstanceRetryCheckList.remove(taskInstanceKey);
logger.info("remove task instance from retry check list");
} }
public void addTask4StateCheck(ProcessInstance processInstance, TaskInstance taskInstance) { public void addTask4StateCheck(@NonNull ProcessInstance processInstance, @NonNull TaskInstance taskInstance) {
logger.info("Adding task instance into state check list");
TaskInstanceKey taskInstanceKey = TaskInstanceKey.getTaskInstanceKey(processInstance, taskInstance); TaskInstanceKey taskInstanceKey = TaskInstanceKey.getTaskInstanceKey(processInstance, taskInstance);
if (taskInstanceKey == null) {
logger.error("taskInstanceKey is null");
return;
}
if (taskInstanceStateCheckList.contains(taskInstanceKey)) { if (taskInstanceStateCheckList.contains(taskInstanceKey)) {
logger.warn("Task instance is already in state check list");
return; return;
} }
if (taskInstance.isDependTask() || taskInstance.isSubProcess()) { if (taskInstance.isDependTask() || taskInstance.isSubProcess()) {
taskInstanceStateCheckList.add(taskInstanceKey); taskInstanceStateCheckList.add(taskInstanceKey);
logger.info("Added task instance into state check list");
} }
} }
public void removeTask4StateCheck(ProcessInstance processInstance, TaskInstance taskInstance) { public void removeTask4StateCheck(@NonNull ProcessInstance processInstance, @NonNull TaskInstance taskInstance) {
TaskInstanceKey taskInstanceKey = TaskInstanceKey.getTaskInstanceKey(processInstance, taskInstance); TaskInstanceKey taskInstanceKey = TaskInstanceKey.getTaskInstanceKey(processInstance, taskInstance);
if (taskInstanceKey == null) {
logger.error("taskInstanceKey is null");
return;
}
taskInstanceStateCheckList.remove(taskInstanceKey); taskInstanceStateCheckList.remove(taskInstanceKey);
logger.info("Removed task instance from state check list");
} }
private void checkTask4Timeout() { private void checkTask4Timeout() {
@ -228,20 +226,21 @@ public class StateWheelExecuteThread extends BaseDaemonThread {
return; return;
} }
for (TaskInstanceKey taskInstanceKey : taskInstanceTimeoutCheckList) { for (TaskInstanceKey taskInstanceKey : taskInstanceTimeoutCheckList) {
try {
int processInstanceId = taskInstanceKey.getProcessInstanceId(); int processInstanceId = taskInstanceKey.getProcessInstanceId();
LoggerUtils.setWorkflowInstanceIdMDC(processInstanceId);
long taskCode = taskInstanceKey.getTaskCode(); long taskCode = taskInstanceKey.getTaskCode();
WorkflowExecuteRunnable workflowExecuteThread = processInstanceExecCacheManager.getByProcessInstanceId(processInstanceId); WorkflowExecuteRunnable workflowExecuteThread = processInstanceExecCacheManager.getByProcessInstanceId(processInstanceId);
if (workflowExecuteThread == null) { if (workflowExecuteThread == null) {
logger.warn("can not find workflowExecuteThread, this check event will remove, processInstanceId:{}, taskCode:{}", logger.warn("Check task instance timeout failed, can not find workflowExecuteThread from cache manager, will remove this check task");
processInstanceId, taskCode);
taskInstanceTimeoutCheckList.remove(taskInstanceKey); taskInstanceTimeoutCheckList.remove(taskInstanceKey);
continue; continue;
} }
Optional<TaskInstance> taskInstanceOptional = workflowExecuteThread.getActiveTaskInstanceByTaskCode(taskCode); Optional<TaskInstance> taskInstanceOptional = workflowExecuteThread.getActiveTaskInstanceByTaskCode(taskCode);
if (!taskInstanceOptional.isPresent()) { if (!taskInstanceOptional.isPresent()) {
logger.warn("can not find taskInstance from workflowExecuteThread, this check event will remove, processInstanceId:{}, taskCode:{}", logger.warn("Check task instance timeout failed, can not get taskInstance from workflowExecuteThread, taskCode: {}"
processInstanceId, taskCode); + "will remove this check task", taskCode);
taskInstanceTimeoutCheckList.remove(taskInstanceKey); taskInstanceTimeoutCheckList.remove(taskInstanceKey);
continue; continue;
} }
@ -249,10 +248,14 @@ public class StateWheelExecuteThread extends BaseDaemonThread {
if (TimeoutFlag.OPEN == taskInstance.getTaskDefine().getTimeoutFlag()) { if (TimeoutFlag.OPEN == taskInstance.getTaskDefine().getTimeoutFlag()) {
long timeRemain = DateUtils.getRemainTime(taskInstance.getStartTime(), (long) taskInstance.getTaskDefine().getTimeout() * Constants.SEC_2_MINUTES_TIME_UNIT); long timeRemain = DateUtils.getRemainTime(taskInstance.getStartTime(), (long) taskInstance.getTaskDefine().getTimeout() * Constants.SEC_2_MINUTES_TIME_UNIT);
if (timeRemain < 0) { if (timeRemain < 0) {
logger.info("Task instance is timeout, adding task timeout event and remove the check");
addTaskTimeoutEvent(taskInstance); addTaskTimeoutEvent(taskInstance);
taskInstanceTimeoutCheckList.remove(taskInstanceKey); taskInstanceTimeoutCheckList.remove(taskInstanceKey);
} }
} }
} finally {
LoggerUtils.removeWorkflowInstanceIdMDC();
}
} }
} }
@ -264,12 +267,14 @@ public class StateWheelExecuteThread extends BaseDaemonThread {
for (TaskInstanceKey taskInstanceKey : taskInstanceRetryCheckList) { for (TaskInstanceKey taskInstanceKey : taskInstanceRetryCheckList) {
int processInstanceId = taskInstanceKey.getProcessInstanceId(); int processInstanceId = taskInstanceKey.getProcessInstanceId();
long taskCode = taskInstanceKey.getTaskCode(); long taskCode = taskInstanceKey.getTaskCode();
try {
LoggerUtils.setWorkflowInstanceIdMDC(processInstanceId);
WorkflowExecuteRunnable workflowExecuteThread = processInstanceExecCacheManager.getByProcessInstanceId(processInstanceId); WorkflowExecuteRunnable workflowExecuteThread = processInstanceExecCacheManager.getByProcessInstanceId(processInstanceId);
if (workflowExecuteThread == null) { if (workflowExecuteThread == null) {
logger.warn("can not find workflowExecuteThread, this check event will remove, processInstanceId:{}, taskCode:{}", logger.warn("Task instance retry check failed, can not find workflowExecuteThread from cache manager, "
processInstanceId, taskCode); + "will remove this check task");
taskInstanceRetryCheckList.remove(taskInstanceKey); taskInstanceRetryCheckList.remove(taskInstanceKey);
continue; continue;
} }
@ -278,14 +283,14 @@ public class StateWheelExecuteThread extends BaseDaemonThread {
ProcessInstance processInstance = workflowExecuteThread.getProcessInstance(); ProcessInstance processInstance = workflowExecuteThread.getProcessInstance();
if (processInstance.getState() == ExecutionStatus.READY_STOP) { if (processInstance.getState() == ExecutionStatus.READY_STOP) {
logger.warn("The process instance is ready to stop, will send process stop event and remove the check task");
addProcessStopEvent(processInstance); addProcessStopEvent(processInstance);
taskInstanceRetryCheckList.remove(taskInstanceKey); taskInstanceRetryCheckList.remove(taskInstanceKey);
break; break;
} }
if (!taskInstanceOptional.isPresent()) { if (!taskInstanceOptional.isPresent()) {
logger.warn("can not find taskInstance from workflowExecuteThread, this check event will remove, processInstanceId:{}, taskCode:{}", logger.warn("Task instance retry check failed, can not find taskInstance from workflowExecuteThread, will remove this check");
processInstanceId, taskCode);
taskInstanceRetryCheckList.remove(taskInstanceKey); taskInstanceRetryCheckList.remove(taskInstanceKey);
continue; continue;
} }
@ -300,6 +305,9 @@ public class StateWheelExecuteThread extends BaseDaemonThread {
addTaskRetryEvent(taskInstance); addTaskRetryEvent(taskInstance);
taskInstanceRetryCheckList.remove(taskInstanceKey); taskInstanceRetryCheckList.remove(taskInstanceKey);
} }
} finally {
LoggerUtils.removeWorkflowInstanceIdMDC();
}
} }
} }
@ -311,17 +319,18 @@ public class StateWheelExecuteThread extends BaseDaemonThread {
int processInstanceId = taskInstanceKey.getProcessInstanceId(); int processInstanceId = taskInstanceKey.getProcessInstanceId();
long taskCode = taskInstanceKey.getTaskCode(); long taskCode = taskInstanceKey.getTaskCode();
try {
LoggerUtils.setTaskInstanceIdMDC(processInstanceId);
WorkflowExecuteRunnable workflowExecuteThread = processInstanceExecCacheManager.getByProcessInstanceId(processInstanceId); WorkflowExecuteRunnable workflowExecuteThread = processInstanceExecCacheManager.getByProcessInstanceId(processInstanceId);
if (workflowExecuteThread == null) { if (workflowExecuteThread == null) {
logger.warn("can not find workflowExecuteThread, this check event will remove, processInstanceId:{}, taskCode:{}", logger.warn("Task instance state check failed, can not find workflowExecuteThread from cache manager, will remove this check task");
processInstanceId, taskCode);
taskInstanceStateCheckList.remove(taskInstanceKey); taskInstanceStateCheckList.remove(taskInstanceKey);
continue; continue;
} }
Optional<TaskInstance> taskInstanceOptional = workflowExecuteThread.getActiveTaskInstanceByTaskCode(taskCode); Optional<TaskInstance> taskInstanceOptional = workflowExecuteThread.getActiveTaskInstanceByTaskCode(taskCode);
if (!taskInstanceOptional.isPresent()) { if (!taskInstanceOptional.isPresent()) {
logger.warn("can not find taskInstance from workflowExecuteThread, this check event will remove, processInstanceId:{}, taskCode:{}", logger.warn(
processInstanceId, taskCode); "Task instance state check failed, can not find taskInstance from workflowExecuteThread, will remove this check event");
taskInstanceStateCheckList.remove(taskInstanceKey); taskInstanceStateCheckList.remove(taskInstanceKey);
continue; continue;
} }
@ -330,6 +339,9 @@ public class StateWheelExecuteThread extends BaseDaemonThread {
continue; continue;
} }
addTaskStateChangeEvent(taskInstance); addTaskStateChangeEvent(taskInstance);
} finally {
LoggerUtils.removeWorkflowInstanceIdMDC();
}
} }
} }

11
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteRunnable.java

@ -46,6 +46,7 @@ import org.apache.dolphinscheduler.common.model.TaskNodeRelation;
import org.apache.dolphinscheduler.common.process.ProcessDag; import org.apache.dolphinscheduler.common.process.ProcessDag;
import org.apache.dolphinscheduler.common.utils.DateUtils; import org.apache.dolphinscheduler.common.utils.DateUtils;
import org.apache.dolphinscheduler.common.utils.JSONUtils; import org.apache.dolphinscheduler.common.utils.JSONUtils;
import org.apache.dolphinscheduler.common.utils.LoggerUtils;
import org.apache.dolphinscheduler.common.utils.NetUtils; import org.apache.dolphinscheduler.common.utils.NetUtils;
import org.apache.dolphinscheduler.common.utils.ParameterUtils; import org.apache.dolphinscheduler.common.utils.ParameterUtils;
import org.apache.dolphinscheduler.dao.entity.Command; import org.apache.dolphinscheduler.dao.entity.Command;
@ -274,12 +275,16 @@ public class WorkflowExecuteRunnable implements Runnable {
while (!this.stateEvents.isEmpty()) { while (!this.stateEvents.isEmpty()) {
try { try {
StateEvent stateEvent = this.stateEvents.peek(); StateEvent stateEvent = this.stateEvents.peek();
LoggerUtils.setWorkflowAndTaskInstanceIDMDC(stateEvent.getProcessInstanceId(), stateEvent.getTaskInstanceId());
if (stateEventHandler(stateEvent)) { if (stateEventHandler(stateEvent)) {
this.stateEvents.remove(stateEvent); this.stateEvents.remove(stateEvent);
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("state handle error:", e); logger.error("state handle error:", e);
} finally {
LoggerUtils.removeWorkflowAndTaskInstanceIdMDC();
} }
} }
} }
@ -776,9 +781,13 @@ public class WorkflowExecuteRunnable implements Runnable {
if (cmdParam.containsKey(Constants.CMD_PARAM_RECOVERY_START_NODE_STRING)) { if (cmdParam.containsKey(Constants.CMD_PARAM_RECOVERY_START_NODE_STRING)) {
cmdParam.remove(Constants.CMD_PARAM_RECOVERY_START_NODE_STRING); cmdParam.remove(Constants.CMD_PARAM_RECOVERY_START_NODE_STRING);
} }
if (cmdParam.containsKey(CMDPARAM_COMPLEMENT_DATA_SCHEDULE_DATE_LIST)) { if (cmdParam.containsKey(CMDPARAM_COMPLEMENT_DATA_SCHEDULE_DATE_LIST)) {
cmdParam.replace(CMDPARAM_COMPLEMENT_DATA_SCHEDULE_DATE_LIST, cmdParam.get(CMDPARAM_COMPLEMENT_DATA_SCHEDULE_DATE_LIST).substring(cmdParam.get(CMDPARAM_COMPLEMENT_DATA_SCHEDULE_DATE_LIST).indexOf(COMMA)+1)); cmdParam.replace(CMDPARAM_COMPLEMENT_DATA_SCHEDULE_DATE_LIST,
cmdParam.get(CMDPARAM_COMPLEMENT_DATA_SCHEDULE_DATE_LIST)
.substring(cmdParam.get(CMDPARAM_COMPLEMENT_DATA_SCHEDULE_DATE_LIST).indexOf(COMMA) + 1));
} }
if (cmdParam.containsKey(CMDPARAM_COMPLEMENT_DATA_START_DATE)) { if (cmdParam.containsKey(CMDPARAM_COMPLEMENT_DATA_START_DATE)) {
cmdParam.replace(CMDPARAM_COMPLEMENT_DATA_START_DATE, DateUtils.format(scheduleDate, YYYY_MM_DD_HH_MM_SS, null)); cmdParam.replace(CMDPARAM_COMPLEMENT_DATA_START_DATE, DateUtils.format(scheduleDate, YYYY_MM_DD_HH_MM_SS, null));
} }

27
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteThreadPool.java

@ -20,6 +20,7 @@ package org.apache.dolphinscheduler.server.master.runner;
import org.apache.dolphinscheduler.common.enums.Flag; import org.apache.dolphinscheduler.common.enums.Flag;
import org.apache.dolphinscheduler.common.enums.StateEvent; import org.apache.dolphinscheduler.common.enums.StateEvent;
import org.apache.dolphinscheduler.common.enums.StateEventType; import org.apache.dolphinscheduler.common.enums.StateEventType;
import org.apache.dolphinscheduler.common.utils.LoggerUtils;
import org.apache.dolphinscheduler.common.utils.NetUtils; import org.apache.dolphinscheduler.common.utils.NetUtils;
import org.apache.dolphinscheduler.dao.entity.ProcessInstance; import org.apache.dolphinscheduler.dao.entity.ProcessInstance;
import org.apache.dolphinscheduler.dao.entity.TaskInstance; import org.apache.dolphinscheduler.dao.entity.TaskInstance;
@ -47,6 +48,8 @@ import org.springframework.util.concurrent.ListenableFutureCallback;
import com.google.common.base.Strings; import com.google.common.base.Strings;
import lombok.NonNull;
/** /**
* Used to execute {@link WorkflowExecuteRunnable}, when * Used to execute {@link WorkflowExecuteRunnable}, when
*/ */
@ -78,7 +81,7 @@ public class WorkflowExecuteThreadPool extends ThreadPoolTaskExecutor {
@PostConstruct @PostConstruct
private void init() { private void init() {
this.setDaemon(true); this.setDaemon(true);
this.setThreadNamePrefix("Workflow-Execute-Thread-"); this.setThreadNamePrefix("WorkflowExecuteThread-");
this.setMaxPoolSize(masterConfig.getExecThreads()); this.setMaxPoolSize(masterConfig.getExecThreads());
this.setCorePoolSize(masterConfig.getExecThreads()); this.setCorePoolSize(masterConfig.getExecThreads());
} }
@ -89,10 +92,11 @@ public class WorkflowExecuteThreadPool extends ThreadPoolTaskExecutor {
public void submitStateEvent(StateEvent stateEvent) { public void submitStateEvent(StateEvent stateEvent) {
WorkflowExecuteRunnable workflowExecuteThread = processInstanceExecCacheManager.getByProcessInstanceId(stateEvent.getProcessInstanceId()); WorkflowExecuteRunnable workflowExecuteThread = processInstanceExecCacheManager.getByProcessInstanceId(stateEvent.getProcessInstanceId());
if (workflowExecuteThread == null) { if (workflowExecuteThread == null) {
logger.warn("workflowExecuteThread is null, stateEvent:{}", stateEvent); logger.warn("Submit state event error, cannot from workflowExecuteThread from cache manager, stateEvent:{}", stateEvent);
return; return;
} }
workflowExecuteThread.addStateEvent(stateEvent); workflowExecuteThread.addStateEvent(stateEvent);
logger.info("Submit state event success, stateEvent: {}", stateEvent);
} }
/** /**
@ -111,7 +115,7 @@ public class WorkflowExecuteThreadPool extends ThreadPoolTaskExecutor {
return; return;
} }
if (multiThreadFilterMap.containsKey(workflowExecuteThread.getKey())) { if (multiThreadFilterMap.containsKey(workflowExecuteThread.getKey())) {
logger.warn("The workflow:{} has been executed by another thread", workflowExecuteThread.getKey()); logger.warn("The workflow has been executed by another thread");
return; return;
} }
multiThreadFilterMap.put(workflowExecuteThread.getKey(), workflowExecuteThread); multiThreadFilterMap.put(workflowExecuteThread.getKey(), workflowExecuteThread);
@ -120,24 +124,31 @@ public class WorkflowExecuteThreadPool extends ThreadPoolTaskExecutor {
future.addCallback(new ListenableFutureCallback() { future.addCallback(new ListenableFutureCallback() {
@Override @Override
public void onFailure(Throwable ex) { public void onFailure(Throwable ex) {
logger.error("handle events {} failed", processInstanceId, ex); LoggerUtils.setWorkflowInstanceIdMDC(processInstanceId);
try {
logger.error("Workflow instance events handle failed", ex);
multiThreadFilterMap.remove(workflowExecuteThread.getKey()); multiThreadFilterMap.remove(workflowExecuteThread.getKey());
} finally {
LoggerUtils.removeWorkflowInstanceIdMDC();
}
} }
@Override @Override
public void onSuccess(Object result) { public void onSuccess(Object result) {
try { try {
LoggerUtils.setWorkflowInstanceIdMDC(workflowExecuteThread.getProcessInstance().getId());
if (workflowExecuteThread.workFlowFinish()) { if (workflowExecuteThread.workFlowFinish()) {
stateWheelExecuteThread.removeProcess4TimeoutCheck(workflowExecuteThread.getProcessInstance()); stateWheelExecuteThread.removeProcess4TimeoutCheck(workflowExecuteThread.getProcessInstance());
processInstanceExecCacheManager.removeByProcessInstanceId(processInstanceId); processInstanceExecCacheManager.removeByProcessInstanceId(processInstanceId);
notifyProcessChanged(workflowExecuteThread.getProcessInstance()); notifyProcessChanged(workflowExecuteThread.getProcessInstance());
logger.info("process instance {} finished.", processInstanceId); logger.info("Workflow instance is finished.");
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("handle events {} success, but notify changed error", processInstanceId, e); logger.error("Workflow instance is finished, but notify changed error", e);
} finally { } finally {
// make sure the process has been removed from multiThreadFilterMap // make sure the process has been removed from multiThreadFilterMap
multiThreadFilterMap.remove(workflowExecuteThread.getKey()); multiThreadFilterMap.remove(workflowExecuteThread.getKey());
LoggerUtils.removeWorkflowInstanceIdMDC();
} }
} }
}); });
@ -166,9 +177,9 @@ public class WorkflowExecuteThreadPool extends ThreadPoolTaskExecutor {
/** /**
* notify myself * notify myself
*/ */
private void notifyMyself(ProcessInstance processInstance, TaskInstance taskInstance) { private void notifyMyself(@NonNull ProcessInstance processInstance, @NonNull TaskInstance taskInstance) {
logger.info("notify process {} task {} state change", processInstance.getId(), taskInstance.getId());
if (!processInstanceExecCacheManager.contains(processInstance.getId())) { if (!processInstanceExecCacheManager.contains(processInstance.getId())) {
logger.warn("The execute cache manager doesn't contains this workflow instance");
return; return;
} }
StateEvent stateEvent = new StateEvent(); StateEvent stateEvent = new StateEvent();

9
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/task/TaskProcessorFactory.java

@ -30,9 +30,12 @@ import java.util.concurrent.ConcurrentHashMap;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import lombok.experimental.UtilityClass;
/** /**
* the factory to create task processor * the factory to create task processor
*/ */
@UtilityClass
public final class TaskProcessorFactory { public final class TaskProcessorFactory {
private static final Logger logger = LoggerFactory.getLogger(TaskProcessorFactory.class); private static final Logger logger = LoggerFactory.getLogger(TaskProcessorFactory.class);
@ -46,7 +49,7 @@ public final class TaskProcessorFactory {
try { try {
PROCESS_MAP.put(iTaskProcessor.getType(), (Constructor<ITaskProcessor>) iTaskProcessor.getClass().getConstructor()); PROCESS_MAP.put(iTaskProcessor.getType(), (Constructor<ITaskProcessor>) iTaskProcessor.getClass().getConstructor());
} catch (NoSuchMethodException e) { } catch (NoSuchMethodException e) {
throw new IllegalArgumentException("The task processor should has a no args constructor"); throw new IllegalArgumentException("The task processor should has a no args constructor", e);
} }
} }
} }
@ -57,7 +60,6 @@ public final class TaskProcessorFactory {
} }
Constructor<ITaskProcessor> iTaskProcessorConstructor = PROCESS_MAP.get(type); Constructor<ITaskProcessor> iTaskProcessorConstructor = PROCESS_MAP.get(type);
if (iTaskProcessorConstructor == null) { if (iTaskProcessorConstructor == null) {
logger.warn("ITaskProcessor could not found for taskType: {}", type);
iTaskProcessorConstructor = PROCESS_MAP.get(DEFAULT_PROCESSOR); iTaskProcessorConstructor = PROCESS_MAP.get(DEFAULT_PROCESSOR);
} }
@ -74,7 +76,4 @@ public final class TaskProcessorFactory {
return PROCESS_MAP.containsKey(type); return PROCESS_MAP.containsKey(type);
} }
private TaskProcessorFactory() {
throw new UnsupportedOperationException("TaskProcessorFactory cannot be instantiated");
}
} }

24
dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/service/FailoverService.java

@ -17,8 +17,7 @@
package org.apache.dolphinscheduler.server.master.service; package org.apache.dolphinscheduler.server.master.service;
import io.micrometer.core.annotation.Counted; import static com.google.common.base.Preconditions.checkNotNull;
import io.micrometer.core.annotation.Timed;
import org.apache.dolphinscheduler.common.Constants; import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.enums.NodeType; import org.apache.dolphinscheduler.common.enums.NodeType;
@ -55,6 +54,9 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import io.micrometer.core.annotation.Counted;
import io.micrometer.core.annotation.Timed;
/** /**
* failover service * failover service
*/ */
@ -66,12 +68,14 @@ public class FailoverService {
private final ProcessService processService; private final ProcessService processService;
private final WorkflowExecuteThreadPool workflowExecuteThreadPool; private final WorkflowExecuteThreadPool workflowExecuteThreadPool;
public FailoverService(RegistryClient registryClient, MasterConfig masterConfig, ProcessService processService, public FailoverService(RegistryClient registryClient,
MasterConfig masterConfig,
ProcessService processService,
WorkflowExecuteThreadPool workflowExecuteThreadPool) { WorkflowExecuteThreadPool workflowExecuteThreadPool) {
this.registryClient = registryClient; this.registryClient = checkNotNull(registryClient);
this.masterConfig = masterConfig; this.masterConfig = checkNotNull(masterConfig);
this.processService = processService; this.processService = checkNotNull(processService);
this.workflowExecuteThreadPool = workflowExecuteThreadPool; this.workflowExecuteThreadPool = checkNotNull(workflowExecuteThreadPool);
} }
/** /**
@ -84,7 +88,7 @@ public class FailoverService {
if (CollectionUtils.isEmpty(hosts)) { if (CollectionUtils.isEmpty(hosts)) {
return; return;
} }
LOGGER.info("{} begin to failover hosts:{}", getLocalAddress(), hosts); LOGGER.info("Master failover service {} begin to failover hosts:{}", getLocalAddress(), hosts);
for (String host : hosts) { for (String host : hosts) {
failoverMasterWithLock(host); failoverMasterWithLock(host);
@ -274,7 +278,7 @@ public class FailoverService {
while (iterator.hasNext()) { while (iterator.hasNext()) {
String host = iterator.next(); String host = iterator.next();
if (registryClient.checkNodeExists(host, NodeType.MASTER)) { if (registryClient.checkNodeExists(host, NodeType.MASTER)) {
if (!host.equals(getLocalAddress())) { if (!getLocalAddress().equals(host)) {
iterator.remove(); iterator.remove();
} }
} }
@ -294,7 +298,7 @@ public class FailoverService {
boolean taskNeedFailover = true; boolean taskNeedFailover = true;
if (taskInstance == null) { if (taskInstance == null) {
LOGGER.error("failover task instance error, taskInstance is null"); LOGGER.error("Master failover task instance error, taskInstance is null");
return false; return false;
} }

4
dolphinscheduler-master/src/main/resources/logback-spring.xml

@ -21,7 +21,7 @@
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender"> <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder> <encoder>
<pattern> <pattern>
[%level] %date{yyyy-MM-dd HH:mm:ss.SSS Z} %logger{96}:[%line] - %msg%n [%level] %date{yyyy-MM-dd HH:mm:ss.SSS Z} %logger{96}:[%line] - [WorkflowInstance-%X{workflowInstanceId:-0}][TaskInstance-%X{taskInstanceId:-0}] - %msg%n
</pattern> </pattern>
<charset>UTF-8</charset> <charset>UTF-8</charset>
</encoder> </encoder>
@ -57,7 +57,7 @@
</rollingPolicy> </rollingPolicy>
<encoder> <encoder>
<pattern> <pattern>
[%level] %date{yyyy-MM-dd HH:mm:ss.SSS Z} %logger{96}:[%line] - %msg%n [%level] %date{yyyy-MM-dd HH:mm:ss.SSS Z} %logger{96}:[%line] - [WorkflowInstance-%X{workflowInstanceId:-0}][TaskInstance-%X{taskInstanceId:-0}] - %msg%n
</pattern> </pattern>
<charset>UTF-8</charset> <charset>UTF-8</charset>
</encoder> </encoder>

1
dolphinscheduler-scheduler-plugin/dolphinscheduler-scheduler-quartz/src/main/java/org/apache/dolphinscheduler/scheduler/quartz/QuartzScheduler.java

@ -17,7 +17,6 @@
package org.apache.dolphinscheduler.scheduler.quartz; package org.apache.dolphinscheduler.scheduler.quartz;
import static org.quartz.CronScheduleBuilder.cronSchedule; import static org.quartz.CronScheduleBuilder.cronSchedule;
import static org.quartz.JobBuilder.newJob; import static org.quartz.JobBuilder.newJob;
import static org.quartz.TriggerBuilder.newTrigger; import static org.quartz.TriggerBuilder.newTrigger;

21
dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/alert/AlertClientService.java

@ -25,6 +25,8 @@ import org.apache.dolphinscheduler.remote.config.NettyClientConfig;
import org.apache.dolphinscheduler.remote.utils.Host; import org.apache.dolphinscheduler.remote.utils.Host;
import org.apache.dolphinscheduler.remote.utils.JsonSerializer; import org.apache.dolphinscheduler.remote.utils.JsonSerializer;
import java.util.concurrent.atomic.AtomicBoolean;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -36,7 +38,7 @@ public class AlertClientService implements AutoCloseable {
private final NettyRemotingClient client; private final NettyRemotingClient client;
private volatile boolean isRunning; private final AtomicBoolean isRunning;
private String host; private String host;
@ -53,16 +55,14 @@ public class AlertClientService implements AutoCloseable {
public AlertClientService() { public AlertClientService() {
this.clientConfig = new NettyClientConfig(); this.clientConfig = new NettyClientConfig();
this.client = new NettyRemotingClient(clientConfig); this.client = new NettyRemotingClient(clientConfig);
this.isRunning = true; this.isRunning = new AtomicBoolean(true);
} }
/** /**
* alert client * alert client
*/ */
public AlertClientService(String host, int port) { public AlertClientService(String host, int port) {
this.clientConfig = new NettyClientConfig(); this();
this.client = new NettyRemotingClient(clientConfig);
this.isRunning = true;
this.host = host; this.host = host;
this.port = port; this.port = port;
} }
@ -72,9 +72,14 @@ public class AlertClientService implements AutoCloseable {
*/ */
@Override @Override
public void close() { public void close() {
if (isRunning.compareAndSet(true, false)) {
logger.warn("Alert client is already closed");
return;
}
logger.info("Alter client closing");
this.client.close(); this.client.close();
this.isRunning = false; logger.info("Alter client closed");
logger.info("alter client closed");
} }
/** /**
@ -116,6 +121,6 @@ public class AlertClientService implements AutoCloseable {
} }
public boolean isRunning() { public boolean isRunning() {
return isRunning; return isRunning.get();
} }
} }

5
dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/task/TaskPluginManager.java

@ -24,6 +24,7 @@ import org.apache.dolphinscheduler.dao.PluginDao;
import org.apache.dolphinscheduler.dao.entity.PluginDefine; import org.apache.dolphinscheduler.dao.entity.PluginDefine;
import org.apache.dolphinscheduler.plugin.task.api.TaskChannel; import org.apache.dolphinscheduler.plugin.task.api.TaskChannel;
import org.apache.dolphinscheduler.plugin.task.api.TaskChannelFactory; import org.apache.dolphinscheduler.plugin.task.api.TaskChannelFactory;
import org.apache.dolphinscheduler.plugin.task.api.TaskPluginException;
import org.apache.dolphinscheduler.plugin.task.api.parameters.AbstractParameters; import org.apache.dolphinscheduler.plugin.task.api.parameters.AbstractParameters;
import org.apache.dolphinscheduler.plugin.task.api.parameters.ParametersNode; import org.apache.dolphinscheduler.plugin.task.api.parameters.ParametersNode;
import org.apache.dolphinscheduler.spi.params.PluginParamsTransfer; import org.apache.dolphinscheduler.spi.params.PluginParamsTransfer;
@ -93,7 +94,7 @@ public class TaskPluginManager {
logger.info("Registering task plugin: {}", name); logger.info("Registering task plugin: {}", name);
if (!names.add(name)) { if (!names.add(name)) {
throw new IllegalStateException(format("Duplicate task plugins named '%s'", name)); throw new TaskPluginException(format("Duplicate task plugins named '%s'", name));
} }
loadTaskChannel(factory); loadTaskChannel(factory);
@ -106,7 +107,7 @@ public class TaskPluginManager {
PluginDefine pluginDefine = new PluginDefine(name, PluginType.TASK.getDesc(), paramsJson); PluginDefine pluginDefine = new PluginDefine(name, PluginType.TASK.getDesc(), paramsJson);
int count = pluginDao.addOrUpdatePluginDefine(pluginDefine); int count = pluginDao.addOrUpdatePluginDefine(pluginDefine);
if (count <= 0) { if (count <= 0) {
throw new RuntimeException("Failed to update task plugin: " + name); throw new TaskPluginException("Failed to update task plugin: " + name);
} }
}); });
} }

4
dolphinscheduler-standalone-server/src/main/resources/logback-spring.xml

@ -22,7 +22,7 @@
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender"> <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder> <encoder>
<pattern> <pattern>
[%level] %date{yyyy-MM-dd HH:mm:ss.SSS Z} %logger{96}:[%line] - %msg%n [%level] %date{yyyy-MM-dd HH:mm:ss.SSS Z} %logger{96}:[%line] - [WorkflowInstance-%X{workflowInstanceId:-0}][TaskInstance-%X{taskInstanceId:-0}] - %msg%n
</pattern> </pattern>
<charset>UTF-8</charset> <charset>UTF-8</charset>
</encoder> </encoder>
@ -63,7 +63,7 @@
<file>${log.base}/${taskAppId}.log</file> <file>${log.base}/${taskAppId}.log</file>
<encoder> <encoder>
<pattern> <pattern>
[%level] %date{yyyy-MM-dd HH:mm:ss.SSS Z} [%thread] %logger{96}:[%line] - %messsage%n [%level] %date{yyyy-MM-dd HH:mm:ss.SSS Z} [%thread] %logger{96}:[%line] - [WorkflowInstance-%X{workflowInstanceId:-0}][TaskInstance-%X{taskInstanceId:-0}] - %messsage%n
</pattern> </pattern>
<charset>UTF-8</charset> <charset>UTF-8</charset>
</encoder> </encoder>

15
dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/ProcessUtils.java

@ -27,6 +27,8 @@ import java.util.regex.Pattern;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import lombok.NonNull;
public final class ProcessUtils { public final class ProcessUtils {
private static final Logger logger = LoggerFactory.getLogger(ProcessUtils.class); private static final Logger logger = LoggerFactory.getLogger(ProcessUtils.class);
@ -48,13 +50,13 @@ public final class ProcessUtils {
/** /**
* kill tasks according to different task types. * kill tasks according to different task types.
*/ */
public static void kill(TaskExecutionContext request) { public static boolean kill(@NonNull TaskExecutionContext request) {
try { try {
logger.info("Begin kill task instance, processId: {}", request.getProcessId());
int processId = request.getProcessId(); int processId = request.getProcessId();
if (processId == 0) { if (processId == 0) {
logger.error("process kill failed, process id :{}, task id:{}", logger.error("Task instance kill failed, processId is not exist");
processId, request.getTaskInstanceId()); return false;
return;
} }
String cmd = String.format("kill -9 %s", getPidsStr(processId)); String cmd = String.format("kill -9 %s", getPidsStr(processId));
@ -62,8 +64,11 @@ public final class ProcessUtils {
logger.info("process id:{}, cmd:{}", processId, cmd); logger.info("process id:{}, cmd:{}", processId, cmd);
OSUtils.exeCmd(cmd); OSUtils.exeCmd(cmd);
logger.info("Success kill task instance, processId: {}", request.getProcessId());
return true;
} catch (Exception e) { } catch (Exception e) {
logger.error("kill task failed", e); logger.error("Kill task instance error, processId: {}", request.getProcessId(), e);
return false;
} }
} }

25
dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/TaskPluginException.java

@ -0,0 +1,25 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.plugin.task.api;
public class TaskPluginException extends RuntimeException {
public TaskPluginException(String message) {
super(message);
}
}

111
dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/WorkerServer.java

@ -21,19 +21,11 @@ import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.IStoppable; import org.apache.dolphinscheduler.common.IStoppable;
import org.apache.dolphinscheduler.common.enums.NodeType; import org.apache.dolphinscheduler.common.enums.NodeType;
import org.apache.dolphinscheduler.common.thread.Stopper; import org.apache.dolphinscheduler.common.thread.Stopper;
import org.apache.dolphinscheduler.common.utils.LoggerUtils;
import org.apache.dolphinscheduler.plugin.task.api.ProcessUtils;
import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContext; import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContext;
import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContextCacheManager; import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContextCacheManager;
import org.apache.dolphinscheduler.remote.NettyRemotingServer; import org.apache.dolphinscheduler.server.worker.prc.WorkerRpcServer;
import org.apache.dolphinscheduler.remote.command.CommandType;
import org.apache.dolphinscheduler.remote.config.NettyServerConfig;
import org.apache.dolphinscheduler.server.log.LoggerRequestProcessor;
import org.apache.dolphinscheduler.server.worker.config.WorkerConfig;
import org.apache.dolphinscheduler.server.worker.processor.HostUpdateProcessor;
import org.apache.dolphinscheduler.server.worker.processor.TaskExecuteProcessor;
import org.apache.dolphinscheduler.server.worker.processor.TaskExecuteResponseAckProcessor;
import org.apache.dolphinscheduler.server.worker.processor.TaskExecuteRunningAckProcessor;
import org.apache.dolphinscheduler.server.worker.processor.TaskKillProcessor;
import org.apache.dolphinscheduler.server.worker.processor.TaskRecallAckProcessor;
import org.apache.dolphinscheduler.server.worker.registry.WorkerRegistryClient; import org.apache.dolphinscheduler.server.worker.registry.WorkerRegistryClient;
import org.apache.dolphinscheduler.server.worker.runner.RetryReportTaskStatusThread; import org.apache.dolphinscheduler.server.worker.runner.RetryReportTaskStatusThread;
import org.apache.dolphinscheduler.server.worker.runner.WorkerManagerThread; import org.apache.dolphinscheduler.server.worker.runner.WorkerManagerThread;
@ -74,17 +66,6 @@ public class WorkerServer implements IStoppable {
*/ */
private static final Logger logger = LoggerFactory.getLogger(WorkerServer.class); private static final Logger logger = LoggerFactory.getLogger(WorkerServer.class);
/**
* netty remote server
*/
private NettyRemotingServer nettyRemotingServer;
/**
* worker config
*/
@Autowired
private WorkerConfig workerConfig;
/** /**
* spring application context * spring application context
* only use it for initialization * only use it for initialization
@ -116,25 +97,7 @@ public class WorkerServer implements IStoppable {
private TaskPluginManager taskPluginManager; private TaskPluginManager taskPluginManager;
@Autowired @Autowired
private TaskExecuteProcessor taskExecuteProcessor; private WorkerRpcServer workerRpcServer;
@Autowired
private TaskKillProcessor taskKillProcessor;
@Autowired
private TaskRecallAckProcessor taskRecallAckProcessor;
@Autowired
private TaskExecuteRunningAckProcessor taskExecuteRunningAckProcessor;
@Autowired
private TaskExecuteResponseAckProcessor taskExecuteResponseAckProcessor;
@Autowired
private HostUpdateProcessor hostUpdateProcessor;
@Autowired
private LoggerRequestProcessor loggerRequestProcessor;
/** /**
* worker server startup, not use web service * worker server startup, not use web service
@ -146,48 +109,19 @@ public class WorkerServer implements IStoppable {
SpringApplication.run(WorkerServer.class); SpringApplication.run(WorkerServer.class);
} }
/**
* worker server run
*/
@PostConstruct @PostConstruct
public void run() { public void run() {
// init remoting server this.workerRpcServer.start();
NettyServerConfig serverConfig = new NettyServerConfig();
serverConfig.setListenPort(workerConfig.getListenPort());
this.nettyRemotingServer = new NettyRemotingServer(serverConfig);
this.nettyRemotingServer.registerProcessor(CommandType.TASK_EXECUTE_REQUEST, taskExecuteProcessor);
this.nettyRemotingServer.registerProcessor(CommandType.TASK_KILL_REQUEST, taskKillProcessor);
this.nettyRemotingServer.registerProcessor(CommandType.TASK_EXECUTE_RUNNING_ACK, taskExecuteRunningAckProcessor);
this.nettyRemotingServer.registerProcessor(CommandType.TASK_EXECUTE_RESPONSE_ACK, taskExecuteResponseAckProcessor);
this.nettyRemotingServer.registerProcessor(CommandType.PROCESS_HOST_UPDATE_REQUEST, hostUpdateProcessor);
this.nettyRemotingServer.registerProcessor(CommandType.TASK_RECALL_ACK, taskRecallAckProcessor);
// logger server
this.nettyRemotingServer.registerProcessor(CommandType.GET_LOG_BYTES_REQUEST, loggerRequestProcessor);
this.nettyRemotingServer.registerProcessor(CommandType.ROLL_VIEW_LOG_REQUEST, loggerRequestProcessor);
this.nettyRemotingServer.registerProcessor(CommandType.VIEW_WHOLE_LOG_REQUEST, loggerRequestProcessor);
this.nettyRemotingServer.registerProcessor(CommandType.REMOVE_TAK_LOG_REQUEST, loggerRequestProcessor);
this.nettyRemotingServer.start();
// install task plugin
this.taskPluginManager.installPlugin(); this.taskPluginManager.installPlugin();
// worker registry
try {
this.workerRegistryClient.registry(); this.workerRegistryClient.registry();
this.workerRegistryClient.setRegistryStoppable(this); this.workerRegistryClient.setRegistryStoppable(this);
Set<String> workerZkPaths = this.workerRegistryClient.getWorkerZkPaths(); Set<String> workerZkPaths = this.workerRegistryClient.getWorkerZkPaths();
this.workerRegistryClient.handleDeadServer(workerZkPaths, NodeType.WORKER, Constants.DELETE_OP); this.workerRegistryClient.handleDeadServer(workerZkPaths, NodeType.WORKER, Constants.DELETE_OP);
} catch (Exception e) {
logger.error(e.getMessage(), e);
throw new RuntimeException(e);
}
// task execute manager
this.workerManagerThread.start(); this.workerManagerThread.start();
// retry report task status
this.retryReportTaskStatusThread.start(); this.retryReportTaskStatusThread.start();
/* /*
@ -195,7 +129,7 @@ public class WorkerServer implements IStoppable {
*/ */
Runtime.getRuntime().addShutdownHook(new Thread(() -> { Runtime.getRuntime().addShutdownHook(new Thread(() -> {
if (Stopper.isRunning()) { if (Stopper.isRunning()) {
close("shutdownHook"); close("WorkerServer shutdown hook");
} }
})); }));
} }
@ -203,24 +137,23 @@ public class WorkerServer implements IStoppable {
public void close(String cause) { public void close(String cause) {
try { try {
// execute only once // execute only once
if (Stopper.isStopped()) { // set stop signal is true
if (!Stopper.stop()) {
logger.warn("WorkerServer is already stopped, current cause: {}", cause);
return; return;
} }
logger.info("worker server is stopping ..., cause : {}", cause); logger.info("Worker server is stopping, current cause : {}", cause);
// set stop signal is true
Stopper.stop();
try { try {
// thread sleep 3 seconds for thread quitely stop // thread sleep 3 seconds for thread quitely stop
Thread.sleep(3000L); Thread.sleep(Constants.SERVER_CLOSE_WAIT_TIME.toMillis());
} catch (Exception e) { } catch (Exception e) {
logger.warn("thread sleep exception", e); logger.warn("Worker server close wait error", e);
} }
// close // close
this.nettyRemotingServer.close(); this.workerRpcServer.close();
this.workerRegistryClient.unRegistry(); this.workerRegistryClient.unRegistry();
this.alertClientService.close(); this.alertClientService.close();
@ -229,8 +162,9 @@ public class WorkerServer implements IStoppable {
// close the application context // close the application context
this.springApplicationContext.close(); this.springApplicationContext.close();
logger.info("Worker server stopped, current cause: {}", cause);
} catch (Exception e) { } catch (Exception e) {
logger.error("worker server stop exception ", e); logger.error("Worker server stop failed, current cause: {}", cause, e);
} }
} }
@ -244,15 +178,22 @@ public class WorkerServer implements IStoppable {
*/ */
public void killAllRunningTasks() { public void killAllRunningTasks() {
Collection<TaskExecutionContext> taskRequests = TaskExecutionContextCacheManager.getAllTaskRequestList(); Collection<TaskExecutionContext> taskRequests = TaskExecutionContextCacheManager.getAllTaskRequestList();
logger.info("ready to kill all cache job, job size:{}", taskRequests.size());
if (CollectionUtils.isEmpty(taskRequests)) { if (CollectionUtils.isEmpty(taskRequests)) {
return; return;
} }
logger.info("Worker begin to kill all cache task, task size: {}", taskRequests.size());
int killNumber = 0;
for (TaskExecutionContext taskRequest : taskRequests) { for (TaskExecutionContext taskRequest : taskRequests) {
// kill task when it's not finished yet // kill task when it's not finished yet
org.apache.dolphinscheduler.plugin.task.api.ProcessUtils.kill(taskRequest); try {
LoggerUtils.setWorkflowAndTaskInstanceIDMDC(taskRequest.getProcessInstanceId(), taskRequest.getTaskInstanceId());
if (ProcessUtils.kill(taskRequest)) {
killNumber++;
}
} finally {
LoggerUtils.removeWorkflowAndTaskInstanceIdMDC();
}
} }
logger.info("Worker after kill all cache task, task size: {}, killed number: {}", taskRequests.size(), killNumber);
} }
} }

97
dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/prc/WorkerRpcServer.java

@ -0,0 +1,97 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.worker.prc;
import org.apache.dolphinscheduler.remote.NettyRemotingServer;
import org.apache.dolphinscheduler.remote.command.CommandType;
import org.apache.dolphinscheduler.remote.config.NettyServerConfig;
import org.apache.dolphinscheduler.server.log.LoggerRequestProcessor;
import org.apache.dolphinscheduler.server.worker.config.WorkerConfig;
import org.apache.dolphinscheduler.server.worker.processor.HostUpdateProcessor;
import org.apache.dolphinscheduler.server.worker.processor.TaskExecuteProcessor;
import org.apache.dolphinscheduler.server.worker.processor.TaskExecuteResponseAckProcessor;
import org.apache.dolphinscheduler.server.worker.processor.TaskExecuteRunningAckProcessor;
import org.apache.dolphinscheduler.server.worker.processor.TaskKillProcessor;
import org.apache.dolphinscheduler.server.worker.processor.TaskRecallAckProcessor;
import java.io.Closeable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
@Service
public class WorkerRpcServer implements Closeable {
private static final Logger LOGGER = LoggerFactory.getLogger(WorkerRpcServer.class);
@Autowired
private TaskExecuteProcessor taskExecuteProcessor;
@Autowired
private TaskKillProcessor taskKillProcessor;
@Autowired
private TaskRecallAckProcessor taskRecallAckProcessor;
@Autowired
private TaskExecuteRunningAckProcessor taskExecuteRunningAckProcessor;
@Autowired
private TaskExecuteResponseAckProcessor taskExecuteResponseAckProcessor;
@Autowired
private HostUpdateProcessor hostUpdateProcessor;
@Autowired
private LoggerRequestProcessor loggerRequestProcessor;
@Autowired
private WorkerConfig workerConfig;
private NettyRemotingServer nettyRemotingServer;
public void start() {
LOGGER.info("Worker rpc server starting");
NettyServerConfig serverConfig = new NettyServerConfig();
serverConfig.setListenPort(workerConfig.getListenPort());
this.nettyRemotingServer = new NettyRemotingServer(serverConfig);
this.nettyRemotingServer.registerProcessor(CommandType.TASK_EXECUTE_REQUEST, taskExecuteProcessor);
this.nettyRemotingServer.registerProcessor(CommandType.TASK_KILL_REQUEST, taskKillProcessor);
this.nettyRemotingServer.registerProcessor(CommandType.TASK_EXECUTE_RUNNING_ACK, taskExecuteRunningAckProcessor);
this.nettyRemotingServer.registerProcessor(CommandType.TASK_EXECUTE_RESPONSE_ACK, taskExecuteResponseAckProcessor);
this.nettyRemotingServer.registerProcessor(CommandType.PROCESS_HOST_UPDATE_REQUEST, hostUpdateProcessor);
this.nettyRemotingServer.registerProcessor(CommandType.TASK_RECALL_ACK, taskRecallAckProcessor);
// logger server
this.nettyRemotingServer.registerProcessor(CommandType.GET_LOG_BYTES_REQUEST, loggerRequestProcessor);
this.nettyRemotingServer.registerProcessor(CommandType.ROLL_VIEW_LOG_REQUEST, loggerRequestProcessor);
this.nettyRemotingServer.registerProcessor(CommandType.VIEW_WHOLE_LOG_REQUEST, loggerRequestProcessor);
this.nettyRemotingServer.registerProcessor(CommandType.REMOVE_TAK_LOG_REQUEST, loggerRequestProcessor);
this.nettyRemotingServer.start();
LOGGER.info("Worker rpc server started");
}
@Override
public void close() {
LOGGER.info("Worker rpc server closing");
this.nettyRemotingServer.close();
LOGGER.info("Worker rpc server closed");
}
}

14
dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/processor/TaskExecuteProcessor.java

@ -18,10 +18,12 @@
package org.apache.dolphinscheduler.server.worker.processor; package org.apache.dolphinscheduler.server.worker.processor;
import org.apache.dolphinscheduler.common.Constants; import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.storage.StorageOperate;
import org.apache.dolphinscheduler.common.utils.CommonUtils; import org.apache.dolphinscheduler.common.utils.CommonUtils;
import org.apache.dolphinscheduler.common.utils.DateUtils; import org.apache.dolphinscheduler.common.utils.DateUtils;
import org.apache.dolphinscheduler.common.utils.FileUtils; import org.apache.dolphinscheduler.common.utils.FileUtils;
import org.apache.dolphinscheduler.common.utils.JSONUtils; import org.apache.dolphinscheduler.common.utils.JSONUtils;
import org.apache.dolphinscheduler.common.utils.LoggerUtils;
import org.apache.dolphinscheduler.common.utils.NetUtils; import org.apache.dolphinscheduler.common.utils.NetUtils;
import org.apache.dolphinscheduler.common.utils.OSUtils; import org.apache.dolphinscheduler.common.utils.OSUtils;
import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContext; import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContext;
@ -90,6 +92,9 @@ public class TaskExecuteProcessor implements NettyRequestProcessor {
@Autowired @Autowired
private WorkerManagerThread workerManager; private WorkerManagerThread workerManager;
@Autowired(required = false)
private StorageOperate storageOperate;
@Counted(value = "ds.task.execution.count", description = "task execute total count") @Counted(value = "ds.task.execution.count", description = "task execute total count")
@Timed(value = "ds.task.execution.duration", percentiles = {0.5, 0.75, 0.95, 0.99}, histogram = true) @Timed(value = "ds.task.execution.duration", percentiles = {0.5, 0.75, 0.95, 0.99}, histogram = true)
@Override @Override
@ -113,6 +118,9 @@ public class TaskExecuteProcessor implements NettyRequestProcessor {
logger.error("task execution context is null"); logger.error("task execution context is null");
return; return;
} }
try {
LoggerUtils.setWorkflowAndTaskInstanceIDMDC(taskExecutionContext.getProcessInstanceId(), taskExecutionContext.getTaskInstanceId());
TaskMetrics.incrTaskTypeExecuteCount(taskExecutionContext.getTaskType()); TaskMetrics.incrTaskTypeExecuteCount(taskExecutionContext.getTaskType());
// set cache, it will be used when kill task // set cache, it will be used when kill task
@ -178,12 +186,16 @@ public class TaskExecuteProcessor implements NettyRequestProcessor {
} }
// submit task to manager // submit task to manager
boolean offer = workerManager.offer(new TaskExecuteThread(taskExecutionContext, taskCallbackService, alertClientService, taskPluginManager)); boolean offer = workerManager.offer(
new TaskExecuteThread(taskExecutionContext, taskCallbackService, alertClientService, taskPluginManager, storageOperate));
if (!offer) { if (!offer) {
logger.warn("submit task to wait queue error, queue is full, queue size is {}, taskInstanceId: {}", logger.warn("submit task to wait queue error, queue is full, queue size is {}, taskInstanceId: {}",
workerManager.getWaitSubmitQueueSize(), taskExecutionContext.getTaskInstanceId()); workerManager.getWaitSubmitQueueSize(), taskExecutionContext.getTaskInstanceId());
taskCallbackService.sendRecallCommand(taskExecutionContext); taskCallbackService.sendRecallCommand(taskExecutionContext);
} }
} finally {
LoggerUtils.removeWorkflowAndTaskInstanceIdMDC();
}
} }
/** /**

7
dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/processor/TaskExecuteRunningAckProcessor.java

@ -18,6 +18,7 @@
package org.apache.dolphinscheduler.server.worker.processor; package org.apache.dolphinscheduler.server.worker.processor;
import org.apache.dolphinscheduler.common.utils.JSONUtils; import org.apache.dolphinscheduler.common.utils.JSONUtils;
import org.apache.dolphinscheduler.common.utils.LoggerUtils;
import org.apache.dolphinscheduler.plugin.task.api.enums.ExecutionStatus; import org.apache.dolphinscheduler.plugin.task.api.enums.ExecutionStatus;
import org.apache.dolphinscheduler.remote.command.Command; import org.apache.dolphinscheduler.remote.command.Command;
import org.apache.dolphinscheduler.remote.command.CommandType; import org.apache.dolphinscheduler.remote.command.CommandType;
@ -48,16 +49,20 @@ public class TaskExecuteRunningAckProcessor implements NettyRequestProcessor {
TaskExecuteRunningAckCommand runningAckCommand = JSONUtils.parseObject( TaskExecuteRunningAckCommand runningAckCommand = JSONUtils.parseObject(
command.getBody(), TaskExecuteRunningAckCommand.class); command.getBody(), TaskExecuteRunningAckCommand.class);
if (runningAckCommand == null) { if (runningAckCommand == null) {
logger.error("task execute running ack command is null"); logger.error("task execute running ack command is null");
return; return;
} }
try {
LoggerUtils.setTaskInstanceIdMDC(runningAckCommand.getTaskInstanceId());
logger.info("task execute running ack command : {}", runningAckCommand); logger.info("task execute running ack command : {}", runningAckCommand);
if (runningAckCommand.getStatus() == ExecutionStatus.SUCCESS.getCode()) { if (runningAckCommand.getStatus() == ExecutionStatus.SUCCESS.getCode()) {
ResponseCache.get().removeRunningCache(runningAckCommand.getTaskInstanceId()); ResponseCache.get().removeRunningCache(runningAckCommand.getTaskInstanceId());
} }
} finally {
LoggerUtils.removeTaskInstanceIdMDC();
}
} }
} }

4
dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/RetryReportTaskStatusThread.java

@ -47,9 +47,11 @@ public class RetryReportTaskStatusThread implements Runnable {
private TaskCallbackService taskCallbackService; private TaskCallbackService taskCallbackService;
public void start() { public void start() {
logger.info("Retry report task status thread starting");
Thread thread = new Thread(this, "RetryReportTaskStatusThread"); Thread thread = new Thread(this, "RetryReportTaskStatusThread");
thread.setDaemon(true); thread.setDaemon(true);
thread.start(); thread.start();
logger.info("Retry report task status thread started");
} }
/** /**
@ -91,7 +93,7 @@ public class RetryReportTaskStatusThread implements Runnable {
} }
} }
} catch (Exception e) { } catch (Exception e) {
logger.warn("retry report task status error", e); logger.warn("Retry report task status error", e);
} }
} }
} }

19
dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/TaskExecuteThread.java

@ -77,14 +77,6 @@ public class TaskExecuteThread implements Runnable, Delayed {
*/ */
private TaskExecutionContext taskExecutionContext; private TaskExecutionContext taskExecutionContext;
public StorageOperate getStorageOperate() {
return storageOperate;
}
public void setStorageOperate(StorageOperate storageOperate) {
this.storageOperate = storageOperate;
}
private StorageOperate storageOperate; private StorageOperate storageOperate;
/** /**
@ -112,20 +104,24 @@ public class TaskExecuteThread implements Runnable, Delayed {
*/ */
public TaskExecuteThread(TaskExecutionContext taskExecutionContext, public TaskExecuteThread(TaskExecutionContext taskExecutionContext,
TaskCallbackService taskCallbackService, TaskCallbackService taskCallbackService,
AlertClientService alertClientService) { AlertClientService alertClientService,
StorageOperate storageOperate) {
this.taskExecutionContext = taskExecutionContext; this.taskExecutionContext = taskExecutionContext;
this.taskCallbackService = taskCallbackService; this.taskCallbackService = taskCallbackService;
this.alertClientService = alertClientService; this.alertClientService = alertClientService;
this.storageOperate = storageOperate;
} }
public TaskExecuteThread(TaskExecutionContext taskExecutionContext, public TaskExecuteThread(TaskExecutionContext taskExecutionContext,
TaskCallbackService taskCallbackService, TaskCallbackService taskCallbackService,
AlertClientService alertClientService, AlertClientService alertClientService,
TaskPluginManager taskPluginManager) { TaskPluginManager taskPluginManager,
StorageOperate storageOperate) {
this.taskExecutionContext = taskExecutionContext; this.taskExecutionContext = taskExecutionContext;
this.taskCallbackService = taskCallbackService; this.taskCallbackService = taskCallbackService;
this.alertClientService = alertClientService; this.alertClientService = alertClientService;
this.taskPluginManager = taskPluginManager; this.taskPluginManager = taskPluginManager;
this.storageOperate = storageOperate;
} }
@Override @Override
@ -140,6 +136,7 @@ public class TaskExecuteThread implements Runnable, Delayed {
} }
try { try {
LoggerUtils.setWorkflowAndTaskInstanceIDMDC(taskExecutionContext.getProcessInstanceId(), taskExecutionContext.getTaskInstanceId());
logger.info("script path : {}", taskExecutionContext.getExecutePath()); logger.info("script path : {}", taskExecutionContext.getExecutePath());
if (taskExecutionContext.getStartTime() == null) { if (taskExecutionContext.getStartTime() == null) {
taskExecutionContext.setStartTime(new Date()); taskExecutionContext.setStartTime(new Date());
@ -212,6 +209,7 @@ public class TaskExecuteThread implements Runnable, Delayed {
TaskExecutionContextCacheManager.removeByTaskInstanceId(taskExecutionContext.getTaskInstanceId()); TaskExecutionContextCacheManager.removeByTaskInstanceId(taskExecutionContext.getTaskInstanceId());
taskCallbackService.sendTaskExecuteResponseCommand(taskExecutionContext); taskCallbackService.sendTaskExecuteResponseCommand(taskExecutionContext);
clearTaskExecPath(); clearTaskExecPath();
LoggerUtils.removeWorkflowAndTaskInstanceIdMDC();
} }
} }
@ -309,6 +307,7 @@ public class TaskExecuteThread implements Runnable, Delayed {
/** /**
* download resource check * download resource check
*
* @param execLocalPath * @param execLocalPath
* @param projectRes * @param projectRes
* @return * @return

3
dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/runner/WorkerManagerThread.java

@ -144,9 +144,11 @@ public class WorkerManagerThread implements Runnable {
} }
public void start() { public void start() {
logger.info("Worker manager thread starting");
Thread thread = new Thread(this, this.getClass().getName()); Thread thread = new Thread(this, this.getClass().getName());
thread.setDaemon(true); thread.setDaemon(true);
thread.start(); thread.start();
logger.info("Worker manager thread started");
} }
@Override @Override
@ -157,7 +159,6 @@ public class WorkerManagerThread implements Runnable {
try { try {
if (this.getThreadPoolQueueSize() <= workerExecThreads) { if (this.getThreadPoolQueueSize() <= workerExecThreads) {
taskExecuteThread = waitSubmitQueue.take(); taskExecuteThread = waitSubmitQueue.take();
taskExecuteThread.setStorageOperate(storageOperate);
workerExecService.submit(taskExecuteThread); workerExecService.submit(taskExecuteThread);
} else { } else {
WorkerServerMetrics.incWorkerOverloadCount(); WorkerServerMetrics.incWorkerOverloadCount();

4
dolphinscheduler-worker/src/main/resources/logback-spring.xml

@ -22,7 +22,7 @@
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender"> <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder> <encoder>
<pattern> <pattern>
[%level] %date{yyyy-MM-dd HH:mm:ss.SSS Z} %logger{96}:[%line] - %msg%n [%level] %date{yyyy-MM-dd HH:mm:ss.SSS Z} %logger{96}:[%line] - [WorkflowInstance-%X{workflowInstanceId:-0}][TaskInstance-%X{taskInstanceId:-0}] - %msg%n
</pattern> </pattern>
<charset>UTF-8</charset> <charset>UTF-8</charset>
</encoder> </encoder>
@ -58,7 +58,7 @@
</rollingPolicy> </rollingPolicy>
<encoder> <encoder>
<pattern> <pattern>
[%level] %date{yyyy-MM-dd HH:mm:ss.SSS Z} %logger{96}:[%line] - %msg%n [%level] %date{yyyy-MM-dd HH:mm:ss.SSS Z} %logger{96}:[%line] - [WorkflowInstance-%X{workflowInstanceId:-0}][TaskInstance-%X{taskInstanceId:-0}] - %msg%n
</pattern> </pattern>
<charset>UTF-8</charset> <charset>UTF-8</charset>
</encoder> </encoder>

20
dolphinscheduler-worker/src/test/java/org/apache/dolphinscheduler/server/worker/processor/TaskExecuteProcessorTest.java

@ -17,6 +17,7 @@
package org.apache.dolphinscheduler.server.worker.processor; package org.apache.dolphinscheduler.server.worker.processor;
import org.apache.dolphinscheduler.common.storage.StorageOperate;
import org.apache.dolphinscheduler.common.thread.ThreadUtils; import org.apache.dolphinscheduler.common.thread.ThreadUtils;
import org.apache.dolphinscheduler.common.utils.FileUtils; import org.apache.dolphinscheduler.common.utils.FileUtils;
import org.apache.dolphinscheduler.common.utils.JSONUtils; import org.apache.dolphinscheduler.common.utils.JSONUtils;
@ -63,6 +64,8 @@ public class TaskExecuteProcessorTest {
private ExecutorService workerExecService; private ExecutorService workerExecService;
private StorageOperate storageOperate;
private WorkerConfig workerConfig; private WorkerConfig workerConfig;
private Command command; private Command command;
@ -104,7 +107,11 @@ public class TaskExecuteProcessorTest {
.thenReturn(workerConfig); .thenReturn(workerConfig);
workerManager = PowerMockito.mock(WorkerManagerThread.class); workerManager = PowerMockito.mock(WorkerManagerThread.class);
PowerMockito.when(workerManager.offer(new TaskExecuteThread(taskExecutionContext, taskCallbackService, alertClientService))).thenReturn(Boolean.TRUE);
storageOperate = PowerMockito.mock(StorageOperate.class);
PowerMockito.when(
workerManager.offer(new TaskExecuteThread(taskExecutionContext, taskCallbackService, alertClientService, storageOperate)))
.thenReturn(Boolean.TRUE);
PowerMockito.when(SpringApplicationContext.getBean(WorkerManagerThread.class)) PowerMockito.when(SpringApplicationContext.getBean(WorkerManagerThread.class))
.thenReturn(workerManager); .thenReturn(workerManager);
@ -132,7 +139,8 @@ public class TaskExecuteProcessorTest {
.thenReturn(taskExecutionContext.getExecutePath()); .thenReturn(taskExecutionContext.getExecutePath());
PowerMockito.doNothing().when(FileUtils.class, "createWorkDirIfAbsent", taskExecutionContext.getExecutePath()); PowerMockito.doNothing().when(FileUtils.class, "createWorkDirIfAbsent", taskExecutionContext.getExecutePath());
SimpleTaskExecuteThread simpleTaskExecuteThread = new SimpleTaskExecuteThread(null, null, null, alertClientService); SimpleTaskExecuteThread simpleTaskExecuteThread = new SimpleTaskExecuteThread(
null, null, null, alertClientService, storageOperate);
PowerMockito.whenNew(TaskExecuteThread.class).withAnyArguments() PowerMockito.whenNew(TaskExecuteThread.class).withAnyArguments()
.thenReturn(simpleTaskExecuteThread); .thenReturn(simpleTaskExecuteThread);
} }
@ -172,8 +180,12 @@ public class TaskExecuteProcessorTest {
private static class SimpleTaskExecuteThread extends TaskExecuteThread { private static class SimpleTaskExecuteThread extends TaskExecuteThread {
public SimpleTaskExecuteThread(TaskExecutionContext taskExecutionContext, TaskCallbackService taskCallbackService, Logger taskLogger, AlertClientService alertClientService) { public SimpleTaskExecuteThread(TaskExecutionContext taskExecutionContext,
super(taskExecutionContext, taskCallbackService, alertClientService); TaskCallbackService taskCallbackService,
Logger taskLogger,
AlertClientService alertClientService,
StorageOperate storageOperate) {
super(taskExecutionContext, taskCallbackService, alertClientService, storageOperate);
} }
@Override @Override

21
dolphinscheduler-worker/src/test/java/org/apache/dolphinscheduler/server/worker/runner/TaskExecuteThreadTest.java

@ -17,12 +17,20 @@
package org.apache.dolphinscheduler.server.worker.runner; package org.apache.dolphinscheduler.server.worker.runner;
import org.apache.commons.lang3.tuple.Pair; import org.apache.dolphinscheduler.common.storage.StorageOperate;
import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContext; import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContext;
import org.apache.dolphinscheduler.server.worker.processor.TaskCallbackService; import org.apache.dolphinscheduler.server.worker.processor.TaskCallbackService;
import org.apache.dolphinscheduler.server.worker.registry.WorkerRegistryClientTest; import org.apache.dolphinscheduler.server.worker.registry.WorkerRegistryClientTest;
import org.apache.dolphinscheduler.service.alert.AlertClientService; import org.apache.dolphinscheduler.service.alert.AlertClientService;
import org.apache.dolphinscheduler.service.task.TaskPluginManager; import org.apache.dolphinscheduler.service.task.TaskPluginManager;
import org.apache.commons.lang3.tuple.Pair;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
import org.junit.runner.RunWith; import org.junit.runner.RunWith;
@ -31,11 +39,6 @@ import org.powermock.modules.junit4.PowerMockRunner;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@RunWith(PowerMockRunner.class) @RunWith(PowerMockRunner.class)
public class TaskExecuteThreadTest { public class TaskExecuteThreadTest {
@ -50,12 +53,16 @@ public class TaskExecuteThreadTest {
@Mock @Mock
private AlertClientService alertClientService; private AlertClientService alertClientService;
@Mock
private StorageOperate storageOperate;
@Mock @Mock
private TaskPluginManager taskPluginManager; private TaskPluginManager taskPluginManager;
@Test @Test
public void checkTest() { public void checkTest() {
TaskExecuteThread taskExecuteThread = new TaskExecuteThread(taskExecutionContext, taskCallbackService, alertClientService, taskPluginManager); TaskExecuteThread taskExecuteThread = new TaskExecuteThread(taskExecutionContext, taskCallbackService,
alertClientService, taskPluginManager, storageOperate);
String path = "/"; String path = "/";
Map<String, String> projectRes = new HashMap<>(); Map<String, String> projectRes = new HashMap<>();

Loading…
Cancel
Save