Browse Source

TaskManager refactor (#2302)

* 1, master persistent task
2. extract  master and worker communication model

* 1, master persistent task
2. extract  master and worker communication model

* 1, master persistent task
2. extract  master and worker communication model

* add license

* modify javadoc error

* TaskExecutionContext create modify

* buildAckCommand taskInstanceId not set modify

* java doc error modify

* add comment

* ExecutorManager interface add generic type

* add TaskInstanceCacheManager receive Worker report result

* TaskInstance setExecutePath

* add TaskInstanceCacheManager to receive Worker Task result report

* TaskInstanceCacheManager add remove method

* add license

* add dispatch task method

* AbstractCommandExecutor remove db access

* AbstractCommandExecutor remove db access

* AbstractCommandExecutor remove db access

* AbstractCommandExecutor remove db access

* AbstractCommandExecutor remove db access

* AbstractCommandExecutor remove db access

* AbstractCommandExecutor remove db access

* taskInstanceCache is null ,need load from db

* taskInstanceCache is null ,need load from db

* taskInstanceCache is null ,need load from db

* 1,worker TaskProps replaced by TaskExecutionContext
2,Master kill Task , KillTaskProcessor modify

* worker remove db

* ShellTask modify

* master persistence processId and appIds

* master persistence processId and appIds

* master add kill task logic

* master add kill task logic

* master add kill task logic

* javadoc error modify

* remove chinese log

* executeDirectly method add Override

* remote module modify

* TaskKillResponseProcessor command type modify

* create buildKillCommand

* host add host:port format

* host add host:port format

* TaskAckProcessor modify

* TaskAckProcessor modify

* task priority refactor

* remove ITaskQueue

* task priority refactor

* remove ITaskQueue

* TaskPriority refactor

* remove logs

* WorkerServer refactor

* MasterSchedulerService modify

* WorkerConfig listen port modify

* modify master and worker listen port

* cancelTaskInstance set TaskExecutionContext host,logPath,executePath

* cancelTaskInstance set TaskExecutionContext host,logPath,executePath

* Encapsulate the parameters required by sqltask

* 1,Encapsulate the parameters required by sqltask
2,SQLTask optimization

* AbstractTask modify

* ProcedureTask optimization

* MasterSchedulerService modify

* TaskUpdateQueueConsumer modify

* test

* DataxTask process run debug

* DataxTask process run debug

* add protobuf dependency,MR、Spark task etc need this

* TaskUpdateQueueConsumer modify

* TaskExecutionContextBuilder set TaskInstance workgroup

* WorkerGroupService queryAllGroup modify
query available work group

* 1,get workergroup from zk modify
2,SpringConnectionFactory repeat load modify

* master and worker register ip  use OSUtils.getHost()

* ProcessInstance host set ip:port format

* worker fault tolerance modify

* Constants and .env modify

* master fault tolerant bug modify

* UT add pom.xml

* timing online  modify

* when taskResponse is written to the db faster than taskAck, the task state will be wrong;
add an async queue and a new thread to resolve this problem

* TaskExecutionContext set host

* 1,TaskManager refactor
2, api start load server dolphinscheduler-daemon.sh modify

* 1,TaskManager refactor
2, api start load server dolphinscheduler-daemon.sh modify

* add UT in pom.xml

* revert dolphinscheduler-daemon.sh

Co-authored-by: qiaozhanwei <qiaozhanwei@analysys.com.cn>
pull/2/head
qiaozhanwei 5 years ago committed by GitHub
parent
commit
f112415b13
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 8
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/consumer/TaskUpdateQueueConsumer.java
  2. 36
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/manager/TaskEvent.java
  3. 58
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/manager/TaskEventEnum.java
  4. 44
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/manager/TaskManager.java
  5. 6
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/processor/TaskAckProcessor.java
  6. 6
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/processor/TaskResponseProcessor.java
  7. 4
      pom.xml

8
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/consumer/TaskUpdateQueueConsumer.java

@ -18,7 +18,6 @@
package org.apache.dolphinscheduler.server.master.consumer; package org.apache.dolphinscheduler.server.master.consumer;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.enums.ExecutionStatus; import org.apache.dolphinscheduler.common.enums.ExecutionStatus;
import org.apache.dolphinscheduler.common.enums.TaskType; import org.apache.dolphinscheduler.common.enums.TaskType;
import org.apache.dolphinscheduler.common.enums.UdfType; import org.apache.dolphinscheduler.common.enums.UdfType;
@ -89,12 +88,11 @@ public class TaskUpdateQueueConsumer extends Thread{
public void run() { public void run() {
while (Stopper.isRunning()){ while (Stopper.isRunning()){
try { try {
if (taskUpdateQueue.size() == 0){ // if not task , blocking here
Thread.sleep(Constants.SLEEP_TIME_MILLIS);
continue;
}
String taskPriorityInfo = taskUpdateQueue.take(); String taskPriorityInfo = taskUpdateQueue.take();
TaskPriority taskPriority = TaskPriority.of(taskPriorityInfo); TaskPriority taskPriority = TaskPriority.of(taskPriorityInfo);
dispatch(taskPriority.getTaskId()); dispatch(taskPriority.getTaskId());
}catch (Exception e){ }catch (Exception e){
logger.error("dispatcher task error",e); logger.error("dispatcher task error",e);

36
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/manager/TaskEvent.java

@ -26,9 +26,6 @@ import java.util.Date;
*/ */
public class TaskEvent { public class TaskEvent {
public static final String ACK = "ack";
public static final String RESPONSE = "response";
/** /**
* taskInstanceId * taskInstanceId
*/ */
@ -77,7 +74,7 @@ public class TaskEvent {
/** /**
* ack / response * ack / response
*/ */
private String type; private TaskEventEnum type;
/** /**
@ -88,22 +85,21 @@ public class TaskEvent {
* @param executePath executePath * @param executePath executePath
* @param logPath logPath * @param logPath logPath
* @param taskInstanceId taskInstanceId * @param taskInstanceId taskInstanceId
* @param type type
*/ */
public void receiveAck(ExecutionStatus state, public TaskEvent(ExecutionStatus state,
Date startTime, Date startTime,
String workerAddress, String workerAddress,
String executePath, String executePath,
String logPath, String logPath,
int taskInstanceId, int taskInstanceId){
String type){
this.state = state; this.state = state;
this.startTime = startTime; this.startTime = startTime;
this.workerAddress = workerAddress; this.workerAddress = workerAddress;
this.executePath = executePath; this.executePath = executePath;
this.logPath = logPath; this.logPath = logPath;
this.taskInstanceId = taskInstanceId; this.taskInstanceId = taskInstanceId;
this.type = type; this.type = TaskEventEnum.ACK;
} }
/** /**
@ -113,20 +109,18 @@ public class TaskEvent {
* @param processId processId * @param processId processId
* @param appIds appIds * @param appIds appIds
* @param taskInstanceId taskInstanceId * @param taskInstanceId taskInstanceId
* @param type type
*/ */
public void receiveResponse(ExecutionStatus state, public TaskEvent(ExecutionStatus state,
Date endTime, Date endTime,
int processId, int processId,
String appIds, String appIds,
int taskInstanceId, int taskInstanceId){
String type){
this.state = state; this.state = state;
this.endTime = endTime; this.endTime = endTime;
this.processId = processId; this.processId = processId;
this.appIds = appIds; this.appIds = appIds;
this.taskInstanceId = taskInstanceId; this.taskInstanceId = taskInstanceId;
this.type = type; this.type = TaskEventEnum.RESPONSE;
} }
public int getTaskInstanceId() { public int getTaskInstanceId() {
@ -201,11 +195,11 @@ public class TaskEvent {
this.appIds = appIds; this.appIds = appIds;
} }
public String getType() { public TaskEventEnum getType() {
return type; return type;
} }
public void setType(String type) { public void setType(TaskEventEnum type) {
this.type = type; this.type = type;
} }
@ -221,7 +215,7 @@ public class TaskEvent {
", logPath='" + logPath + '\'' + ", logPath='" + logPath + '\'' +
", processId=" + processId + ", processId=" + processId +
", appIds='" + appIds + '\'' + ", appIds='" + appIds + '\'' +
", type='" + type + '\'' + ", type=" + type +
'}'; '}';
} }
} }

58
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/manager/TaskEventEnum.java

@ -0,0 +1,58 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.server.master.manager;
import com.baomidou.mybatisplus.annotation.EnumValue;
import org.apache.dolphinscheduler.common.enums.ExecutionStatus;
import java.util.Date;
/**
 * Type of a task event: either an ack or a response result.
 * Each constant carries an integer code and a short description.
 */
public enum TaskEventEnum {

    /**
     * task ack, code 0
     */
    ACK(0, "task ack"),

    /**
     * task response result, code 1
     */
    RESPONSE(1, "task response result");

    /**
     * integer code of the event type, annotated with {@code @EnumValue}
     */
    @EnumValue
    private final int code;

    /**
     * short description of the event type
     */
    private final String descp;

    /**
     * @param code  integer code of the event type
     * @param descp short description of the event type
     */
    TaskEventEnum(int code, String descp){
        this.code = code;
        this.descp = descp;
    }

    /**
     * @return integer code of this event type
     */
    public int getCode() {
        return code;
    }

    /**
     * @return short description of this event type
     */
    public String getDescp() {
        return descp;
    }

    /**
     * Look up the event type whose code equals the given value.
     *
     * @param status integer code to look up
     * @return the constant whose code matches {@code status}
     * @throws IllegalArgumentException if no constant has that code
     */
    public static TaskEventEnum of(int status){
        TaskEventEnum[] candidates = values();
        for (int i = 0; i < candidates.length; i++) {
            TaskEventEnum candidate = candidates[i];
            if (candidate.getCode() == status) {
                return candidate;
            }
        }
        throw new IllegalArgumentException("invalid status : " + status);
    }
}

44
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/manager/TaskManager.java

@ -17,7 +17,6 @@
package org.apache.dolphinscheduler.server.master.manager; package org.apache.dolphinscheduler.server.master.manager;
import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.thread.Stopper; import org.apache.dolphinscheduler.common.thread.Stopper;
import org.apache.dolphinscheduler.service.process.ProcessService; import org.apache.dolphinscheduler.service.process.ProcessService;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -28,6 +27,7 @@ import org.springframework.stereotype.Component;
import javax.annotation.PostConstruct; import javax.annotation.PostConstruct;
import java.util.concurrent.BlockingQueue; import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.LinkedBlockingQueue;
import static org.apache.dolphinscheduler.server.master.manager.TaskEventEnum.*;
/** /**
* task manager * task manager
@ -56,6 +56,7 @@ public class TaskManager {
@PostConstruct @PostConstruct
public void init(){ public void init(){
TaskWorker taskWorker = new TaskWorker(); TaskWorker taskWorker = new TaskWorker();
taskWorker.setName("TaskWorkerThread");
taskWorker.start(); taskWorker.start();
} }
@ -83,12 +84,8 @@ public class TaskManager {
while (Stopper.isRunning()){ while (Stopper.isRunning()){
try { try {
if (attemptQueue.size() == 0){ // if not task , blocking here
Thread.sleep(Constants.SLEEP_TIME_MILLIS);
continue;
}
TaskEvent taskEvent = attemptQueue.take(); TaskEvent taskEvent = attemptQueue.take();
persist(taskEvent); persist(taskEvent);
}catch (Exception e){ }catch (Exception e){
@ -102,19 +99,28 @@ public class TaskManager {
* @param taskEvent taskEvent * @param taskEvent taskEvent
*/ */
private void persist(TaskEvent taskEvent){ private void persist(TaskEvent taskEvent){
if (TaskEvent.ACK.equals(taskEvent.getType())){ // task event type
processService.changeTaskState(taskEvent.getState(), TaskEventEnum type = taskEvent.getType();
taskEvent.getStartTime(),
taskEvent.getWorkerAddress(), switch (type){
taskEvent.getExecutePath(), case ACK:
taskEvent.getLogPath(), processService.changeTaskState(taskEvent.getState(),
taskEvent.getTaskInstanceId()); taskEvent.getStartTime(),
}else if (TaskEvent.RESPONSE.equals(taskEvent.getType())){ taskEvent.getWorkerAddress(),
processService.changeTaskState(taskEvent.getState(), taskEvent.getExecutePath(),
taskEvent.getEndTime(), taskEvent.getLogPath(),
taskEvent.getProcessId(), taskEvent.getTaskInstanceId());
taskEvent.getAppIds(), break;
taskEvent.getTaskInstanceId()); case RESPONSE:
processService.changeTaskState(taskEvent.getState(),
taskEvent.getEndTime(),
taskEvent.getProcessId(),
taskEvent.getAppIds(),
taskEvent.getTaskInstanceId());
break;
default:
throw new IllegalArgumentException("invalid task event type : " + type);
} }
} }
} }

6
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/processor/TaskAckProcessor.java

@ -73,14 +73,12 @@ public class TaskAckProcessor implements NettyRequestProcessor {
String workerAddress = ChannelUtils.toAddress(channel).getAddress(); String workerAddress = ChannelUtils.toAddress(channel).getAddress();
// TaskEvent // TaskEvent
TaskEvent taskEvent = new TaskEvent(); TaskEvent taskEvent = new TaskEvent(ExecutionStatus.of(taskAckCommand.getStatus()),
taskEvent.receiveAck(ExecutionStatus.of(taskAckCommand.getStatus()),
taskAckCommand.getStartTime(), taskAckCommand.getStartTime(),
workerAddress, workerAddress,
taskAckCommand.getExecutePath(), taskAckCommand.getExecutePath(),
taskAckCommand.getLogPath(), taskAckCommand.getLogPath(),
taskAckCommand.getTaskInstanceId(), taskAckCommand.getTaskInstanceId());
TaskEvent.ACK);
taskManager.putTask(taskEvent); taskManager.putTask(taskEvent);

6
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/processor/TaskResponseProcessor.java

@ -73,13 +73,11 @@ public class TaskResponseProcessor implements NettyRequestProcessor {
taskInstanceCacheManager.cacheTaskInstance(responseCommand); taskInstanceCacheManager.cacheTaskInstance(responseCommand);
// TaskEvent // TaskEvent
TaskEvent taskEvent = new TaskEvent(); TaskEvent taskEvent = new TaskEvent(ExecutionStatus.of(responseCommand.getStatus()),
taskEvent.receiveResponse(ExecutionStatus.of(responseCommand.getStatus()),
responseCommand.getEndTime(), responseCommand.getEndTime(),
responseCommand.getProcessId(), responseCommand.getProcessId(),
responseCommand.getAppIds(), responseCommand.getAppIds(),
responseCommand.getTaskInstanceId(), responseCommand.getTaskInstanceId());
TaskEvent.RESPONSE);
taskManager.putTask(taskEvent); taskManager.putTask(taskEvent);
} }

4
pom.xml

@ -732,6 +732,8 @@
<include>**/server/log/WorkerLogFilterTest.java</include> <include>**/server/log/WorkerLogFilterTest.java</include>
<include>**/server/master/executor/NettyExecutorManagerTest.java</include> <include>**/server/master/executor/NettyExecutorManagerTest.java</include>
<include>**/server/master/host/LowerWeightRoundRobinTest.java</include> <include>**/server/master/host/LowerWeightRoundRobinTest.java</include>
<include>**/server/master/host/RandomSelectorTest.java</include>
<include>**/server/master/host/RoundRobinSelectorTest.java</include>
<include>**/server/master/register/MasterRegistryTest.java</include> <include>**/server/master/register/MasterRegistryTest.java</include>
<include>**/server/master/AlertManagerTest.java</include> <include>**/server/master/AlertManagerTest.java</include>
<include>**/server/master/MasterCommandTest.java</include> <include>**/server/master/MasterCommandTest.java</include>
@ -743,6 +745,7 @@
<include>**/server/utils/ParamUtilsTest.java</include> <include>**/server/utils/ParamUtilsTest.java</include>
<include>**/server/utils/ProcessUtilsTest.java</include> <include>**/server/utils/ProcessUtilsTest.java</include>
<include>**/server/utils/SparkArgsUtilsTest.java</include> <include>**/server/utils/SparkArgsUtilsTest.java</include>
<include>**/server/worker/processor/TaskCallbackServiceTest.java</include>
<include>**/server/worker/register/WorkerRegistryTest.java</include> <include>**/server/worker/register/WorkerRegistryTest.java</include>
<include>**/server/worker/shell/ShellCommandExecutorTest.java</include> <include>**/server/worker/shell/ShellCommandExecutorTest.java</include>
<include>**/server/worker/sql/SqlExecutorTest.java</include> <include>**/server/worker/sql/SqlExecutorTest.java</include>
@ -750,6 +753,7 @@
<include>**/server/worker/task/dependent/DependentTaskTest.java</include> <include>**/server/worker/task/dependent/DependentTaskTest.java</include>
<include>**/server/worker/task/spark/SparkTaskTest.java</include> <include>**/server/worker/task/spark/SparkTaskTest.java</include>
<include>**/server/worker/task/EnvFileTest.java</include> <include>**/server/worker/task/EnvFileTest.java</include>
</includes> </includes>
<!-- <skip>true</skip> --> <!-- <skip>true</skip> -->
<argLine>-Xmx2048m</argLine> <argLine>-Xmx2048m</argLine>

Loading…
Cancel
Save