Browse Source

[DS-6849][MasterServer] fetch more commands and handle in parallel (#6850)

* [DS-6849][MasterServer] fetch more commands and handle in parallel

* add return

* handle command with check

* add test

* delete master prefix

Co-authored-by: caishunfeng <534328519@qq.com>
3.0.0/version-upgrade
wind 3 years ago committed by GitHub
parent
commit
595e4843d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 18
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterConfig.java
  2. 121
      dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/runner/MasterSchedulerService.java
  3. 4
      dolphinscheduler-server/src/main/resources/application-master.yaml
  4. 38
      dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/process/ProcessService.java
  5. 65
      dolphinscheduler-service/src/test/java/org/apache/dolphinscheduler/service/process/ProcessServiceTest.java

18
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterConfig.java

@ -28,6 +28,8 @@ import org.springframework.stereotype.Component;
@ConfigurationProperties("master")
public class MasterConfig {
private int listenPort;
private int fetchCommandNum;
private int preExecThreads;
private int execThreads;
private int execTaskNum;
private int dispatchTaskNumber;
@ -48,6 +50,22 @@ public class MasterConfig {
this.listenPort = listenPort;
}
public int getFetchCommandNum() {
return fetchCommandNum;
}
public void setFetchCommandNum(int fetchCommandNum) {
this.fetchCommandNum = fetchCommandNum;
}
public int getPreExecThreads() {
return preExecThreads;
}
public void setPreExecThreads(int preExecThreads) {
this.preExecThreads = preExecThreads;
}
public int getExecThreads() {
return execThreads;
}

121
dolphinscheduler-server/src/main/java/org/apache/dolphinscheduler/server/master/runner/MasterSchedulerService.java

@ -36,9 +36,14 @@ import org.apache.dolphinscheduler.server.master.registry.ServerNodeManager;
import org.apache.dolphinscheduler.service.alert.ProcessAlertManager;
import org.apache.dolphinscheduler.service.process.ProcessService;
import org.apache.commons.collections4.CollectionUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
@ -90,6 +95,11 @@ public class MasterSchedulerService extends Thread {
@Autowired
NettyExecutorManager nettyExecutorManager;
/**
* master prepare exec service
*/
private ThreadPoolExecutor masterPrepareExecService;
/**
* master exec service
*/
@ -120,6 +130,7 @@ public class MasterSchedulerService extends Thread {
* constructor of MasterSchedulerService
*/
public void init() {
this.masterPrepareExecService = (ThreadPoolExecutor) ThreadUtils.newDaemonFixedThreadExecutor("Master-Pre-Exec-Thread", masterConfig.getPreExecThreads());
this.masterExecService = (ThreadPoolExecutor) ThreadUtils.newDaemonFixedThreadExecutor("Master-Exec-Thread", masterConfig.getExecThreads());
NettyClientConfig clientConfig = new NettyClientConfig();
this.nettyRemotingClient = new NettyRemotingClient(clientConfig);
@ -175,25 +186,29 @@ public class MasterSchedulerService extends Thread {
/**
* 1. get command by slot
* 2. donot handle command if slot is empty
*
* @throws Exception
*/
private void scheduleProcess() throws Exception {
List<Command> commands = findCommands();
if (CollectionUtils.isEmpty(commands)) {
//indicate that no command ,sleep for 1s
Thread.sleep(Constants.SLEEP_TIME_MILLIS);
return;
}
// make sure to scan and delete command table in one transaction
Command command = findOneCommand();
if (command != null) {
logger.info("find one command: id: {}, type: {}", command.getId(), command.getCommandType());
try {
ProcessInstance processInstance = processService.handleCommand(logger,
getLocalAddress(),
command,
processDefinitionCacheMaps);
if (!masterConfig.isCacheProcessDefinition()
&& processDefinitionCacheMaps.size() > 0) {
if (!masterConfig.isCacheProcessDefinition() && processDefinitionCacheMaps.size() > 0) {
processDefinitionCacheMaps.clear();
}
if (processInstance != null) {
List<ProcessInstance> processInstances = command2ProcessInstance(commands);
if (CollectionUtils.isEmpty(processInstances)) {
return;
}
for (ProcessInstance processInstance : processInstances) {
if (processInstance == null) {
continue;
}
WorkflowExecuteThread workflowExecuteThread = new WorkflowExecuteThread(
processInstance
, processService
@ -206,43 +221,75 @@ public class MasterSchedulerService extends Thread {
if (processInstance.getTimeout() > 0) {
this.processTimeoutCheckList.put(processInstance.getId(), processInstance);
}
logger.info("handle command end, command {} process {} start...",
command.getId(), processInstance.getId());
masterExecService.execute(workflowExecuteThread);
}
}
private List<ProcessInstance> command2ProcessInstance(List<Command> commands) {
if (CollectionUtils.isEmpty(commands)) {
return null;
}
ProcessInstance[] processInstances = new ProcessInstance[commands.size()];
CountDownLatch latch = new CountDownLatch(commands.size());
for (int i = 0; i < commands.size(); i++) {
int index = i;
this.masterPrepareExecService.execute(() -> {
Command command = commands.get(index);
// slot check again
if (!slotCheck(command)) {
latch.countDown();
return;
}
try {
ProcessInstance processInstance = processService.handleCommand(logger,
getLocalAddress(),
command,
processDefinitionCacheMaps);
if (processInstance != null) {
processInstances[index] = processInstance;
logger.info("handle command command {} end, create process instance {}",
command.getId(), processInstance.getId());
}
} catch (Exception e) {
logger.error("scan command error ", e);
processService.moveToErrorCommand(command, e.toString());
} finally {
latch.countDown();
}
} else {
//indicate that no command ,sleep for 1s
Thread.sleep(Constants.SLEEP_TIME_MILLIS);
});
}
try {
// make sure to finish handling command each time before next scan
latch.await();
} catch (InterruptedException e) {
logger.error("countDownLatch await error ", e);
}
return Arrays.asList(processInstances);
}
private Command findOneCommand() {
private List<Command> findCommands() {
int pageNumber = 0;
Command result = null;
int pageSize = masterConfig.getFetchCommandNum();
List<Command> result = new ArrayList<>();
while (Stopper.isRunning()) {
if (ServerNodeManager.MASTER_SIZE == 0) {
return null;
return result;
}
List<Command> commandList = processService.findCommandPage(ServerNodeManager.MASTER_SIZE, pageNumber);
List<Command> commandList = processService.findCommandPage(pageSize, pageNumber);
if (commandList.size() == 0) {
return null;
return result;
}
for (Command command : commandList) {
int slot = ServerNodeManager.getSlot();
if (ServerNodeManager.MASTER_SIZE != 0
&& command.getId() % ServerNodeManager.MASTER_SIZE == slot) {
result = command;
break;
if (slotCheck(command)) {
result.add(command);
}
}
if (result != null) {
logger.info("find command {}, slot:{} :",
result.getId(),
ServerNodeManager.getSlot());
if (CollectionUtils.isNotEmpty(result)) {
logger.info("find {} commands, slot:{}", result.size(), ServerNodeManager.getSlot());
break;
}
pageNumber += 1;
@ -250,6 +297,14 @@ public class MasterSchedulerService extends Thread {
return result;
}
private boolean slotCheck(Command command) {
int slot = ServerNodeManager.getSlot();
if (ServerNodeManager.MASTER_SIZE != 0 && command.getId() % ServerNodeManager.MASTER_SIZE == slot) {
return true;
}
return false;
}
private String getLocalAddress() {
return NetUtils.getAddr(masterConfig.getListenPort());
}

4
dolphinscheduler-server/src/main/resources/application-master.yaml

@ -20,6 +20,10 @@ spring:
master:
listen-port: 5678
# master fetch command num
fetch-command-num: 10
# master prepare execute thread number to limit handle commands in parallel
pre-exec-threads: 10
# master execute thread number to limit process instances in parallel
exec-threads: 100
# master execute task number in parallel per process instance

38
dolphinscheduler-service/src/main/java/org/apache/dolphinscheduler/service/process/ProcessService.java

@ -37,7 +37,6 @@ import org.apache.dolphinscheduler.common.enums.ExecutionStatus;
import org.apache.dolphinscheduler.common.enums.FailureStrategy;
import org.apache.dolphinscheduler.common.enums.Flag;
import org.apache.dolphinscheduler.common.enums.ReleaseState;
import org.apache.dolphinscheduler.spi.enums.ResourceType;
import org.apache.dolphinscheduler.common.enums.TaskDependType;
import org.apache.dolphinscheduler.common.enums.TimeoutFlag;
import org.apache.dolphinscheduler.common.enums.WarningType;
@ -102,8 +101,10 @@ import org.apache.dolphinscheduler.dao.utils.DagHelper;
import org.apache.dolphinscheduler.remote.command.StateEventChangeCommand;
import org.apache.dolphinscheduler.remote.processor.StateEventCallbackService;
import org.apache.dolphinscheduler.remote.utils.Host;
import org.apache.dolphinscheduler.service.exceptions.ServiceException;
import org.apache.dolphinscheduler.service.log.LogClientService;
import org.apache.dolphinscheduler.service.quartz.cron.CronUtils;
import org.apache.dolphinscheduler.spi.enums.ResourceType;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.StringUtils;
@ -215,9 +216,9 @@ public class ProcessService {
* @param logger logger
* @param host host
* @param command found command
* @param processDefinitionCacheMaps
* @return process instance
*/
@Transactional
public ProcessInstance handleCommand(Logger logger, String host, Command command, HashMap<String, ProcessDefinition> processDefinitionCacheMaps) {
ProcessInstance processInstance = constructProcessInstance(command, host, processDefinitionCacheMaps);
// cannot construct process instance, return null
@ -233,15 +234,15 @@ public class ProcessService {
if (processDefinition.getExecutionType().typeIsSerial()) {
saveSerialProcess(processInstance, processDefinition);
if (processInstance.getState() != ExecutionStatus.SUBMITTED_SUCCESS) {
this.setSubProcessParam(processInstance);
this.commandMapper.deleteById(command.getId());
setSubProcessParam(processInstance);
deleteCommandWithCheck(command.getId());
return null;
}
} else {
saveProcessInstance(processInstance);
}
this.setSubProcessParam(processInstance);
this.commandMapper.deleteById(command.getId());
setSubProcessParam(processInstance);
deleteCommandWithCheck(command.getId());
return processInstance;
}
@ -345,10 +346,6 @@ public class ProcessService {
/**
* get command page
*
* @param pageSize
* @param pageNumber
* @return
*/
public List<Command> findCommandPage(int pageSize, int pageNumber) {
return commandMapper.queryCommandPage(pageSize, pageNumber * pageSize);
@ -781,7 +778,6 @@ public class ProcessService {
*
* @param command command
* @param host host
* @param processDefinitionCacheMaps
* @return process instance
*/
private ProcessInstance constructProcessInstance(Command command, String host, HashMap<String, ProcessDefinition> processDefinitionCacheMaps) {
@ -1091,11 +1087,6 @@ public class ProcessService {
/**
* retry submit task to db
*
* @param taskInstance
* @param commitRetryTimes
* @param commitInterval
* @return
*/
public TaskInstance submitTask(TaskInstance taskInstance, int commitRetryTimes, int commitInterval) {
@ -1589,7 +1580,8 @@ public class ProcessService {
private void updateTaskDefinitionResources(TaskDefinition taskDefinition) {
Map<String, Object> taskParameters = JSONUtils.parseObject(
taskDefinition.getTaskParams(),
new TypeReference<Map<String, Object>>() { });
new TypeReference<Map<String, Object>>() {
});
if (taskParameters != null) {
// if contains mainJar field, query resource from database
// Flink, Spark, MR
@ -1815,8 +1807,6 @@ public class ProcessService {
/**
* for show in page of taskInstance
*
* @param taskInstance
*/
public void changeOutParam(TaskInstance taskInstance) {
if (StringUtils.isEmpty(taskInstance.getVarPool())) {
@ -1827,7 +1817,8 @@ public class ProcessService {
return;
}
//if the result more than one line,just get the first .
Map<String, Object> taskParams = JSONUtils.parseObject(taskInstance.getTaskParams(), new TypeReference<Map<String, Object>>() {});
Map<String, Object> taskParams = JSONUtils.parseObject(taskInstance.getTaskParams(), new TypeReference<Map<String, Object>>() {
});
Object localParams = taskParams.get(LOCAL_PARAMS);
if (localParams == null) {
return;
@ -2547,4 +2538,11 @@ public class ProcessService {
public ProcessInstance loadNextProcess4Serial(long code, int state) {
return this.processInstanceMapper.loadNextProcess4Serial(code, state);
}
private void deleteCommandWithCheck(int commandId) {
int delete = this.commandMapper.deleteById(commandId);
if (delete != 1) {
throw new ServiceException("delete command fail, id:" + commandId);
}
}
}

65
dolphinscheduler-service/src/test/java/org/apache/dolphinscheduler/service/process/ProcessServiceTest.java

@ -60,6 +60,7 @@ import org.apache.dolphinscheduler.dao.mapper.TaskDefinitionLogMapper;
import org.apache.dolphinscheduler.dao.mapper.TaskDefinitionMapper;
import org.apache.dolphinscheduler.dao.mapper.TaskInstanceMapper;
import org.apache.dolphinscheduler.dao.mapper.UserMapper;
import org.apache.dolphinscheduler.service.exceptions.ServiceException;
import org.apache.dolphinscheduler.service.quartz.cron.CronUtilsTest;
import java.util.ArrayList;
@ -72,7 +73,9 @@ import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.junit.runner.RunWith;
import org.mockito.InjectMocks;
import org.mockito.Mock;
@ -92,6 +95,9 @@ public class ProcessServiceTest {
private static final Logger logger = LoggerFactory.getLogger(CronUtilsTest.class);
@Rule
public final ExpectedException exception = ExpectedException.none();
@InjectMocks
private ProcessService processService;
@Mock
@ -255,10 +261,12 @@ public class ProcessServiceTest {
int processInstanceId = 222;
//there is not enough thread for this command
Command command1 = new Command();
command1.setId(1);
command1.setProcessDefinitionCode(definitionCode);
command1.setProcessDefinitionVersion(definitionVersion);
command1.setCommandParam("{\"ProcessInstanceId\":222}");
command1.setCommandType(CommandType.START_PROCESS);
Mockito.when(commandMapper.deleteById(1)).thenReturn(1);
ProcessDefinition processDefinition = new ProcessDefinition();
processDefinition.setId(123);
@ -284,31 +292,37 @@ public class ProcessServiceTest {
Assert.assertNotNull(processService.handleCommand(logger, host, command1, processDefinitionCacheMaps));
Command command2 = new Command();
command2.setId(2);
command2.setCommandParam("{\"ProcessInstanceId\":222,\"StartNodeIdList\":\"n1,n2\"}");
command2.setProcessDefinitionCode(definitionCode);
command2.setProcessDefinitionVersion(definitionVersion);
command2.setCommandType(CommandType.RECOVER_SUSPENDED_PROCESS);
command2.setProcessInstanceId(processInstanceId);
Mockito.when(commandMapper.deleteById(2)).thenReturn(1);
Assert.assertNotNull(processService.handleCommand(logger, host, command2, processDefinitionCacheMaps));
Command command3 = new Command();
command3.setId(3);
command3.setProcessDefinitionCode(definitionCode);
command3.setProcessDefinitionVersion(definitionVersion);
command3.setProcessInstanceId(processInstanceId);
command3.setCommandParam("{\"WaitingThreadInstanceId\":222}");
command3.setCommandType(CommandType.START_FAILURE_TASK_PROCESS);
Mockito.when(commandMapper.deleteById(3)).thenReturn(1);
Assert.assertNotNull(processService.handleCommand(logger, host, command3, processDefinitionCacheMaps));
Command command4 = new Command();
command4.setId(4);
command4.setProcessDefinitionCode(definitionCode);
command4.setProcessDefinitionVersion(definitionVersion);
command4.setCommandParam("{\"WaitingThreadInstanceId\":222,\"StartNodeIdList\":\"n1,n2\"}");
command4.setCommandType(CommandType.REPEAT_RUNNING);
command4.setProcessInstanceId(processInstanceId);
Mockito.when(commandMapper.deleteById(4)).thenReturn(1);
Assert.assertNotNull(processService.handleCommand(logger, host, command4, processDefinitionCacheMaps));
Command command5 = new Command();
command5.setId(5);
command5.setProcessDefinitionCode(definitionCode);
command5.setProcessDefinitionVersion(definitionVersion);
HashMap<String, String> startParams = new HashMap<>();
@ -318,6 +332,7 @@ public class ProcessServiceTest {
command5.setCommandParam(JSONUtils.toJsonString(commandParams));
command5.setCommandType(CommandType.START_PROCESS);
command5.setDryRun(Constants.DRY_RUN_FLAG_NO);
Mockito.when(commandMapper.deleteById(5)).thenReturn(1);
ProcessInstance processInstance1 = processService.handleCommand(logger, host, command5, processDefinitionCacheMaps);
Assert.assertTrue(processInstance1.getGlobalParams().contains("\"testStartParam1\""));
@ -342,14 +357,18 @@ public class ProcessServiceTest {
processInstance2.setProcessDefinitionVersion(1);
Mockito.when(processInstanceMapper.queryDetailById(223)).thenReturn(processInstance2);
Mockito.when(processDefineMapper.queryByCode(11L)).thenReturn(processDefinition1);
Mockito.when(commandMapper.deleteById(1)).thenReturn(1);
Assert.assertNotNull(processService.handleCommand(logger, host, command1, processDefinitionCacheMaps));
Command command6 = new Command();
command6.setId(6);
command6.setProcessDefinitionCode(11L);
command6.setCommandParam("{\"ProcessInstanceId\":223}");
command6.setCommandType(CommandType.RECOVER_SERIAL_WAIT);
command6.setProcessDefinitionVersion(1);
Mockito.when(processInstanceMapper.queryByProcessDefineCodeAndStatusAndNextId(11L,Constants.RUNNING_PROCESS_STATE,223)).thenReturn(lists);
Mockito.when(processInstanceMapper.updateNextProcessIdById(223, 222)).thenReturn(true);
Mockito.when(commandMapper.deleteById(6)).thenReturn(1);
ProcessInstance processInstance6 = processService.handleCommand(logger, host, command6, processDefinitionCacheMaps);
Assert.assertTrue(processInstance6 != null);
@ -362,10 +381,12 @@ public class ProcessServiceTest {
Mockito.when(processInstanceMapper.queryDetailById(224)).thenReturn(processInstance7);
Command command7 = new Command();
command7.setId(7);
command7.setProcessDefinitionCode(11L);
command7.setCommandParam("{\"ProcessInstanceId\":224}");
command7.setCommandType(CommandType.RECOVER_SERIAL_WAIT);
command7.setProcessDefinitionVersion(1);
Mockito.when(commandMapper.deleteById(7)).thenReturn(1);
Mockito.when(processInstanceMapper.queryByProcessDefineCodeAndStatusAndNextId(11L,Constants.RUNNING_PROCESS_STATE,224)).thenReturn(null);
ProcessInstance processInstance8 = processService.handleCommand(logger, host, command7, processDefinitionCacheMaps);
Assert.assertTrue(processInstance8 == null);
@ -382,6 +403,7 @@ public class ProcessServiceTest {
processInstance9.setProcessDefinitionCode(11L);
processInstance9.setProcessDefinitionVersion(1);
Command command9 = new Command();
command9.setId(9);
command9.setProcessDefinitionCode(12L);
command9.setCommandParam("{\"ProcessInstanceId\":225}");
command9.setCommandType(CommandType.RECOVER_SERIAL_WAIT);
@ -389,10 +411,51 @@ public class ProcessServiceTest {
Mockito.when(processInstanceMapper.queryDetailById(225)).thenReturn(processInstance9);
Mockito.when(processInstanceMapper.queryByProcessDefineCodeAndStatusAndNextId(12L,Constants.RUNNING_PROCESS_STATE,0)).thenReturn(lists);
Mockito.when(processInstanceMapper.updateById(processInstance)).thenReturn(1);
Mockito.when(commandMapper.deleteById(9)).thenReturn(1);
ProcessInstance processInstance10 = processService.handleCommand(logger, host, command9, processDefinitionCacheMaps);
Assert.assertTrue(processInstance10 == null);
}
@Test(expected = ServiceException.class)
public void testDeleteNotExistCommand() {
String host = "127.0.0.1";
int definitionVersion = 1;
long definitionCode = 123;
int processInstanceId = 222;
Command command1 = new Command();
command1.setId(1);
command1.setProcessDefinitionCode(definitionCode);
command1.setProcessDefinitionVersion(definitionVersion);
command1.setCommandParam("{\"ProcessInstanceId\":222}");
command1.setCommandType(CommandType.START_PROCESS);
ProcessDefinition processDefinition = new ProcessDefinition();
processDefinition.setId(123);
processDefinition.setName("test");
processDefinition.setVersion(definitionVersion);
processDefinition.setCode(definitionCode);
processDefinition.setGlobalParams("[{\"prop\":\"startParam1\",\"direct\":\"IN\",\"type\":\"VARCHAR\",\"value\":\"\"}]");
processDefinition.setExecutionType(ProcessExecutionTypeEnum.PARALLEL);
ProcessInstance processInstance = new ProcessInstance();
processInstance.setId(222);
processInstance.setProcessDefinitionCode(11L);
processInstance.setHost("127.0.0.1:5678");
processInstance.setProcessDefinitionVersion(1);
processInstance.setId(processInstanceId);
processInstance.setProcessDefinitionCode(definitionCode);
processInstance.setProcessDefinitionVersion(definitionVersion);
Mockito.when(processDefineMapper.queryByCode(command1.getProcessDefinitionCode())).thenReturn(processDefinition);
Mockito.when(processDefineLogMapper.queryByDefinitionCodeAndVersion(processInstance.getProcessDefinitionCode(),
processInstance.getProcessDefinitionVersion())).thenReturn(new ProcessDefinitionLog(processDefinition));
Mockito.when(processInstanceMapper.queryDetailById(222)).thenReturn(processInstance);
// will throw exception when command id is 0 and delete fail
processService.handleCommand(logger, host, command1, processDefinitionCacheMaps);
}
@Test
public void testGetUserById() {
User user = new User();

Loading…
Cancel
Save