分布式调度框架。
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

477 lines
15 KiB

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.plugin.task.api;
import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.EXIT_CODE_FAILURE;
import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.EXIT_CODE_KILL;
import static org.apache.dolphinscheduler.plugin.task.api.utils.ProcessUtils.getPidsStr;
import org.apache.dolphinscheduler.common.utils.PropertyUtils;
import org.apache.dolphinscheduler.plugin.task.api.model.TaskResponse;
import org.apache.dolphinscheduler.plugin.task.api.utils.AbstractCommandExecutorConstants;
import org.apache.dolphinscheduler.plugin.task.api.utils.OSUtils;
import org.apache.dolphinscheduler.plugin.task.api.utils.ProcessUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.SystemUtils;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.lang.reflect.Field;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
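/**
 * Abstract command executor: runs a task's command file as an operating-system process,
 * forwards the process output to a log handler and reports the exit status.
 * Subclasses supply the command file path, the command file content and the interpreter.
 */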
public abstract class AbstractCommandExecutor {
/**
 * Pattern used to extract var pool values from process output lines.
 */
protected static final Pattern SETVALUE_REGEX = Pattern.compile(TaskConstants.SETVALUE_REGEX);

/**
 * Collected var pool text; every extracted value is followed by the "$VarPool$" separator.
 */
protected StringBuilder varPool = new StringBuilder();

/**
 * The process started for the command file; stays null until a process has been built.
 */
private Process process;

/**
 * Callback that receives batches of buffered output lines.
 */
protected Consumer<LinkedBlockingQueue<String>> logHandler;

/**
 * Logger used for executor messages.
 */
protected Logger logger;

/**
 * Output lines read from the process that have not been handed to the log handler yet.
 */
protected LinkedBlockingQueue<String> logBuffer;

/**
 * Becomes true once the output reader has finished. The flag is written by the reader
 * thread and polled by the flushing thread, so it is volatile to guarantee that the
 * update becomes visible to the poller.
 */
protected volatile boolean logOutputIsSuccess = false;

/**
 * The most recent output line that was not a var pool directive.
 */
protected String taskResultString;

/**
 * Execution context of the task this executor runs.
 */
protected TaskExecutionContext taskRequest;
/**
 * Creates an executor for a task, with a fresh, empty log buffer.
 *
 * @param logHandler  callback that receives buffered output lines
 * @param taskRequest execution context of the task to run
 * @param logger      logger used for executor messages
 */
public AbstractCommandExecutor(Consumer<LinkedBlockingQueue<String>> logHandler,
                               TaskExecutionContext taskRequest,
                               Logger logger) {
    this.logBuffer = new LinkedBlockingQueue<>();
    this.logger = logger;
    this.taskRequest = taskRequest;
    this.logHandler = logHandler;
}
/**
 * Creates an executor around an existing log buffer.
 * This constructor assigns only the buffer; the log handler, task request and logger
 * are not set here.
 *
 * @param logBuffer queue used to buffer output lines
 */
public AbstractCommandExecutor(LinkedBlockingQueue<String> logBuffer) {
this.logBuffer = logBuffer;
}
/**
 * Assembles the command line for the given command file and starts it as a process.
 * The process runs in the task's execute path with its error stream merged into
 * standard output. When sudo is enabled the command is prefixed either with a
 * systemd-run wrapper (Linux with the resource-limit property switched on) or with
 * {@code sudo -u <tenant> -E}.
 *
 * @param commandFile path of the command file handed to the interpreter
 * @throws IOException if the process cannot be started
 */
private void buildProcess(String commandFile) throws IOException {
    List<String> commandLine = new LinkedList<>();

    // working directory = execute path of the task; stderr is folded into stdout
    ProcessBuilder builder = new ProcessBuilder()
            .directory(new File(taskRequest.getExecutePath()))
            .redirectErrorStream(true);

    // only when sudo.enable=true is the command run on behalf of the tenant user
    if (OSUtils.isSudoEnable()) {
        boolean limitResources = SystemUtils.IS_OS_LINUX
                && PropertyUtils.getBoolean(AbstractCommandExecutorConstants.TASK_RESOURCE_LIMIT_STATE);
        if (limitResources) {
            generateCgroupCommand(commandLine);
        } else {
            commandLine.add("sudo");
            commandLine.add("-u");
            commandLine.add(taskRequest.getTenantCode());
            commandLine.add("-E");
        }
    }

    // interpreter first, then the file it has to execute
    commandLine.add(commandInterpreter());
    commandLine.add(commandFile);

    process = builder.command(commandLine).start();
    printCommand(commandLine);
}
/**
 * Appends the systemd-run prefix that applies the task's CPU and memory limits.
 * eg: sudo systemd-run -q --scope -p CPUQuota=100% -p MemoryMax=200M --uid=root
 *
 * <p>A quota of {@code -1} means "no limit". A missing ({@code null}) quota is handled
 * the same way instead of failing with a NullPointerException while unboxing.
 *
 * @param command command list the prefix is appended to
 */
private void generateCgroupCommand(List<String> command) {
    Integer cpuQuota = taskRequest.getCpuQuota();
    Integer memoryMax = taskRequest.getMemoryMax();
    boolean cpuUnlimited = cpuQuota == null || cpuQuota == -1;
    boolean memoryUnlimited = memoryMax == null || memoryMax == -1;

    command.add("sudo");
    command.add("systemd-run");
    command.add("-q");
    command.add("--scope");

    // an empty CPUQuota assignment is emitted when no CPU limit is requested
    command.add("-p");
    command.add(cpuUnlimited ? "CPUQuota=" : String.format("CPUQuota=%s%%", cpuQuota));

    command.add("-p");
    command.add(memoryUnlimited ? "MemoryMax=infinity" : String.format("MemoryMax=%sM", memoryMax));

    command.add(String.format("--uid=%s", taskRequest.getTenantCode()));
}
/**
 * Runs the given command as an operating-system process and waits until it exits
 * or the task's remaining timeout elapses.
 *
 * <p>If no execution context is cached for the task instance, the response is returned
 * with the kill exit code and nothing is started. If the command is empty, the cached
 * context is removed and an untouched response is returned.
 *
 * @param execCommand content of the command to execute
 * @return response carrying the process id and the exit status code
 * @throws IOException          if the command file cannot be created or the process cannot be started
 * @throws InterruptedException if the calling thread is interrupted while waiting for the process
 * @throws RuntimeException     if the task timeout has already elapsed before waiting starts
 */
public TaskResponse run(String execCommand) throws IOException, InterruptedException {
    TaskResponse result = new TaskResponse();
    int taskInstanceId = taskRequest.getTaskInstanceId();
    if (null == TaskExecutionContextCacheManager.getByTaskInstanceId(taskInstanceId)) {
        result.setExitStatusCode(EXIT_CODE_KILL);
        return result;
    }
    if (StringUtils.isEmpty(execCommand)) {
        TaskExecutionContextCacheManager.removeByTaskInstanceId(taskInstanceId);
        return result;
    }

    String commandFilePath = buildCommandFilePath();
    // create command file if not exists
    createCommandFileIfNotExists(execCommand, commandFilePath);
    // build process
    buildProcess(commandFilePath);
    // parse process output
    parseProcessOutput(process);

    int processId = getProcessId(process);
    result.setProcessId(processId);
    // cache processId
    taskRequest.setProcessId(processId);
    boolean contextUpdated = TaskExecutionContextCacheManager.updateTaskExecutionContext(taskRequest);
    if (!contextUpdated) {
        // the cached context could not be updated: stop the process that was just started
        ProcessUtils.kill(taskRequest);
        result.setExitStatusCode(EXIT_CODE_KILL);
        return result;
    }
    // print process id
    logger.info("process start, process id is: {}", processId);

    // if timeout occurs, exit directly
    long remainTime = getRemainTime();
    // waiting for the run to finish
    boolean status = process.waitFor(remainTime, TimeUnit.SECONDS);

    int exitCode;
    if (status) {
        // the process exited on its own: report its exit value
        exitCode = process.exitValue();
        result.setExitStatusCode(exitCode);
    } else {
        logger.error("process has failure, the task timeout configuration value is:{}, ready to kill ...",
                taskRequest.getTaskTimeout());
        ProcessUtils.kill(taskRequest);
        result.setExitStatusCode(EXIT_CODE_FAILURE);
        // exitValue() throws IllegalThreadStateException while the process is still running,
        // which can be the case right after the kill request; fall back to the failure code then
        exitCode = process.isAlive() ? EXIT_CODE_FAILURE : process.exitValue();
    }

    String exitLogMessage = EXIT_CODE_KILL == exitCode ? "process has killed." : "process has exited.";
    logger.info(exitLogMessage
            + " execute path:{}, processId:{} ,exitStatusCode:{} ,processWaitForStatus:{} ,processExitValue:{}",
            taskRequest.getExecutePath(), processId, result.getExitStatusCode(), status, exitCode);
    return result;
}
/**
 * Returns the var pool text collected from the process output so far.
 *
 * @return current content of the var pool buffer
 */
public String getVarPool() {
    final String collected = varPool.toString();
    return collected;
}
/**
 * Cancels the running process, if one has been started.
 * Pending log lines are flushed first; the process is then asked to stop with a soft
 * kill and, if it is still alive afterwards, a hard kill.
 *
 * @throws Exception declared for callers; propagated from the cancellation steps
 */
public void cancelApplication() throws Exception {
    if (process != null) {
        // flush whatever is still buffered and emit the finalize marker
        clear();

        final int pid = getProcessId(process);
        logger.info("cancel process: {}", pid);

        // escalate to a hard kill only when the soft kill left the process alive
        if (softKill(pid)) {
            hardKill(pid);
        }
    }
}
/**
 * Asks the process to terminate with a plain {@code kill} and gives it a short grace
 * period to exit.
 *
 * <p>The kill command is awaited (bounded) and the target process is then given a bounded
 * amount of time to stop. Without this wait the liveness check would race with signal
 * delivery and report the process as alive almost every time.
 *
 * @param processId process id; {@code 0} means the id is unknown and no signal is sent
 * @return {@code true} if the process is still alive afterwards
 */
private boolean softKill(int processId) {
    // upper bound, in seconds, for the kill command itself and for the graceful exit
    final long graceSeconds = 3L;
    if (processId != 0 && process.isAlive()) {
        try {
            // wrap the kill command for the tenant user
            String cmd = String.format("kill %d", processId);
            cmd = OSUtils.getSudoCmd(taskRequest.getTenantCode(), cmd);
            logger.info("soft kill task:{}, process id:{}, cmd:{}", taskRequest.getTaskAppId(), processId, cmd);
            Process killCommand = Runtime.getRuntime().exec(cmd);
            if (killCommand.waitFor(graceSeconds, TimeUnit.SECONDS)) {
                // the signal has been sent: let the process shut down on its own
                process.waitFor(graceSeconds, TimeUnit.SECONDS);
            } else {
                // the kill command itself is stuck; do not leave it behind
                killCommand.destroy();
            }
        } catch (IOException e) {
            logger.warn("kill attempt failed", e);
        } catch (InterruptedException e) {
            // preserve the interrupt for the caller and report the current liveness
            Thread.currentThread().interrupt();
            logger.warn("interrupted while waiting for soft kill, process id:{}", processId, e);
        }
    }
    return process.isAlive();
}
/**
 * Forcibly terminates the process with {@code kill -9} on the pid string
 * obtained from {@code getPidsStr}.
 * Does nothing when the process id is unknown ({@code 0}) or the process has already exited.
 * Failures are logged and not propagated.
 *
 * @param processId process id
 */
private void hardKill(int processId) {
    if (processId == 0 || !process.isAlive()) {
        return;
    }
    try {
        String killCmd = String.format("kill -9 %s", getPidsStr(processId));
        killCmd = OSUtils.getSudoCmd(taskRequest.getTenantCode(), killCmd);
        logger.info("hard kill task:{}, process id:{}, cmd:{}", taskRequest.getTaskAppId(), processId, killCmd);
        OSUtils.exeCmd(killCmd);
    } catch (Exception e) {
        logger.error("kill attempt failed ", e);
    }
}
/**
 * Logs the command line, with its parts joined by single spaces.
 *
 * @param commands parts of the command line
 */
private void printCommand(List<String> commands) {
    String commandLine = String.join(" ", commands);
    logger.info("task run command: {}", commandLine);
}
/**
 * Hands all pending log lines to the log handler and then sends the finalize-session marker.
 *
 * <p>Pending lines are moved into a private batch with {@code drainTo} before they are
 * handed over. Handing over the shared buffer and clearing it afterwards would silently
 * drop any line appended in between by the output reader thread.
 */
private void clear() {
    LinkedBlockingQueue<String> markerLog = new LinkedBlockingQueue<>(1);
    markerLog.add(ch.qos.logback.classic.ClassicConstants.FINALIZE_SESSION_MARKER.toString());

    LinkedBlockingQueue<String> pending = new LinkedBlockingQueue<>();
    logBuffer.drainTo(pending);
    if (!pending.isEmpty()) {
        // log handle
        logHandler.accept(pending);
    }
    logHandler.accept(markerLog);
}
/**
* get the standard output of the process
*
* @param process process
*/
private void parseProcessOutput(Process process) {
String threadLoggerInfoName = taskRequest.getTaskLogName();
ExecutorService getOutputLogService = newDaemonSingleThreadExecutor(threadLoggerInfoName);
getOutputLogService.submit(() -> {
try (BufferedReader inReader = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
String line;
while ((line = inReader.readLine()) != null) {
if (line.startsWith("${setValue(") || line.startsWith("#{setValue(")) {
varPool.append(findVarPool(line));
varPool.append("$VarPool$");
} else {
logBuffer.add(line);
taskResultString = line;
}
}
logOutputIsSuccess = true;
} catch (Exception e) {
logger.error(e.getMessage(), e);
logOutputIsSuccess = true;
}
});
getOutputLogService.shutdown();
ExecutorService parseProcessOutputExecutorService = newDaemonSingleThreadExecutor(threadLoggerInfoName);
parseProcessOutputExecutorService.submit(() -> {
try {
long lastFlushTime = System.currentTimeMillis();
while (logBuffer.size() > 0 || !logOutputIsSuccess) {
if (logBuffer.size() > 0) {
lastFlushTime = flush(lastFlushTime);
} else {
Thread.sleep(TaskConstants.DEFAULT_LOG_FLUSH_INTERVAL);
}
}
} catch (Exception e) {
[SPI][TASK]TaskPlugin (#6122) * [Improvement][dao]When I search for the keyword description, the web UI shows empty (#5952) * [Bug][WorkerServer] SqlTask NullPointerException #5549 * [Improvement][dao]When I search for the keyword Modify User, the web UI shows empty #5428 * [Improvement][dao]When I search for the keyword Modify User, the web UI shows empty #5428 * [Improvement][dao]When I search for the keyword Modify User, the web UI shows empty #5428 * [Improvement][dao]When I search for the keyword Modify User, the web UI shows empty #5428 * [Improvement][dao]When I search for the keyword Modify User, the web UI shows empty #5428 * [Improvement][dao]When I search for the keyword Modify User, the web UI shows empty #5428 * [Improvement][dao]When I search for the keyword description, the web UI shows empty #5428 * fix the readme typing issue (#5998) * Fix unchecked type conversions * Use indentation level reported by checkstyle * Reorganize CI workflows to fasten the wasted time and resources (#6011) * Add standalone server module to make it easier to develop (#6022) * Task node of SWITCH (#5939) * [Feature-#5273][server-master] Task node of SWITCH (#5922) Co-authored-by: wangxj <wangxj31> * remove description of bonecp (#6030) Co-authored-by: shaojwu <shaojwu@ebay.com> * [Improvement][Api Module]split alert group list-paging interface (#5941) * [Improvement][Api Module]split alert group list-paging interface * [FIX-#6007]Wrong complement date (#6026) * [FIX-#6007]Wrong complement date * [style]Wrong complement date * [Improvement-6024][dist] Remove useless packaging commands (#6029) ·Remove useless packaging commands in dolphinscheduler-bin.xml This closes #6024 Co-authored-by: mask <liuhu@zhiyoutec.com> * [FIX-5908][MasterServer] When executing an compensation task, the execution thread would have a NPE (#5909) * fix the npe in MasterExec * fix the compile error * Add `.asf.yaml` to easily set the GitHub metadata (#6035) * fix dead server cannot stop (#6046) * 
Enhancement Translation (#6042) * replaced Loading... with i18n * modified Edit zh_CN translation * Delete zh_CN.js Co-authored-by: David <dailidong66@gmail.com> * fix bug #6053 zh_CN.js is lost * [Fix-6038][ui] width of "SQL Statement" in Dag FormLineModal will be shrunk if sql line is too long (#6040) This closes #6038 * [Improvement] Fix inefficient map iterator (#6004) * Fix inefficient map iterator * Use forEach and remove call to valueOf * Modify AbstractParameters * Enhance `StandaloneServer` so that we don't need to update the version number manually (#6074) * Remove invalid character in `.asf.yaml` (#6075) * Remove invalid character `\n` in `.asf.yaml` (#6077) It turns out that the invalid character is `\n` * Add alert server into standalone-server as well and some minor polish (#6087) * Support starting standalone server in Docker image (#6102) Also remove unused class * [Feature-4355][Master-Worker-API] improvements of master and scheduler module (#6095) * [Feature-4355][Master-Worker-API] improvements of master and scheduler module (#6085) * master refactor: 1. spi for task submit and other actions(pause, kill) 2. remove threads for process instance and task instance. 3. add events for process instance and task instance * ut npe * add try catch * code style * fix critical bugs * fix critical bugs * fix critical bugs * fix critical bugs * Remove unused params in SwitchTaskTest (#6109) * [Feature-5987][Server] Support to set multiple environment configs for a certain worker. 
(#6082) * support multi environments * add some test cases * add an environment vue component * improve environment form * improve environment form * add environment worker group relation * add environment worker group relation * add the environment choice for formModel * set an environment for the task * modify the modal form of starting process * add the environment config to TaskExecutionContext * add the environment config to the timing form * fix conflicts * fix issues of the code style * fix some issues of the code style * fix some issues of the code style * fix some issues of the code style * fix some issues of the code style * fix some issues of the code style * fix some bugs in the code review * add the same table and columns to support H2 * fix some bugs * [Plugin][Task]Task Spi * [Plugin][Task]Task Spi * [Plugin][Task]Task Spi * [Plugin][Task]Task Spi * [Plugin][Task]Task Spi * [Plugin][Task]Task Spi Co-authored-by: didiaode18 <563646039@qq.com> Co-authored-by: Roy <yongjuncao1213@gmail.com> Co-authored-by: lyxell <alyxell@kth.se> Co-authored-by: Wenjun Ruan <wenjun@apache.org> Co-authored-by: kezhenxu94 <kezhenxu94@apache.org> Co-authored-by: myangle1120 <942542838@qq.com> Co-authored-by: wangxj3 <857234426@qq.com> Co-authored-by: gabry.wu <gabrywu@apache.org> Co-authored-by: shaojwu <shaojwu@ebay.com> Co-authored-by: Shukun Zhang <60541766+andream7@users.noreply.github.com> Co-authored-by: linquan <1175687813@qq.com> Co-authored-by: mask <39329477+Narcasserun@users.noreply.github.com> Co-authored-by: mask <liuhu@zhiyoutec.com> Co-authored-by: kyoty <echohlne@gmail.com> Co-authored-by: RichardStark <49977764+RichardStark@users.noreply.github.com> Co-authored-by: David <dailidong66@gmail.com> Co-authored-by: lenboo <baoliang.leon@gmail.com> Co-authored-by: lilyzhou <lj_zhou@outlook.com> Co-authored-by: OS <29528966+lenboo@users.noreply.github.com> Co-authored-by: junfan.zhang <zuston.shacha@gmail.com> Co-authored-by: Hua Jiang <jianghuachinacom@163.com>
3 years ago
Thread.currentThread().interrupt();
logger.error(e.getMessage(), e);
} finally {
clear();
}
});
parseProcessOutputExecutorService.shutdown();
}
/**
 * Extracts the var pool value from an output line.
 *
 * @param line output line to inspect
 * @return the first capture group of {@code SETVALUE_REGEX}, or {@code null} when the
 *         pattern is not found in the line
 */
private String findVarPool(String line) {
    Matcher setValueMatcher = SETVALUE_REGEX.matcher(line);
    return setValueMatcher.find() ? setValueMatcher.group(1) : null;
}
/**
 * Computes how much of the task timeout is left: the configured timeout minus the time
 * elapsed since the task start time (elapsed milliseconds divided by 1000).
 *
 * @return remaining time, never negative
 * @throws RuntimeException if the timeout has already been exceeded
 */
private long getRemainTime() {
    long elapsed = (System.currentTimeMillis() - taskRequest.getStartTime()) / 1000;
    long remaining = taskRequest.getTaskTimeout() - elapsed;
    if (remaining >= 0) {
        return remaining;
    }
    throw new RuntimeException("task execution time out");
}
/**
 * Reads the process id from the process object via reflection on the field named by
 * {@code TaskConstants.PID}.
 *
 * @param process process
 * @return the process id, or {@code 0} if it could not be read (the failure is logged)
 */
private int getProcessId(Process process) {
    try {
        Field pidField = process.getClass().getDeclaredField(TaskConstants.PID);
        pidField.setAccessible(true);
        return pidField.getInt(process);
    } catch (Throwable e) {
        logger.error(e.getMessage(), e);
        return 0;
    }
}
/**
 * Flushes the log buffer to the log handler when it holds enough rows or when the flush
 * interval has elapsed since the last flush.
 *
 * <p>The lines are moved into a private batch with {@code drainTo} before they are handed
 * over. Handing over the shared buffer and clearing it afterwards would drop any line the
 * output reader thread appends in between.
 *
 * @param lastFlushTime time of the previous flush, in milliseconds
 * @return the new last flush time: the current time if a flush happened, otherwise
 *         {@code lastFlushTime} unchanged
 */
private long flush(long lastFlushTime) {
    long now = System.currentTimeMillis();
    boolean enoughRows = logBuffer.size() >= TaskConstants.DEFAULT_LOG_ROWS_NUM;
    boolean intervalElapsed = now - lastFlushTime > TaskConstants.DEFAULT_LOG_FLUSH_INTERVAL;
    if (!enoughRows && !intervalElapsed) {
        return lastFlushTime;
    }

    LinkedBlockingQueue<String> batch = new LinkedBlockingQueue<>();
    logBuffer.drainTo(batch);
    if (!batch.isEmpty()) {
        logHandler.accept(batch);
    }
    return now;
}
/**
 * Supplies the path of the command file. The returned path is passed to
 * {@code createCommandFileIfNotExists} and then handed to the interpreter when the
 * process is built.
 *
 * @return command file path
 */
protected abstract String buildCommandFilePath();
/**
 * Called before the process is built, with the command to execute and the command file path.
 * NOTE(review): presumably writes {@code execCommand} to {@code commandFile} when the file
 * is missing - confirm against the subclasses.
 *
 * @param execCommand command to execute
 * @param commandFile command file path, as returned by {@code buildCommandFilePath}
 * @throws IOException declared for implementations that fail on file I/O
 */
protected abstract void createCommandFileIfNotExists(String execCommand, String commandFile) throws IOException;
/**
 * Creates a single-thread executor whose worker thread is a daemon thread.
 *
 * @param threadName name format given to the thread factory
 * @return a new single-thread executor
 */
ExecutorService newDaemonSingleThreadExecutor(String threadName) {
    ThreadFactoryBuilder factoryBuilder = new ThreadFactoryBuilder();
    factoryBuilder.setNameFormat(threadName);
    factoryBuilder.setDaemon(true);
    return Executors.newSingleThreadExecutor(factoryBuilder.build());
}
/**
 * Supplies the interpreter that is placed on the command line immediately before the
 * command file path.
 *
 * @return interpreter command
 */
protected abstract String commandInterpreter();
}