From 5c45f92f463b5d6c4b811df1058cc712da67dd7e Mon Sep 17 00:00:00 2001 From: ligang Date: Fri, 29 Mar 2019 13:49:42 +0800 Subject: [PATCH] Initial module escheduler-server commit --- escheduler-server/pom.xml | 127 ++ .../src/main/assembly/package.xml | 74 ++ .../cn/escheduler/server/BeanContext.java | 51 + .../java/cn/escheduler/server/ResInfo.java | 102 ++ .../server/master/MasterServer.java | 321 +++++ .../server/master/log/MasterLogFilter.java | 42 + .../runner/MasterBaseTaskExecThread.java | 132 +++ .../master/runner/MasterExecThread.java | 1036 +++++++++++++++++ .../master/runner/MasterSchedulerThread.java | 118 ++ .../master/runner/MasterTaskExecThread.java | 165 +++ .../runner/SubProcessTaskExecThread.java | 178 +++ .../cn/escheduler/server/rpc/LogClient.java | 122 ++ .../escheduler/server/rpc/LoggerServer.java | 204 ++++ .../escheduler/server/utils/AlertManager.java | 236 ++++ .../escheduler/server/utils/LoggerUtils.java | 78 ++ .../escheduler/server/utils/ParamUtils.java | 103 ++ .../escheduler/server/utils/ProcessUtils.java | 301 +++++ .../server/utils/SparkArgsUtils.java | 117 ++ .../cn/escheduler/server/utils/UDFUtils.java | 109 ++ .../server/worker/WorkerServer.java | 366 ++++++ .../server/worker/log/TaskLogAppender.java | 59 + .../server/worker/log/TaskLogFilter.java | 35 + .../server/worker/log/TaskLogger.java | 345 ++++++ .../server/worker/log/WorkerLogFilter.java | 40 + .../server/worker/runner/FetchTaskThread.java | 202 ++++ .../worker/runner/TaskScheduleThread.java | 310 +++++ .../worker/task/AbstractCommandExecutor.java | 557 +++++++++ .../server/worker/task/AbstractTask.java | 99 ++ .../server/worker/task/AbstractYarnTask.java | 91 ++ .../worker/task/PythonCommandExecutor.java | 112 ++ .../worker/task/ShellCommandExecutor.java | 97 ++ .../server/worker/task/TaskManager.java | 67 ++ .../server/worker/task/TaskProps.java | 236 ++++ .../task/dependent/DependentExecute.java | 213 ++++ .../worker/task/dependent/DependentTask.java | 163 +++ 
.../server/worker/task/mr/MapReduceTask.java | 143 +++ .../task/processdure/ProcedureTask.java | 328 ++++++ .../server/worker/task/python/PythonTask.java | 165 +++ .../server/worker/task/shell/ShellTask.java | 159 +++ .../server/worker/task/spark/SparkTask.java | 112 ++ .../server/worker/task/sql/SqlTask.java | 378 ++++++ .../escheduler/server/zk/ZKMasterClient.java | 465 ++++++++ .../escheduler/server/zk/ZKWorkerClient.java | 286 +++++ .../resources/application_master.properties | 1 + .../src/main/resources/master.properties | 21 + .../src/main/resources/master_logback.xml | 34 + .../src/main/resources/worker.properties | 15 + .../src/main/resources/worker_logback.xml | 53 + .../server/master/AlertManagerTest.java | 109 ++ .../server/master/MasterCommandTest.java | 107 ++ .../escheduler/server/master/ParamsTest.java | 102 ++ .../shell/ShellCommandExecutorTest.java | 103 ++ .../server/worker/sql/SqlExecutorTest.java | 104 ++ .../task/dependent/DependentTaskTest.java | 65 ++ .../server/zk/ZKWorkerClientTest.java | 20 + 55 files changed, 9378 insertions(+) create mode 100644 escheduler-server/pom.xml create mode 100644 escheduler-server/src/main/assembly/package.xml create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/BeanContext.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/ResInfo.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/master/MasterServer.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/master/log/MasterLogFilter.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/master/runner/MasterBaseTaskExecThread.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/master/runner/MasterExecThread.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/master/runner/MasterSchedulerThread.java create mode 100644 
escheduler-server/src/main/java/cn/escheduler/server/master/runner/MasterTaskExecThread.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/master/runner/SubProcessTaskExecThread.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/rpc/LogClient.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/rpc/LoggerServer.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/utils/AlertManager.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/utils/LoggerUtils.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/utils/ParamUtils.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/utils/ProcessUtils.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/utils/SparkArgsUtils.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/utils/UDFUtils.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/worker/WorkerServer.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/worker/log/TaskLogAppender.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/worker/log/TaskLogFilter.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/worker/log/TaskLogger.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/worker/log/WorkerLogFilter.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/worker/runner/FetchTaskThread.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/worker/runner/TaskScheduleThread.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/worker/task/AbstractCommandExecutor.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/worker/task/AbstractTask.java create mode 100644 
escheduler-server/src/main/java/cn/escheduler/server/worker/task/AbstractYarnTask.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/worker/task/PythonCommandExecutor.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/worker/task/ShellCommandExecutor.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/worker/task/TaskManager.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/worker/task/TaskProps.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/worker/task/dependent/DependentExecute.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/worker/task/dependent/DependentTask.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/worker/task/mr/MapReduceTask.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/worker/task/processdure/ProcedureTask.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/worker/task/python/PythonTask.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/worker/task/shell/ShellTask.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/worker/task/spark/SparkTask.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/worker/task/sql/SqlTask.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/zk/ZKMasterClient.java create mode 100644 escheduler-server/src/main/java/cn/escheduler/server/zk/ZKWorkerClient.java create mode 100644 escheduler-server/src/main/resources/application_master.properties create mode 100644 escheduler-server/src/main/resources/master.properties create mode 100644 escheduler-server/src/main/resources/master_logback.xml create mode 100644 escheduler-server/src/main/resources/worker.properties create mode 100644 escheduler-server/src/main/resources/worker_logback.xml create mode 100644 
escheduler-server/src/test/java/cn/escheduler/server/master/AlertManagerTest.java create mode 100644 escheduler-server/src/test/java/cn/escheduler/server/master/MasterCommandTest.java create mode 100644 escheduler-server/src/test/java/cn/escheduler/server/master/ParamsTest.java create mode 100644 escheduler-server/src/test/java/cn/escheduler/server/worker/shell/ShellCommandExecutorTest.java create mode 100644 escheduler-server/src/test/java/cn/escheduler/server/worker/sql/SqlExecutorTest.java create mode 100644 escheduler-server/src/test/java/cn/escheduler/server/worker/task/dependent/DependentTaskTest.java create mode 100644 escheduler-server/src/test/java/cn/escheduler/server/zk/ZKWorkerClientTest.java diff --git a/escheduler-server/pom.xml b/escheduler-server/pom.xml new file mode 100644 index 0000000000..db51e56d7d --- /dev/null +++ b/escheduler-server/pom.xml @@ -0,0 +1,127 @@ + + 4.0.0 + + escheduler + cn.analysys + 1.0.0 + + escheduler-server + escheduler-server + http://maven.apache.org + jar + + UTF-8 + + + + + cn.analysys + escheduler-common + + + io.netty + netty + + + io.netty + netty-all + + + com.google + netty + + + log4j-slf4j-impl + org.apache.logging.log4j + + + + + cn.analysys + escheduler-dao + + + spring-boot-starter-logging + org.springframework.boot + + + + + cn.analysys + escheduler-api + + + cn.analysys + escheduler-rpc + + + org.apache.curator + curator-framework + + + org.apache.zookeeper + zookeeper + + + + + org.apache.curator + curator-recipes + + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + junit + junit + test + + + + cn.analysys + escheduler-alert + + + + + + + + + maven-assembly-plugin + + + src/main/assembly/package.xml + + false + + + + make-assembly + package + + single + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + ${java.version} + ${java.version} + ${project.build.sourceEncoding} + + + + + + diff --git a/escheduler-server/src/main/assembly/package.xml 
b/escheduler-server/src/main/assembly/package.xml new file mode 100644 index 0000000000..7a8188c05b --- /dev/null +++ b/escheduler-server/src/main/assembly/package.xml @@ -0,0 +1,74 @@ + + cluster + + dir + + false + + + src/main/resources + + **/*.properties + **/*.xml + **/*.json + + conf + + + ${project.parent.basedir}/escheduler-common/src/main/resources + + **/*.properties + **/*.xml + **/*.json + + conf + + + ${project.parent.basedir}/escheduler-common/src/main/resources/bin + + *.* + + 755 + bin + + + ${project.parent.basedir}/escheduler-dao/src/main/resources + + **/*.properties + **/*.xml + **/*.json + + conf + + + ${project.parent.basedir}/escheduler-api/src/main/resources + + **/*.properties + **/*.xml + **/*.json + + conf + + + target/ + + escheduler-server-${project.version}.jar + + lib + + + + + lib + true + + javax.servlet:servlet-api + org.eclipse.jetty.aggregate:jetty-all + org.slf4j:slf4j-log4j12 + + + + \ No newline at end of file diff --git a/escheduler-server/src/main/java/cn/escheduler/server/BeanContext.java b/escheduler-server/src/main/java/cn/escheduler/server/BeanContext.java new file mode 100644 index 0000000000..7b653963fb --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/BeanContext.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package cn.escheduler.server; + + +import org.springframework.beans.BeansException; +import org.springframework.context.ApplicationContext; +import org.springframework.context.ApplicationContextAware; +import org.springframework.stereotype.Component; + +/** + * bean context + */ +@Component + public class BeanContext implements ApplicationContextAware { + private static ApplicationContext applicationContext; + + public static ApplicationContext getApplicationContext(){ + return applicationContext; + } + + @SuppressWarnings("unchecked") + public static T getBean(String name) throws BeansException { + return (T)applicationContext.getBean(name); + } + + public static T getBean(Class clz) throws BeansException { + return applicationContext.getBean(clz); + } + + + + @Override + public void setApplicationContext(ApplicationContext applicationContext) throws BeansException { + BeanContext.applicationContext = applicationContext; + } +} diff --git a/escheduler-server/src/main/java/cn/escheduler/server/ResInfo.java b/escheduler-server/src/main/java/cn/escheduler/server/ResInfo.java new file mode 100644 index 0000000000..91d6021749 --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/ResInfo.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package cn.escheduler.server; + +import cn.escheduler.common.Constants; +import cn.escheduler.common.utils.JSONUtils; +import cn.escheduler.common.utils.OSUtils; + +/** + * heartbeat for ZK reigster res info + */ +public class ResInfo { + + /** + * cpuUsage + */ + private double cpuUsage; + + /** + * memoryUsage + */ + private double memoryUsage; + + public ResInfo(){} + + public ResInfo(double cpuUsage , double memoryUsage){ + this.cpuUsage = cpuUsage ; + this.memoryUsage = memoryUsage; + } + + public double getCpuUsage() { + return cpuUsage; + } + + public void setCpuUsage(double cpuUsage) { + this.cpuUsage = cpuUsage; + } + + public double getMemoryUsage() { + return memoryUsage; + } + + public void setMemoryUsage(double memoryUsage) { + this.memoryUsage = memoryUsage; + } + + /** + * get CPU and memory usage + * @return + */ + public static String getResInfoJson(){ + ResInfo resInfo = new ResInfo(OSUtils.cpuUsage(), OSUtils.memoryUsage()); + return JSONUtils.toJson(resInfo); + } + + + /** + * get CPU and memory usage + * @return + */ + public static String getResInfoJson(double cpuUsage , double memoryUsage){ + ResInfo resInfo = new ResInfo(cpuUsage,memoryUsage); + return JSONUtils.toJson(resInfo); + } + + + /** + * build heartbeat info for zk + * @param host + * @param port + * @param cpuUsage + * @param memoryUsage + * @param createTime + * @param lastHeartbeatTime + * @return + */ + public static String buildHeartbeatForZKInfo(String host , int port , + double cpuUsage , double memoryUsage, + String createTime,String lastHeartbeatTime){ + 
+ return host + Constants.COMMA + port + Constants.COMMA + + cpuUsage + Constants.COMMA + + memoryUsage + Constants.COMMA + + createTime + Constants.COMMA + + lastHeartbeatTime; + } + +} diff --git a/escheduler-server/src/main/java/cn/escheduler/server/master/MasterServer.java b/escheduler-server/src/main/java/cn/escheduler/server/master/MasterServer.java new file mode 100644 index 0000000000..95705dd017 --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/master/MasterServer.java @@ -0,0 +1,321 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package cn.escheduler.server.master; + +import cn.escheduler.api.quartz.ProcessScheduleJob; +import cn.escheduler.api.quartz.QuartzExecutors; +import cn.escheduler.common.Constants; +import cn.escheduler.common.IStoppable; +import cn.escheduler.common.thread.Stopper; +import cn.escheduler.common.thread.ThreadPoolExecutors; +import cn.escheduler.common.thread.ThreadUtils; +import cn.escheduler.common.utils.OSUtils; +import cn.escheduler.dao.AlertDao; +import cn.escheduler.dao.ProcessDao; +import cn.escheduler.dao.ServerDao; +import cn.escheduler.server.master.runner.MasterSchedulerThread; +import cn.escheduler.server.zk.ZKMasterClient; +import org.apache.commons.configuration.Configuration; +import org.apache.commons.configuration.ConfigurationException; +import org.apache.commons.configuration.PropertiesConfiguration; +import org.apache.commons.lang3.StringUtils; +import org.quartz.SchedulerException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.CommandLineRunner; +import org.springframework.boot.SpringApplication; +import org.springframework.context.annotation.ComponentScan; + +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +/** + * master server + */ +@ComponentScan("cn.escheduler") +public class MasterServer implements CommandLineRunner, IStoppable { + + private static final Logger logger = LoggerFactory.getLogger(MasterServer.class); + + /** + * conf + */ + private static Configuration conf; + + /** + * object lock + */ + private final Object lock = new Object(); + + /** + * whether or not to close the state + */ + private boolean terminated = false; + + /** + * zk master client + */ + private static ZKMasterClient zkMasterClient=null; + + + /** + * master dao database access + */ + private ServerDao serverDao = null; + + /** + * alert database access + 
*/ + private AlertDao alertDao = null; + + /** + * escheduler database interface + */ + @Autowired + private ProcessDao processDao; + + /** + * heartbeat thread pool + */ + private ScheduledExecutorService heartbeatMasterService; + + + /** + * master exec thread pool + */ + private final ExecutorService masterSchedulerService = ThreadUtils.newDaemonSingleThreadExecutor("Master-Scheduler-Thread"); + + /** + * heartbeat interval, unit second + */ + private int heartBeatInterval; + + static { + try { + conf = new PropertiesConfiguration(Constants.MASTER_PROPERTIES_PATH); + }catch (ConfigurationException e){ + logger.error("load configuration failed : " + e.getMessage(),e); + System.exit(1); + } + } + + public MasterServer(){} + + public MasterServer(ProcessDao processDao){ + zkMasterClient = ZKMasterClient.getZKMasterClient(processDao); + this.serverDao = zkMasterClient.getServerDao(); + this.alertDao = zkMasterClient.getAlertDao(); + } + public void run(ProcessDao processDao){ + + // heartbeat interval + heartBeatInterval = conf.getInt(Constants.MASTER_HEARTBEAT_INTERVAL, + Constants.defaultMasterHeartbeatInterval); + + heartbeatMasterService = ThreadUtils.newDaemonThreadScheduledExecutor("Master-Main-Thread",Constants.defaulMasterHeartbeatThreadNum); + + // heartbeat thread implement + Runnable heartBeatThread = heartBeatThread(); + + zkMasterClient.setStoppable(this); + + // regular heartbeat + // delay 5 seconds, send heartbeat every 30 seconds + heartbeatMasterService. 
+ scheduleAtFixedRate(heartBeatThread, 5, heartBeatInterval, TimeUnit.SECONDS); + + // master exec thread pool num + int masterExecThreadNum = conf.getInt(Constants.MASTER_EXEC_THREADS, + Constants.defaultMasterExecThreadNum); + + // master scheduler thread + MasterSchedulerThread masterSchedulerThread = new MasterSchedulerThread( + zkMasterClient, + processDao,conf, + masterExecThreadNum); + + // submit master scheduler thread + masterSchedulerService.execute(masterSchedulerThread); + + // start QuartzExecutors + try { + ProcessScheduleJob.init(processDao); + QuartzExecutors.getInstance().start(); + } catch (Exception e) { + try { + QuartzExecutors.getInstance().shutdown(); + } catch (SchedulerException e1) { + logger.error("QuartzExecutors shutdown failed : " + e1.getMessage(), e1); + } + logger.error("start Quartz failed : " + e.getMessage(), e); + } + + + /** + * register hooks, which are called before the process exits + */ + Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() { + @Override + public void run() { + String host = OSUtils.getHost(); + // clear master table register info + serverDao.deleteMaster(host); + logger.info("master server stopped"); + if (zkMasterClient.getActiveMasterNum() <= 1) { + for (int i = 0; i < Constants.ESCHEDULER_WARN_TIMES_FAILOVER;i++) { + alertDao.sendServerStopedAlert(1, host, "Master-Server"); + } + } + } + })); + } + + + public static void main(String[] args) { + SpringApplication app = new SpringApplication(MasterServer.class); + app.setWebEnvironment(false); + app.run(args); + } + + + /** + * blocking implement + * @throws InterruptedException + */ + public void awaitTermination() throws InterruptedException { + synchronized (lock) { + while (!terminated) { + lock.wait(); + } + } + } + + /** + * heartbeat thread implement + * @return + */ + public Runnable heartBeatThread(){ + Runnable heartBeatThread = new Runnable() { + @Override + public void run() { + if(Stopper.isRunning()) { + // send heartbeat to zk + 
if (StringUtils.isBlank(zkMasterClient.getMasterZNode())) { + logger.error("master send heartbeat to zk failed"); + return; + } + + zkMasterClient.heartBeatForZk(zkMasterClient.getMasterZNode(), Constants.MASTER_PREFIX); + } + } + }; + return heartBeatThread; + } + + @Override + public void run(String... strings) throws Exception { + + MasterServer masterServer = new MasterServer(processDao); + + masterServer.run(processDao); + + logger.info("master server started"); + // blocking + masterServer.awaitTermination(); + + + } + + /** + * gracefully stop + * @param cause why stopping + */ + @Override + public synchronized void stop(String cause) { + + try { + //execute only once + if(Stopper.isStoped()){ + return; + } + + logger.info("master server is stopping ..., cause : {}", cause); + + // set stop signal is true + Stopper.stop(); + + try { + //thread sleep 3 seconds for thread quitely stop + Thread.sleep(3000L); + }catch (Exception e){ + logger.warn("thread sleep exception:" + e.getMessage(), e); + } + try { + heartbeatMasterService.shutdownNow(); + }catch (Exception e){ + logger.warn("heartbeat service stopped exception"); + } + + logger.info("heartbeat service stopped"); + + //close quartz + try{ + QuartzExecutors.getInstance().shutdown(); + }catch (Exception e){ + logger.warn("Quartz service stopped exception:{}",e.getMessage()); + } + + logger.info("Quartz service stopped"); + + try { + ThreadPoolExecutors.getInstance().shutdown(); + }catch (Exception e){ + logger.warn("threadpool service stopped exception:{}",e.getMessage()); + } + + logger.info("threadpool service stopped"); + + try { + masterSchedulerService.shutdownNow(); + }catch (Exception e){ + logger.warn("master scheduler service stopped exception:{}",e.getMessage()); + } + + logger.info("master scheduler service stopped"); + + try { + zkMasterClient.close(); + }catch (Exception e){ + logger.warn("zookeeper service stopped exception:{}",e.getMessage()); + } + + logger.info("zookeeper service stopped"); 
+ + synchronized (lock) { + terminated = true; + lock.notifyAll(); + } + + } catch (Exception e) { + logger.error("master server stop exception : " + e.getMessage(), e); + System.exit(-1); + } + } +} + diff --git a/escheduler-server/src/main/java/cn/escheduler/server/master/log/MasterLogFilter.java b/escheduler-server/src/main/java/cn/escheduler/server/master/log/MasterLogFilter.java new file mode 100644 index 0000000000..fdacd6d7fe --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/master/log/MasterLogFilter.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package cn.escheduler.server.master.log; + +import ch.qos.logback.classic.Level; +import ch.qos.logback.classic.spi.ILoggingEvent; +import ch.qos.logback.core.filter.Filter; +import ch.qos.logback.core.spi.FilterReply; + +/** + * master log filter + */ +public class MasterLogFilter extends Filter { + + Level level; + + @Override + public FilterReply decide(ILoggingEvent event) { + if (event.getThreadName().startsWith("Master-")){ + return FilterReply.ACCEPT; + } + return FilterReply.DENY; + } + + public void setLevel(String level) { + this.level = Level.toLevel(level); + } +} \ No newline at end of file diff --git a/escheduler-server/src/main/java/cn/escheduler/server/master/runner/MasterBaseTaskExecThread.java b/escheduler-server/src/main/java/cn/escheduler/server/master/runner/MasterBaseTaskExecThread.java new file mode 100644 index 0000000000..ed427992bf --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/master/runner/MasterBaseTaskExecThread.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package cn.escheduler.server.master.runner; + +import cn.escheduler.common.Constants; +import cn.escheduler.common.queue.ITaskQueue; +import cn.escheduler.common.queue.TaskQueueFactory; +import cn.escheduler.dao.AlertDao; +import cn.escheduler.dao.ProcessDao; +import cn.escheduler.dao.model.ProcessInstance; +import cn.escheduler.dao.model.TaskInstance; +import cn.escheduler.server.BeanContext; +import org.apache.commons.configuration.Configuration; +import org.apache.commons.configuration.ConfigurationException; +import org.apache.commons.configuration.PropertiesConfiguration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.concurrent.Callable; + +/** + * master task exec base class + */ +public class MasterBaseTaskExecThread implements Callable { + + private static final Logger logger = LoggerFactory.getLogger(MasterBaseTaskExecThread.class); + + /** + * process dao + */ + protected ProcessDao processDao; + + /** + * alert database access + */ + protected AlertDao alertDao; + + /** + * process instance + */ + protected ProcessInstance processInstance; + + /** + * task instance + */ + protected TaskInstance taskInstance; + + /** + * task queue + */ + protected ITaskQueue taskQueue; + protected boolean cancel; + + /** + * load configuration file + */ + private static Configuration conf; + + static { + try { + conf = new PropertiesConfiguration(Constants.MASTER_PROPERTIES_PATH); + } catch (ConfigurationException e) { + logger.error(e.getMessage(), e); + System.exit(1); + } + } + + public MasterBaseTaskExecThread(TaskInstance taskInstance, ProcessInstance processInstance){ + this.processDao = BeanContext.getBean(ProcessDao.class); + this.alertDao = BeanContext.getBean(AlertDao.class); + this.processInstance = processInstance; + this.taskQueue = TaskQueueFactory.getTaskQueueInstance(); + this.cancel = false; + this.taskInstance = taskInstance; + } + + public TaskInstance getTaskInstance(){ + return this.taskInstance; + } + + public 
void kill(){ + this.cancel = true; + } + + protected TaskInstance submit(){ + Integer commitRetryTimes = conf.getInt(Constants.MASTER_COMMIT_RETRY_TIMES, + Constants.defaultMasterCommitRetryTimes); + Integer commitRetryInterval = conf.getInt(Constants.MASTER_COMMIT_RETRY_INTERVAL, + Constants.defaultMasterCommitRetryInterval); + + int retryTimes = 1; + + while (retryTimes <= commitRetryTimes){ + try { + TaskInstance task = processDao.submitTask(taskInstance, processInstance); + if(task != null){ + return task; + } + logger.error("task commit to mysql and queue failed , task has already retry {} times, please check the database", commitRetryTimes); + Thread.sleep(commitRetryInterval); + } catch (Exception e) { + logger.error("task commit to mysql and queue failed : " + e.getMessage(),e); + } + retryTimes += 1; + } + return null; + } + + protected Boolean submitWaitComplete(){ + return true; + } + + @Override + public Boolean call() throws Exception { + return submitWaitComplete(); + } + +} diff --git a/escheduler-server/src/main/java/cn/escheduler/server/master/runner/MasterExecThread.java b/escheduler-server/src/main/java/cn/escheduler/server/master/runner/MasterExecThread.java new file mode 100644 index 0000000000..101e72e300 --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/master/runner/MasterExecThread.java @@ -0,0 +1,1036 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package cn.escheduler.server.master.runner; + +import cn.escheduler.common.Constants; +import cn.escheduler.common.enums.*; +import cn.escheduler.common.graph.DAG; +import cn.escheduler.common.model.TaskNode; +import cn.escheduler.common.model.TaskNodeRelation; +import cn.escheduler.common.process.ProcessDag; +import cn.escheduler.common.thread.Stopper; +import cn.escheduler.common.thread.ThreadUtils; +import cn.escheduler.common.utils.*; +import cn.escheduler.dao.DaoFactory; +import cn.escheduler.dao.ProcessDao; +import cn.escheduler.dao.model.ProcessInstance; +import cn.escheduler.dao.model.TaskInstance; +import cn.escheduler.dao.utils.DagHelper; +import cn.escheduler.server.utils.AlertManager; +import com.alibaba.fastjson.JSONObject; +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.configuration.Configuration; +import org.apache.commons.configuration.ConfigurationException; +import org.apache.commons.configuration.PropertiesConfiguration; +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.util.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; + +import static cn.escheduler.common.Constants.*; + +/** + * master exec thread,split dag + */ +public class MasterExecThread implements Runnable { + + private static final Logger logger = 
LoggerFactory.getLogger(MasterExecThread.class); + + /** + * process instance + */ + private ProcessInstance processInstance; + + + /** + * runing TaskNode + */ + private final Map> activeTaskNode = new ConcurrentHashMap>(); + + private final ExecutorService taskExecService; + + /** + * submit failure nodes + */ + private Boolean taskFailedSubmit = false; + private List recoverNodeIdList = new ArrayList<>(); + private Map errorTaskList = new ConcurrentHashMap<>(); + private Map completeTaskList = new ConcurrentHashMap<>(); + private Map readyToSubmitTaskList = new ConcurrentHashMap<>(); + private Map dependFailedTask = new ConcurrentHashMap<>(); + private List recoverToleranceFaultTaskList = new ArrayList<>(); + + private AlertManager alertManager = new AlertManager(); + + private DAG dag; + + /** + * process dao + */ + private ProcessDao processDao; + + /** + * load configuration file + */ + private static Configuration conf; + + public MasterExecThread(ProcessInstance processInstance){ + this.processDao = DaoFactory.getDaoInstance(ProcessDao.class); + + this.processInstance = processInstance; + + int masterTaskExecNum = conf.getInt(Constants.MASTER_EXEC_TASK_THREADS, + Constants.defaultMasterTaskExecNum); + this.taskExecService = ThreadUtils.newDaemonFixedThreadExecutor("Master-Task-Exec-Thread", + masterTaskExecNum); + } + + + static { + try { + conf = new PropertiesConfiguration(Constants.MASTER_PROPERTIES_PATH); + }catch (ConfigurationException e){ + logger.error("load configuration failed : " + e.getMessage(),e); + System.exit(1); + } + } + + @Override + public void run() { + + // process instance is null + if (processInstance == null){ + logger.info("process instance is not exists"); + return; + } + + // check to see if it's done + if (processInstance.getState().typeIsFinished()){ + logger.info("process instance is done : {}",processInstance.getId()); + return; + } + + try { + if (processInstance.isComplementData() && Flag.NO == 
processInstance.getIsSubProcess()){ + // sub process complement data + executeComplementProcess(); + }else{ + // execute flow + executeProcess(); + } + }catch (Exception e){ + logger.error("master exec thread exception: " + e.getMessage(), e); + logger.error("process execute failed, process id:{}", processInstance.getId()); + processInstance.setState(ExecutionStatus.FAILURE); + processInstance.setEndTime(new Date()); + processDao.updateProcessInstance(processInstance); + }finally { + taskExecService.shutdown(); + // post handle + postHandle(); + } + } + + private void executeProcess() throws Exception { + prepareProcess(); + runProcess(); + endProcess(); + } + + /** + * execute complement process + * @throws Exception + */ + private void executeComplementProcess() throws Exception { + + Map cmdParam = JSONUtils.toMap(processInstance.getCommandParam()); + + Date startDate = DateUtils.getScheduleDate(cmdParam.get(CMDPARAM_COMPLEMENT_DATA_START_DATE)); + Date endDate = DateUtils.getScheduleDate(cmdParam.get(CMDPARAM_COMPLEMENT_DATA_END_DATE)); + processDao.saveProcessInstance(processInstance); + Date scheduleDate = processInstance.getScheduleTime(); + + if(scheduleDate == null){ + scheduleDate = startDate; + } + + while(Stopper.isRunning()){ + // prepare dag and other info + prepareProcess(); + + if(dag == null){ + logger.error("process {} dag is null, please check out parameters", + processInstance.getId()); + processInstance.setState(ExecutionStatus.SUCCESS); + processDao.updateProcessInstance(processInstance); + return; + } + + // execute process ,waiting for end + runProcess(); + + // process instace failure ,no more complements + if(!processInstance.getState().typeIsSuccess()){ + logger.info("process {} state {}, complement not completely!", + processInstance.getId(), processInstance.getState()); + break; + } + + // current process instance sucess ,next execute + scheduleDate = DateUtils.getSomeDay(scheduleDate, 1); + if(scheduleDate.after(endDate)){ + // all 
success + logger.info("process {} complement completely!", processInstance.getId()); + break; + } + + logger.info("process {} start to complement {} data", + processInstance.getId(), DateUtils.dateToString(scheduleDate)); + // execute next process instance complement data + processInstance.setScheduleTime(scheduleDate); + if(cmdParam.containsKey(Constants.CMDPARAM_RECOVERY_START_NODE_STRING)){ + cmdParam.remove(Constants.CMDPARAM_RECOVERY_START_NODE_STRING); + processInstance.setCommandParam(JSONUtils.toJson(cmdParam)); + } + + List taskInstanceList = processDao.findValidTaskListByProcessId(processInstance.getId()); + for(TaskInstance taskInstance : taskInstanceList){ + taskInstance.setFlag(Flag.NO); + processDao.updateTaskInstance(taskInstance); + } + processInstance.setState(ExecutionStatus.RUNNING_EXEUTION); + processInstance.setGlobalParams(ParameterUtils.curingGlobalParams( + processInstance.getProcessDefinition().getGlobalParamMap(), + processInstance.getProcessDefinition().getGlobalParamList(), + CommandType.COMPLEMENT_DATA,processInstance.getScheduleTime())); + + processDao.saveProcessInstance(processInstance); + } + + // flow end + endProcess(); + + } + + + /** + * prepare process parameter + * @throws Exception + */ + private void prepareProcess() throws Exception { + // init task queue + initTaskQueue(); + + // gen process dag + buildFlowDag(); + logger.info("prepare process :{} end", processInstance.getId()); + } + + + /** + * process end handle + */ + private void endProcess() { + processInstance.setEndTime(new Date()); + processDao.updateProcessInstance(processInstance); + if(processInstance.getState().typeIsWaittingThread()){ + processDao.createRecoveryWaitingThreadCommand(null, processInstance); + } + List taskInstances = processDao.findValidTaskListByProcessId(processInstance.getId()); + alertManager.sendWarnningOfProcessInstance(processInstance, taskInstances); + } + + + /** + * generate process dag + * @throws Exception + */ + private void 
buildFlowDag() throws Exception { + recoverNodeIdList = getStartTaskInstanceList(processInstance.getCommandParam()); + + // generate process to get DAG info + List recoveryNameList = getRecoveryNodeNameList(); + List startNodeNameList = parseStartNodeName(processInstance.getCommandParam()); + ProcessDag processDag = generateFlowDag(processInstance.getProcessInstanceJson(), + startNodeNameList, recoveryNameList, processInstance.getTaskDependType()); + if(processDag == null){ + logger.error("processDag is null"); + return; + } + // generate process dag + dag = buildDagGraph(processDag); + } + + private void initTaskQueue(){ + + taskFailedSubmit = false; + activeTaskNode.clear(); + dependFailedTask.clear(); + completeTaskList.clear(); + errorTaskList.clear(); + List taskInstanceList = processDao.findValidTaskListByProcessId(processInstance.getId()); + for(TaskInstance task : taskInstanceList){ + if(task.isTaskComplete()){ + completeTaskList.put(task.getName(), task); + } + if(task.getState().typeIsFailure() && !task.taskCanRetry()){ + errorTaskList.put(task.getName(), task); + } + } + } + + /** + * process post handle + */ + private void postHandle() { + logger.info("develop mode is: {}", CommonUtils.isDevelopMode()); + + if (!CommonUtils.isDevelopMode()) { + // get exec dir + String execLocalPath = cn.escheduler.common.utils.FileUtils + .getProcessExecDir(processInstance.getProcessDefinition().getProjectId(), + processInstance.getProcessDefinitionId(), + processInstance.getId()); + + try { + FileUtils.deleteDirectory(new File(execLocalPath)); + } catch (IOException e) { + logger.error("delete exec dir failed : " + e.getMessage(), e); + } + } + } + + + + /** + * submit task to execute + * @param taskInstance + */ + private TaskInstance submitTaskExec(TaskInstance taskInstance) { + MasterBaseTaskExecThread abstractExecThread = null; + if(taskInstance.isSubProcess()){ + abstractExecThread = new SubProcessTaskExecThread(taskInstance, processInstance); + }else { + 
abstractExecThread = new MasterTaskExecThread(taskInstance, processInstance); + } + Future future = taskExecService.submit(abstractExecThread); + activeTaskNode.putIfAbsent(abstractExecThread, future); + return abstractExecThread.getTaskInstance(); + } + + /** + * find task instance in db. + * in case submit more than one same name task in the same time. + * @param taskName + * @return + */ + private TaskInstance findTaskIfExists(String taskName){ + List taskInstanceList = processDao.findValidTaskListByProcessId(this.processInstance.getId()); + for(TaskInstance taskInstance : taskInstanceList){ + if(taskInstance.getName().equals(taskName)){ + return taskInstance; + } + } + return null; + } + + /** + * encapsulation task + * @param processInstance + * @param nodeName + * @return + */ + private TaskInstance createTaskInstance(ProcessInstance processInstance, String nodeName, + TaskNode taskNode, String parentNodeName) { + + TaskInstance taskInstance = findTaskIfExists(nodeName); + if(taskInstance == null){ + taskInstance = new TaskInstance(); + // task name + taskInstance.setName(nodeName); + // process instance define id + taskInstance.setProcessDefinitionId(processInstance.getProcessDefinitionId()); + // task instance state + taskInstance.setState(ExecutionStatus.SUBMITTED_SUCCESS); + // process instance id + taskInstance.setProcessInstanceId(processInstance.getId()); + // task instance node json + taskInstance.setTaskJson(JSONObject.toJSONString(taskNode)); + // task instance type + taskInstance.setTaskType(taskNode.getType()); + // task instance whether alert + taskInstance.setAlertFlag(Flag.NO); + + // task instance start time + taskInstance.setStartTime(new Date()); + + // task instance flag + taskInstance.setFlag(Flag.YES); + + // task instance retry times + taskInstance.setRetryTimes(0); + + // max task instance retry times + taskInstance.setMaxRetryTimes(taskNode.getMaxRetryTimes()); + + // retry task instance interval + 
taskInstance.setRetryInterval(taskNode.getRetryInterval()); + + // task instance priority + if(taskNode.getTaskInstancePriority() == null){ + taskInstance.setTaskInstancePriority(Priority.MEDIUM); + }else{ + taskInstance.setTaskInstancePriority(taskNode.getTaskInstancePriority()); + } + + } + return taskInstance; + } + + /** + * get post task instance by node + * + * @param dag + * @param parentNodeName + * @return + */ + private List getPostTaskInstanceByNode(DAG dag, String parentNodeName){ + + List postTaskList = new ArrayList<>(); + Collection startVertex = null; + if(StringUtils.isNotEmpty(parentNodeName)){ + startVertex = dag.getSubsequentNodes(parentNodeName); + }else{ + startVertex = dag.getBeginNode(); + } + for (String nodeName : startVertex){ + + // encapsulation task instance + TaskInstance taskInstance = createTaskInstance(processInstance, nodeName , + dag.getNode(nodeName),parentNodeName); + postTaskList.add(taskInstance); + } + return postTaskList; + } + + /** + * + * return start task node list + * + * @return + */ + private List getStartSubmitTaskList(){ + + List startTaskList = getPostTaskInstanceByNode(dag, null); + + HashMap successTaskMaps = new HashMap<>(); + List resultList = new ArrayList<>(); + while(Stopper.isRunning()){ + for(TaskInstance task : startTaskList){ + if(task.getState().typeIsSuccess()){ + successTaskMaps.put(task.getName(), task); + }else if(!completeTaskList.containsKey(task.getName()) && !errorTaskList.containsKey(task.getName())){ + resultList.add(task); + } + } + startTaskList.clear(); + if(successTaskMaps.size() == 0){ + break; + } + + Set taskNameKeys = successTaskMaps.keySet(); + for(String taskName : taskNameKeys){ + startTaskList.addAll(getPostTaskInstanceByNode(dag, taskName)); + } + successTaskMaps.clear(); + } + return resultList; + } + + /** + * submit post node + * @param parentNodeName + */ + private void submitPostNode(String parentNodeName){ + + List submitTaskList = null; + if(parentNodeName == null){ + 
submitTaskList = getStartSubmitTaskList(); + }else{ + submitTaskList = getPostTaskInstanceByNode(dag, parentNodeName); + } + // if previous node success , post node submit + for(TaskInstance task : submitTaskList){ + if(readyToSubmitTaskList.containsKey(task.getName())){ + continue; + } + + if(completeTaskList.containsKey(task.getName())){ + logger.info("task {} has already run success", task.getName()); + continue; + } + if(task.getState().typeIsPause() || task.getState().typeIsCancel()){ + logger.info("task {} stopped, the state is {}", task.getName(), task.getState().toString()); + }else{ + addTaskToStandByList(task); + } + } + } + + /** + * determine whether the dependencies of the task node are complete + * @return + */ + private DependResult isTaskDepsComplete(String taskName) { + + Collection startNodes = dag.getBeginNode(); + // ff the vertex returns true directly + if(startNodes.contains(taskName)){ + return DependResult.SUCCESS; + } + + TaskNode taskNode = dag.getNode(taskName); + List depsNameList = taskNode.getDepList(); + for(String depsNode : depsNameList ){ + + // dependencies must be all complete + if(!completeTaskList.containsKey(depsNode)){ + return DependResult.WAITING; + } + ExecutionStatus taskState = completeTaskList.get(depsNode).getState(); + if(taskState.typeIsFailure()){ + return DependResult.FAILED; + } + if(taskState.typeIsPause() || taskState.typeIsCancel()){ + return DependResult.WAITING; + } + } + + logger.info("taskName: {} completeDependTaskList: {}", taskName, Arrays.toString(completeTaskList.keySet().toArray())); + + return DependResult.SUCCESS; + } + + + /** + * query task instance by complete state + * @param state + * @return + */ + private List getCompleteTaskByState(ExecutionStatus state){ + List resultList = new ArrayList<>(); + Set taskList = completeTaskList.keySet(); + for(String taskName : taskList){ + TaskInstance taskInstance = completeTaskList.get(taskName); + if(taskInstance.getState() == state){ + 
resultList.add(taskInstance); + } + } + return resultList; + } + + /** + * where there are ongoing tasks + * @param state + * @return + */ + private ExecutionStatus runningState(ExecutionStatus state){ + if(state == ExecutionStatus.READY_STOP || + state == ExecutionStatus.READY_PAUSE || + state == ExecutionStatus.WAITTING_THREAD){ + // if the running task is not completed, the state remains unchanged + return state; + }else{ + return ExecutionStatus.RUNNING_EXEUTION; + } + } + + /** + * exists failure task , contains submit failure、dependency failure,execute failure(retry after) + * + * @return + */ + private Boolean hasFailedTask(){ + + if(this.taskFailedSubmit){ + return true; + } + if(this.errorTaskList.size() > 0){ + return true; + } + return this.dependFailedTask.size() > 0; + } + + /** + * process instance failure + * + * @return + */ + private Boolean processFailed(){ + if(hasFailedTask()) { + if(processInstance.getFailureStrategy() == FailureStrategy.END){ + return true; + } + if (processInstance.getFailureStrategy() == FailureStrategy.CONTINUE) { + return readyToSubmitTaskList.size() == 0 || activeTaskNode.size() == 0; + } + } + return false; + } + + /** + * whether task for waiting thread + * @return + */ + private Boolean hasWaitingThreadTask(){ + + List waitingList = getCompleteTaskByState(ExecutionStatus.WAITTING_THREAD); + return waitingList.size() > 0; + } + + /** + * prepare for pause + * 1,failed retry task in the preparation queue , returns to failure directly + * 2,exists pause task,complement not completed, pending submission of tasks, return to suspension + * 3,success + * @return + */ + private ExecutionStatus processReadyPause(){ + if(hasRetryTaskInStandBy()){ + return ExecutionStatus.FAILURE; + } + + List pauseList = getCompleteTaskByState(ExecutionStatus.PAUSE); + if(pauseList.size() > 0 + || !isComplementEnd() + || readyToSubmitTaskList.size() > 0){ + return ExecutionStatus.PAUSE; + }else{ + return ExecutionStatus.SUCCESS; + } + } + + + 
/** + * generate the latest process instance status by the tasks state + * @return + */ + private ExecutionStatus getProcessInstanceState(){ + ProcessInstance instance = processDao.findProcessInstanceById(processInstance.getId()); + ExecutionStatus state = instance.getState(); + + if(activeTaskNode.size() > 0){ + return runningState(state); + } + // process failure + if(processFailed()){ + return ExecutionStatus.FAILURE; + } + + // waiting thread + if(hasWaitingThreadTask()){ + return ExecutionStatus.WAITTING_THREAD; + } + + // pause + if(state == ExecutionStatus.READY_PAUSE){ + return processReadyPause(); + } + + // stop + if(state == ExecutionStatus.READY_STOP){ + List stopList = getCompleteTaskByState(ExecutionStatus.STOP); + List killList = getCompleteTaskByState(ExecutionStatus.KILL); + if(stopList.size() > 0 || killList.size() > 0 || !isComplementEnd()){ + return ExecutionStatus.STOP; + }else{ + return ExecutionStatus.SUCCESS; + } + } + + // success + if(state == ExecutionStatus.RUNNING_EXEUTION){ + if(readyToSubmitTaskList.size() > 0){ + //tasks currently pending submission, no retries, indicating that depend is waiting to complete + return ExecutionStatus.RUNNING_EXEUTION; + }else{ + // if the waiting queue is empty and the status is in progress, then success + return ExecutionStatus.SUCCESS; + } + } + + return state; + } + + /** + * whether complement end + * @return + */ + private Boolean isComplementEnd() { + if(!processInstance.isComplementData()){ + return true; + } + + try { + Map cmdParam = JSONUtils.toMap(processInstance.getCommandParam()); + Date endTime = DateUtils.getScheduleDate(cmdParam.get(CMDPARAM_COMPLEMENT_DATA_END_DATE)); + return processInstance.getScheduleTime().equals(endTime); + } catch (Exception e) { + logger.error("complement end failed : " + e.getMessage(),e); + return false; + } + } + + /** + * updateProcessInstance process instance state + * after each batch of tasks is executed, the status of the process instance is updated + */ 
+ private void updateProcessInstanceState() { + ExecutionStatus state = getProcessInstanceState(); + if(processInstance.getState() != state){ + logger.info( + "work flow process instance [id: {}, name:{}], state change from {} to {}, cmd type: {}", + processInstance.getId(), processInstance.getName(), + processInstance.getState().toString(), state.toString(), + processInstance.getCommandType().toString()); + processInstance.setState(state); + ProcessInstance instance = processDao.findProcessInstanceById(processInstance.getId()); + instance.setState(state); + instance.setProcessDefinition(processInstance.getProcessDefinition()); + processDao.updateProcessInstance(instance); + processInstance = instance; + } + } + + /** + * get task dependency result + * @param taskInstance + * @return + */ + private DependResult getDependResultForTask(TaskInstance taskInstance){ + DependResult inner = isTaskDepsComplete(taskInstance.getName()); + return inner; + } + + /** + * add task to standy list + * @param taskInstance + */ + private void addTaskToStandByList(TaskInstance taskInstance){ + logger.info("add task to stand by list: {}", taskInstance.getName()); + readyToSubmitTaskList.putIfAbsent(taskInstance.getName(), taskInstance); + } + + /** + * remove task from stand by list + * @param taskInstance + */ + private void removeTaskFromStandbyList(TaskInstance taskInstance){ + logger.info("remove task from stand by list: {}", taskInstance.getName()); + readyToSubmitTaskList.remove(taskInstance.getName()); + } + + /** + * has retry task in standby + * @return + */ + private Boolean hasRetryTaskInStandBy(){ + Set taskNameSet = this.readyToSubmitTaskList.keySet(); + for(String taskName : taskNameSet){ + TaskInstance task = this.readyToSubmitTaskList.get(taskName); + if(task.getState().typeIsFailure()){ + return true; + } + } + return false; + } + + /** + * submit and watch the tasks, until the work flow stop + */ + private void runProcess(){ + // submit start node + 
submitPostNode(null); + // submitStandByTask(); + while(!processInstance.IsProcessInstanceStop()){ + Set keys = activeTaskNode.keySet(); + for (MasterBaseTaskExecThread taskExecThread : keys) { + Future future = activeTaskNode.get(taskExecThread); + TaskInstance task = taskExecThread.getTaskInstance(); + + if(!future.isDone()){ + continue; + } + // node monitor thread complete + activeTaskNode.remove(taskExecThread); + if(task == null){ + this.taskFailedSubmit = true; + continue; + } + logger.info("task :{}, id:{} complete, state is {} ", + task.getName(), task.getId(), task.getState().toString()); + // node success , post node submit + if(task.getState() == ExecutionStatus.SUCCESS){ + completeTaskList.put(task.getName(), task); + submitPostNode(task.getName()); + continue; + } + // node fails, retry first, and then execute the failure process + if(task.getState().typeIsFailure()){ + if(task.getState() == ExecutionStatus.NEED_FAULT_TOLERANCE){ + this.recoverToleranceFaultTaskList.add(task); + } + if(task.taskCanRetry()){ + addTaskToStandByList(task); + }else{ + // node failure, based on failure strategy + errorTaskList.put(task.getName(), task); + completeTaskList.put(task.getName(), task); + if(processInstance.getFailureStrategy() == FailureStrategy.END){ + kill(); + } + } + continue; + } + // other status stop/pause + completeTaskList.put(task.getName(), task); + } + // send alert + if(this.recoverToleranceFaultTaskList.size() > 0){ + alertManager.sendWarnningWorkerleranceFault(processInstance, recoverToleranceFaultTaskList); + this.recoverToleranceFaultTaskList.clear(); + } + // updateProcessInstance completed task status + // failure priority is higher than pause + // if a task fails, other suspended tasks need to be reset kill + if(errorTaskList.size() > 0){ + for(String taskName : completeTaskList.keySet()){ + TaskInstance completeTask = completeTaskList.get(taskName); + if(completeTask.getState()== ExecutionStatus.PAUSE){ + 
completeTask.setState(ExecutionStatus.KILL); + completeTaskList.put(taskName, completeTask); + processDao.updateTaskInstance(completeTask); + } + } + } + if(canSubmitTaskToQueue()){ + submitStandByTask(); + } + try { + Thread.sleep(Constants.SLEEP_TIME_MILLIS); + } catch (InterruptedException e) { + logger.error(e.getMessage(),e); + } + updateProcessInstanceState(); + } + + logger.info("process:{} end, state :{}", processInstance.getId(), processInstance.getState()); + } + + private boolean canSubmitTaskToQueue() { + return OSUtils.checkResource(conf, true); + } + + + /** + * close the ongoing tasks + */ + private void kill() { + + logger.info("kill called on process instance id: {}, num: {}", processInstance.getId(), + activeTaskNode.size()); + for (Map.Entry> entry : activeTaskNode.entrySet()) { + + MasterBaseTaskExecThread taskExecThread = entry.getKey(); + Future future = entry.getValue(); + + if (!future.isDone()) { + // record kill info + logger.info("kill process instance, id: {}, task: {}", processInstance.getId(), taskExecThread.getTaskInstance().getId()); + + // kill node + taskExecThread.kill(); + } + } + } + + /*** + * generate dag graph + * @param processDag + * @return + */ + public DAG buildDagGraph(ProcessDag processDag) { + + DAG dag = new DAG<>(); + + /** + * add vertex + */ + if (CollectionUtils.isNotEmpty(processDag.getNodes())){ + for (TaskNode node : processDag.getNodes()){ + dag.addNode(node.getName(),node); + } + } + + /** + * add edge + */ + if (CollectionUtils.isNotEmpty(processDag.getEdges())){ + for (TaskNodeRelation edge : processDag.getEdges()){ + dag.addEdge(edge.getStartNode(),edge.getEndNode()); + } + } + return dag; + } + + /** + * whether the retry interval is timed out + * @param taskInstance + * @return + */ + private Boolean retryTaskIntervalOverTime(TaskInstance taskInstance){ + if(taskInstance.getState() != ExecutionStatus.FAILURE){ + return Boolean.TRUE; + } + if(taskInstance.getId() == 0 || + taskInstance.getMaxRetryTimes() 
==0 || + taskInstance.getRetryInterval() == 0 ){ + return Boolean.TRUE; + } + Date now = new Date(); + long failedTimeInterval = DateUtils.differSec(now, taskInstance.getEndTime()); + // task retry does not over time, return false + if(taskInstance.getRetryInterval() * SEC_2_MINUTES_TIME_UNIT >= failedTimeInterval){ + return Boolean.FALSE; + } + return Boolean.TRUE; + } + + /** + * handling the list of tasks to be submitted + */ + private void submitStandByTask(){ + Set readySubmitTaskNames = readyToSubmitTaskList.keySet(); + for(String readySubmitTaskName : readySubmitTaskNames){ + TaskInstance task = readyToSubmitTaskList.get(readySubmitTaskName); + DependResult dependResult = getDependResultForTask(task); + if(DependResult.SUCCESS == dependResult){ + if(retryTaskIntervalOverTime(task)){ + submitTaskExec(task); + removeTaskFromStandbyList(task); + } + }else if(DependResult.FAILED == dependResult){ + // if the dependency fails, the current node is not submitted and the state changes to failure. 
+ dependFailedTask.put(readySubmitTaskName, task); + removeTaskFromStandbyList(task); + logger.info("task {},id:{} depend result : {}",task.getName(), task.getId(), dependResult); + } + } + } + + private TaskInstance getRecoveryTaskInstance(String taskId){ + if(!StringUtils.isNotEmpty(taskId)){ + return null; + } + try { + Integer intId = Integer.valueOf(taskId); + TaskInstance task = processDao.findTaskInstanceById(intId); + if(task == null){ + logger.error("start node id cannot be found: {}", taskId); + }else { + return task; + } + }catch (Exception e){ + logger.error("get recovery task instance failed : " + e.getMessage(),e); + } + return null; + } + + /** + * get start task instance list + * @param cmdParam + * @return + */ + private List getStartTaskInstanceList( String cmdParam){ + + List instanceList = new ArrayList<>(); + Map paramMap = JSONUtils.toMap(cmdParam); + + if(paramMap != null && paramMap.containsKey(CMDPARAM_RECOVERY_START_NODE_STRING)){ + String[] idList = paramMap.get(CMDPARAM_RECOVERY_START_NODE_STRING).split(Constants.COMMA); + for(String nodeId : idList){ + TaskInstance task = getRecoveryTaskInstance(nodeId); + if(task != null){ + instanceList.add(task); + } + } + } + return instanceList; + } + + /** + * parse "StartNodeNameList" from cmd param + * @param cmdParam + * @return + */ + private List parseStartNodeName(String cmdParam){ + List startNodeNameList = new ArrayList<>(); + Map paramMap = JSONUtils.toMap(cmdParam); + if(paramMap == null){ + return startNodeNameList; + } + if(paramMap.containsKey(CMDPARAM_START_NODE_NAMES)){ + startNodeNameList = Arrays.asList(paramMap.get(CMDPARAM_START_NODE_NAMES).split(Constants.COMMA)); + } + return startNodeNameList; + } + + /** + * generate start node name list from parsing command param; + * if "StartNodeIdList" exists in command param, return StartNodeIdList + * @return + */ + private List getRecoveryNodeNameList(){ + List recoveryNodeNameList = new ArrayList<>(); + if(recoverNodeIdList.size() > 
0) { + for (TaskInstance task : recoverNodeIdList) { + recoveryNodeNameList.add(task.getName()); + } + } + return recoveryNodeNameList; + } + + /** + * generate flow dag + * @param processDefinitionJson + * @return + * @throws Exception + */ + public ProcessDag generateFlowDag(String processDefinitionJson, + List startNodeNameList, + List recoveryNodeNameList, + TaskDependType depNodeType)throws Exception{ + return DagHelper.generateFlowDag(processDefinitionJson, startNodeNameList, recoveryNodeNameList, depNodeType); + } +} diff --git a/escheduler-server/src/main/java/cn/escheduler/server/master/runner/MasterSchedulerThread.java b/escheduler-server/src/main/java/cn/escheduler/server/master/runner/MasterSchedulerThread.java new file mode 100644 index 0000000000..b845e19ae0 --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/master/runner/MasterSchedulerThread.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
package cn.escheduler.server.master.runner;

import cn.escheduler.common.Constants;
import cn.escheduler.common.thread.Stopper;
import cn.escheduler.common.thread.ThreadUtils;
import cn.escheduler.common.utils.OSUtils;
import cn.escheduler.dao.ProcessDao;
import cn.escheduler.dao.model.ProcessInstance;
import cn.escheduler.server.zk.ZKMasterClient;
import org.apache.commons.configuration.Configuration;
import org.apache.curator.framework.imps.CuratorFrameworkState;
import org.apache.curator.framework.recipes.locks.InterProcessMutex;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.concurrent.ExecutorService;
import java.util.concurrent.ThreadPoolExecutor;

/**
 * Master scheduler thread: periodically scans the command table (under a
 * ZooKeeper distributed lock) and hands each resulting process instance to a
 * {@link MasterExecThread} running on a fixed-size daemon pool.
 */
public class MasterSchedulerThread implements Runnable {

    private static final Logger logger = LoggerFactory.getLogger(MasterSchedulerThread.class);

    /** fixed-size pool that runs one MasterExecThread per process instance */
    private final ExecutorService masterExecService;

    /**
     * escheduler database interface
     */
    private final ProcessDao processDao;

    /** ZooKeeper client used for the master distributed lock */
    private final ZKMasterClient zkMasterClient;

    /** pool size; also caps how many commands may be claimed per scan */
    private int masterExecThreadNum;

    private final Configuration conf;


    /**
     * @param zkClient            master ZooKeeper client (provides the lock path)
     * @param processDao          database access layer
     * @param conf                master configuration (used for the resource check)
     * @param masterExecThreadNum size of the exec-thread pool
     */
    public MasterSchedulerThread(ZKMasterClient zkClient, ProcessDao processDao, Configuration conf, int masterExecThreadNum){
        this.processDao = processDao;
        this.zkMasterClient = zkClient;
        this.conf = conf;
        this.masterExecThreadNum = masterExecThreadNum;
        this.masterExecService = ThreadUtils.newDaemonFixedThreadExecutor("Master-Exec-Thread", masterExecThreadNum);
    }


    @Override
    public void run() {
        while (Stopper.isRunning()){

            // process instance
            ProcessInstance processInstance = null;

            InterProcessMutex mutex = null;
            try {
                // skip the scan entirely when local resources (cpu/memory) are exhausted
                if(OSUtils.checkResource(conf, true)){
                    if (zkMasterClient.getZkClient().getState() == CuratorFrameworkState.STARTED) {

                        // create distributed lock with the root node path of the lock space as /escheduler/lock/failover/master
                        String znodeLock = zkMasterClient.getMasterLockPath();

                        mutex = new InterProcessMutex(zkMasterClient.getZkClient(), znodeLock);
                        mutex.acquire();

                        ThreadPoolExecutor poolExecutor = (ThreadPoolExecutor) masterExecService;
                        int activeCount = poolExecutor.getActiveCount();
                        // make sure to scan and delete command table in one transaction;
                        // only claim as many commands as there are idle exec threads
                        processInstance = processDao.scanCommand(logger, OSUtils.getHost(), this.masterExecThreadNum - activeCount);
                        if (processInstance != null) {
                            // fixed typo: was "exex"
                            logger.info("start master exec thread , split DAG ...");
                            masterExecService.execute(new MasterExecThread(processInstance));
                        }
                    }
                }

                // accessing the command table every SLEEP_TIME_MILLIS milliseconds
                Thread.sleep(Constants.SLEEP_TIME_MILLIS);

            }catch (Exception e){
                logger.error("master scheduler thread exception : " + e.getMessage(), e);
            }finally{
                if (mutex != null){
                    try {
                        mutex.release();
                    } catch (Exception e) {
                        // constant-first equals: e.getMessage() may be null, the old
                        // e.getMessage().equals(...) form could itself throw NPE here.
                        // Curator raises exactly this message when the mutex was never
                        // successfully acquired, which is benign.
                        if("instance must be started before calling this method".equals(e.getMessage())){
                            logger.warn("lock release");
                        }else{
                            logger.error("lock release failed : " + e.getMessage(), e);
                        }
                    }
                }
            }
        }
    }


}
package cn.escheduler.server.master.runner;

import cn.escheduler.common.Constants;
import cn.escheduler.common.enums.ExecutionStatus;
import cn.escheduler.common.enums.TaskTimeoutStrategy;
import cn.escheduler.common.model.TaskNode;
import cn.escheduler.common.task.TaskTimeoutParameter;
import cn.escheduler.common.thread.Stopper;
import cn.escheduler.dao.model.ProcessDefinition;
import cn.escheduler.dao.model.ProcessInstance;
import cn.escheduler.dao.model.TaskInstance;
import com.alibaba.fastjson.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Date;

import static cn.escheduler.common.Constants.SCHEDULER_TASKS_KILL;

/**
 * Master-side exec thread for an ordinary (non-subprocess) task: submits the
 * task instance, then polls the database until the task reaches a finished
 * state, handling cancellation and (optionally) timeout alerting on the way.
 */
public class MasterTaskExecThread extends MasterBaseTaskExecThread {

    private static final Logger logger = LoggerFactory.getLogger(MasterTaskExecThread.class);


    public MasterTaskExecThread(TaskInstance taskInstance, ProcessInstance processInstance){
        super(taskInstance, processInstance);
    }

    /**
     * get task instance
     * @return the task instance this thread is tracking
     */
    @Override
    public TaskInstance getTaskInstance(){
        return this.taskInstance;
    }

    // guards cancelTaskInstance() so the kill record is enqueued at most once
    private Boolean alreadyKilled = false;

    /**
     * Submit the task and block until it finishes, then persist the end time.
     *
     * @return result of waitTaskQuit(), or false when the task was already
     *         finished at submit time
     */
    @Override
    public Boolean submitWaitComplete() {
        Boolean result = false;
        this.taskInstance = submit();
        // only wait if the submitted instance is not already in a terminal state
        if(!this.taskInstance.getState().typeIsFinished()) {
            result = waitTaskQuit();
        }
        taskInstance.setEndTime(new Date());
        processDao.updateTaskInstance(taskInstance);
        logger.info("task :{} id:{}, process id:{}, exec thread completed ",
                this.taskInstance.getName(),taskInstance.getId(), processInstance.getId() );
        return result;
    }


    /**
     * Poll until the task instance reaches a finished state.
     * Each iteration: honors stop/cancel requests, fires (at most one) timeout
     * alert when enabled, refreshes both instances from the DB, then sleeps
     * SLEEP_TIME_MILLIS.
     *
     * @return true normally; also returns true if the process instance vanished
     */
    public Boolean waitTaskQuit(){
        // query new state
        taskInstance = processDao.findTaskInstanceById(taskInstance.getId());
        Boolean result = true;
        // task time out
        Boolean checkTimeout = false;
        TaskTimeoutParameter taskTimeoutParameter = getTaskTimeoutParameter();
        if(taskTimeoutParameter.getEnable()){
            TaskTimeoutStrategy strategy = taskTimeoutParameter.getStrategy();
            // NOTE(review): only WARN / WARNFAILED enable the timeout check here;
            // a FAILED-only strategy (if one exists) would not be checked — confirm intended
            if(strategy == TaskTimeoutStrategy.WARN || strategy == TaskTimeoutStrategy.WARNFAILED){
                checkTimeout = true;
            }
        }

        while (Stopper.isRunning()){
            try {
                if(this.processInstance == null){
                    logger.error("process instance not exists , master task exec thread exit");
                    return result;
                }
                // task instance add queue , waiting worker to kill
                if(this.cancel || this.processInstance.getState() == ExecutionStatus.READY_STOP){
                    cancelTaskInstance();
                }
                // task instance finished
                if (taskInstance.getState().typeIsFinished()){
                    break;
                }
                if(checkTimeout){
                    // interval is configured in minutes; getRemaintime() takes seconds
                    long remainTime = getRemaintime(taskTimeoutParameter.getInterval()*60);
                    if (remainTime < 0) {
                        logger.warn("task id: {} execution time out",taskInstance.getId());
                        // process define
                        ProcessDefinition processDefine = processDao.findProcessDefineById(processInstance.getProcessDefinitionId());
                        // send warn mail
                        alertDao.sendTaskTimeoutAlert(processInstance.getWarningGroupId(),processDefine.getReceivers(),processDefine.getReceiversCc(),taskInstance.getId(),taskInstance.getName());
                        // alert only once per task execution
                        checkTimeout = false;
                    }
                }
                // updateProcessInstance task instance
                taskInstance = processDao.findTaskInstanceById(taskInstance.getId());
                processInstance = processDao.findProcessInstanceById(processInstance.getId());
                Thread.sleep(Constants.SLEEP_TIME_MILLIS);
            } catch (Exception e) {
                // log and keep polling; the loop only exits on finish/stop
                logger.error("exception: "+ e.getMessage(),e);
                logger.error("wait task quit failed, instance id:{}, task id:{}",
                        processInstance.getId(), taskInstance.getId());
            }
        }
        return result;
    }


    /**
     * task instance add queue , waiting worker to kill.
     * No-op when already requested, or when the task has no host yet
     * (i.e. no worker has picked it up, so there is nothing to kill).
     */
    private void cancelTaskInstance(){
        if(alreadyKilled || taskInstance.getHost() == null){
            return ;
        }
        alreadyKilled = true;
        // kill-queue entry format: "<host>-<taskInstanceId>"
        String queueValue = String.format("%s-%d",
                taskInstance.getHost(), taskInstance.getId());
        taskQueue.sadd(SCHEDULER_TASKS_KILL, queueValue);

        logger.info("master add kill task :{} id:{} to kill queue",
                taskInstance.getName(), taskInstance.getId() );
    }

    /**
     * get task timeout parameter, parsed from the task-instance node JSON
     * @return timeout settings for this task node
     */
    private TaskTimeoutParameter getTaskTimeoutParameter(){
        String taskJson = taskInstance.getTaskJson();
        TaskNode taskNode = JSONObject.parseObject(taskJson, TaskNode.class);
        return taskNode.getTaskTimeoutParameter();
    }


    /**
     * get remain time(s)
     *
     * @return timeoutSeconds minus elapsed seconds since the task start time;
     *         negative once the timeout has been exceeded
     */
    private long getRemaintime(long timeoutSeconds) {
        Date startTime = taskInstance.getStartTime();
        long usedTime = (System.currentTimeMillis() - startTime.getTime()) / 1000;
        long remainTime = timeoutSeconds - usedTime;
        return remainTime;
    }
}
package cn.escheduler.server.master.runner;

import cn.escheduler.common.Constants;
import cn.escheduler.common.enums.ExecutionStatus;
import cn.escheduler.common.thread.Stopper;
import cn.escheduler.dao.model.ProcessInstance;
import cn.escheduler.dao.model.TaskInstance;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Date;

/**
 * Master-side exec thread for a sub-process (subflow) task: submits the task,
 * waits for the child process instance to finish, and mirrors the child's
 * final state back onto the task instance.
 */
public class SubProcessTaskExecThread extends MasterBaseTaskExecThread {


    private static final Logger logger = LoggerFactory.getLogger(SubProcessTaskExecThread.class);


    // child process instance spawned for this subflow task; null until the
    // child row appears in the database
    private ProcessInstance subProcessInstance;

    public SubProcessTaskExecThread(TaskInstance taskInstance, ProcessInstance processInstance){
        super(taskInstance, processInstance);
    }

    /**
     * Submit the subflow task, wait for the child process to finish, then copy
     * the child's terminal state onto the task instance (STOP maps to KILL).
     *
     * @return true when the wait completed normally; false on submit failure
     *         or exception
     */
    @Override
    public Boolean submitWaitComplete() {

        Boolean result = false;
        try{
            // submit task instance
            this.taskInstance = submit();

            if(taskInstance == null){
                logger.error("sub work flow submit task instance to mysql and queue failed , please check and fix it");
                return result;
            }
            setTaskInstanceState();
            waitTaskQuit();
            subProcessInstance = processDao.findSubProcessInstance(processInstance.getId(), taskInstance.getId());

            // at the end of the subflow , the task state is changed to the subflow state
            if(subProcessInstance != null){
                if(subProcessInstance.getState() == ExecutionStatus.STOP){
                    // a stopped child is reported as a killed task
                    this.taskInstance.setState(ExecutionStatus.KILL);
                }else{
                    this.taskInstance.setState(subProcessInstance.getState());
                    result = true;
                }
            }
            taskInstance.setEndTime(new Date());
            processDao.updateTaskInstance(taskInstance);
            logger.info("subflow task :{} id:{}, process id:{}, exec thread completed ",
                    this.taskInstance.getName(),taskInstance.getId(), processInstance.getId() );
            // NOTE(review): result is set true here regardless of the branch above,
            // which makes the earlier "result = true" redundant — confirm intended
            result = true;

        }catch (Exception e){
            logger.error("exception: "+ e.getMessage(),e);
            logger.error("wait task quit failed, instance id:{}, task id:{}",
                    processInstance.getId(), taskInstance.getId());
        }
        return result;
    }


    /**
     * set task instance state to RUNNING_EXEUTION once the child process
     * instance exists.
     *
     * @return false while the child row is absent or the task already finished
     */
    private Boolean setTaskInstanceState(){
        subProcessInstance = processDao.findSubProcessInstance(processInstance.getId(), taskInstance.getId());
        if(subProcessInstance == null || taskInstance.getState().typeIsFinished()){
            return false;
        }

        taskInstance.setState(ExecutionStatus.RUNNING_EXEUTION);
        taskInstance.setStartTime(new Date());
        processDao.updateTaskInstance(taskInstance);
        return true;
    }

    /**
     * updateProcessInstance parent state: refresh the cached parent state from
     * the database so pause/stop requests issued to the parent are observed.
     */
    private void updateParentProcessState(){
        ProcessInstance parentProcessInstance = processDao.findProcessInstanceById(this.processInstance.getId());

        if(parentProcessInstance == null){
            logger.error("parent work flow instance is null , please check it! work flow id {}", processInstance.getId());
            return;
        }
        this.processInstance.setState(parentProcessInstance.getState());
    }

    /**
     * wait task quit: poll until the child process instance reaches a finished
     * state, forwarding parent pause/stop requests to the child on each pass.
     *
     * @throws InterruptedException when the polling sleep is interrupted
     */
    private void waitTaskQuit() throws InterruptedException {

        logger.info("wait sub work flow: {} complete", this.taskInstance.getName());

        if (taskInstance.getState().typeIsFinished()) {
            logger.info("sub work flow task {} already complete. task state:{}, parent work flow instance state:{}",
                    this.taskInstance.getName(),
                    this.taskInstance.getState().toString(),
                    this.processInstance.getState().toString());
            return;
        }
        while (Stopper.isRunning()) {
            // waiting for subflow process instance establishment
            if (subProcessInstance == null) {

                Thread.sleep(Constants.SLEEP_TIME_MILLIS);

                // setTaskInstanceState() re-queries the child row; keep waiting
                // until it exists
                if(!setTaskInstanceState()){
                    continue;
                }
            }
            subProcessInstance = processDao.findProcessInstanceById(subProcessInstance.getId());
            updateParentProcessState();
            if (subProcessInstance.getState().typeIsFinished()){
                break;
            }

            if(this.processInstance.getState() == ExecutionStatus.READY_PAUSE){
                // parent process "ready to pause" , child process "pause"
                pauseSubProcess();
            }else if(this.cancel || this.processInstance.getState() == ExecutionStatus.READY_STOP){
                // parent Process "Ready to Cancel" , subflow "Cancel"
                stopSubProcess();
            }
            Thread.sleep(Constants.SLEEP_TIME_MILLIS);
        }
    }

    /**
     * stop subflow: mark the child READY_STOP unless a stop is already in
     * progress or complete.
     */
    private void stopSubProcess() {
        if(subProcessInstance.getState() == ExecutionStatus.STOP ||
                subProcessInstance.getState() == ExecutionStatus.READY_STOP){
            return;
        }
        subProcessInstance.setState(ExecutionStatus.READY_STOP);
        processDao.updateProcessInstance(subProcessInstance);
    }

    /**
     * pause subflow: mark the child READY_PAUSE unless a pause is already in
     * progress or complete.
     */
    private void pauseSubProcess() {
        if(subProcessInstance.getState() == ExecutionStatus.PAUSE ||
                subProcessInstance.getState() == ExecutionStatus.READY_PAUSE){
            return;
        }
        subProcessInstance.setState(ExecutionStatus.READY_PAUSE);
        processDao.updateProcessInstance(subProcessInstance);
    }
}
package cn.escheduler.server.rpc;

import cn.escheduler.rpc.*;
import io.grpc.ManagedChannel;
import io.grpc.ManagedChannelBuilder;
import io.grpc.StatusRuntimeException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.concurrent.TimeUnit;

/**
 * Blocking gRPC client for the remote log-view service: roll view (paged),
 * full view, and raw byte download of a log file by path.
 */
public class LogClient {

    private static final Logger logger = LoggerFactory.getLogger(LogClient.class);

    private final ManagedChannel channel;
    private final LogViewServiceGrpc.LogViewServiceBlockingStub blockingStub;

    /** Construct client connecting to HelloWorld server at {@code host:port}. */
    public LogClient(String host, int port) {
        // plaintext transport: TLS is deliberately disabled here so no
        // certificates are required
        this(ManagedChannelBuilder.forAddress(host, port).usePlaintext(true));
    }

    /** Construct client for accessing RouteGuide server using the existing channel. */
    LogClient(ManagedChannelBuilder channelBuilder) {
        // lift the inbound size cap so arbitrarily large logs can be received
        channelBuilder.maxInboundMessageSize(Integer.MAX_VALUE);
        this.channel = channelBuilder.build();
        this.blockingStub = LogViewServiceGrpc.newBlockingStub(this.channel);
    }

    /** Shut the channel down, waiting up to 5 seconds for termination. */
    public void shutdown() throws InterruptedException {
        channel.shutdown().awaitTermination(5, TimeUnit.SECONDS);
    }

    /**
     * roll view log
     * @param path        log file path on the server
     * @param skipLineNum lines to skip from the top
     * @param limit       maximum lines to return
     * @return the requested slice of the log, or null on RPC failure
     */
    public String rollViewLog(String path, int skipLineNum, int limit) {
        logger.info("roll view log , path : {},skipLineNum : {} ,limit :{}", path, skipLineNum, limit);
        LogParameter request = LogParameter.newBuilder()
                .setPath(path)
                .setSkipLineNum(skipLineNum)
                .setLimit(limit)
                .build();
        try {
            return blockingStub.rollViewLog(request).getMsg();
        } catch (StatusRuntimeException e) {
            logger.error("roll view log failed : " + e.getMessage(), e);
            return null;
        }
    }

    /**
     * view all log
     * @param path log file path on the server
     * @return the whole log content, or null on RPC failure
     */
    public String viewLog(String path) {
        logger.info("view log path : {}",path);
        PathParameter request = PathParameter.newBuilder().setPath(path).build();
        try {
            return blockingStub.viewLog(request).getMsg();
        } catch (StatusRuntimeException e) {
            logger.error("view log failed : " + e.getMessage(), e);
            return null;
        }
    }

    /**
     * get log bytes
     * @param path log file path on the server
     * @return raw file bytes, or null on RPC failure
     */
    public byte[] getLogBytes(String path) {
        logger.info("get log bytes {}",path);
        PathParameter request = PathParameter.newBuilder().setPath(path).build();
        try {
            return blockingStub.getLogBytes(request).getData().toByteArray();
        } catch (StatusRuntimeException e) {
            logger.error("get log bytes failed : " + e.getMessage(), e);
            return null;
        }
    }
}
file diff --git a/escheduler-server/src/main/java/cn/escheduler/server/rpc/LoggerServer.java b/escheduler-server/src/main/java/cn/escheduler/server/rpc/LoggerServer.java new file mode 100644 index 0000000000..8913cdda06 --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/rpc/LoggerServer.java @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
package cn.escheduler.server.rpc;

import cn.escheduler.common.Constants;
import cn.escheduler.rpc.*;
import com.google.protobuf.ByteString;
import io.grpc.Server;
import io.grpc.ServerBuilder;
import io.grpc.stub.StreamObserver;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * logger server: gRPC service exposing log files on this host — paged roll
 * view, full view, and raw byte download.
 */
public class LoggerServer {

    private static final Logger logger = LoggerFactory.getLogger(LoggerServer.class);

    /**
     * server
     */
    private Server server;

    /** Start the gRPC server on Constants.RPC_PORT and register a JVM shutdown hook. */
    private void start() throws IOException {
        /* The port on which the server should run */
        int port = Constants.RPC_PORT;
        server = ServerBuilder.forPort(port)
                .addService(new LogViewServiceGrpcImpl())
                .build()
                .start();
        logger.info("server started, listening on port : {}" , port);
        Runtime.getRuntime().addShutdownHook(new Thread() {
            @Override
            public void run() {
                // Use stderr here since the logger may have been reset by its JVM shutdown hook.
                logger.info("shutting down gRPC server since JVM is shutting down");
                LoggerServer.this.stop();
                logger.info("server shut down");
            }
        });
    }

    /** Initiate an orderly shutdown (no-op when never started). */
    private void stop() {
        if (server != null) {
            server.shutdown();
        }
    }

    /**
     * await termination on the main thread since the grpc library uses daemon threads.
     */
    private void blockUntilShutdown() throws InterruptedException {
        if (server != null) {
            server.awaitTermination();
        }
    }

    /**
     * main launches the server from the command line.
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        final LoggerServer server = new LoggerServer();
        server.start();
        server.blockUntilShutdown();
    }


    /** gRPC service implementation backing the three log-view RPCs. */
    static class LogViewServiceGrpcImpl extends LogViewServiceGrpc.LogViewServiceImplBase {

        @Override
        public void rollViewLog(LogParameter request, StreamObserver<RetStrInfo> responseObserver) {
            logger.info("log parameter path : {} ,skipLine : {}, limit : {}",
                    request.getPath(),
                    request.getSkipLineNum(),
                    request.getLimit());
            List<String> list = readFile(request.getPath(), request.getSkipLineNum(), request.getLimit());
            StringBuilder sb = new StringBuilder();
            for (String line : list){
                sb.append(line).append("\r\n");
            }
            RetStrInfo retInfoBuild = RetStrInfo.newBuilder().setMsg(sb.toString()).build();
            responseObserver.onNext(retInfoBuild);
            responseObserver.onCompleted();
        }

        @Override
        public void viewLog(PathParameter request, StreamObserver<RetStrInfo> responseObserver) {
            logger.info("task path is : {} " , request.getPath());
            RetStrInfo retInfoBuild = RetStrInfo.newBuilder().setMsg(readFile(request.getPath())).build();
            responseObserver.onNext(retInfoBuild);
            responseObserver.onCompleted();
        }

        @Override
        public void getLogBytes(PathParameter request, StreamObserver<RetByteInfo> responseObserver) {
            try {
                ByteString bytes = ByteString.copyFrom(getFileBytes(request.getPath()));
                RetByteInfo.Builder builder = RetByteInfo.newBuilder();
                builder.setData(bytes);
                responseObserver.onNext(builder.build());
                responseObserver.onCompleted();
            }catch (Exception e){
                logger.error("get log bytes failed : " + e.getMessage(),e);
            }
        }
    }

    /**
     * get files bytes.
     * Fixed: the original closed {@code bos}/{@code in} in a finally block
     * without null checks — if {@code new FileInputStream(path)} threw, both
     * were still null and the finally itself raised NPE. try-with-resources
     * closes only what was actually opened.
     *
     * @param path file path to read
     * @return file content, or null when reading fails (failure is logged)
     */
    private static byte[] getFileBytes(String path) throws IOException {
        try (InputStream in = new FileInputStream(path);
             ByteArrayOutputStream bos = new ByteArrayOutputStream()) {
            byte[] buffer = new byte[4096];
            int n;
            while ((n = in.read(buffer)) != -1) {
                bos.write(buffer, 0, n);
            }
            return bos.toByteArray();
        } catch (IOException e) {
            logger.error("getFileBytes error", e);
        }
        return null;
    }

    /**
     * read file content
     * @param path     file path
     * @param skipLine lines to skip from the top
     * @param limit    maximum lines to return
     * @return the requested lines, or null when reading fails (failure is logged)
     */
    private static List<String> readFile(String path, int skipLine, int limit){
        try (Stream<String> stream = Files.lines(Paths.get(path))) {
            return stream.skip(skipLine).limit(limit).collect(Collectors.toList());
        } catch (IOException e) {
            logger.error("read file failed : " + e.getMessage(), e);
        }
        return null;
    }

    /**
     * read file content (whole file, lines joined with CRLF).
     * NOTE(review): reads with the platform default charset, as the original
     * did — confirm whether logs are always UTF-8 before pinning a charset.
     *
     * @param path file path
     * @return whole file content, or null when reading fails (failure is logged)
     */
    private static String readFile(String path){
        StringBuilder sb = new StringBuilder();
        try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(path)))) {
            String line;
            while ((line = br.readLine()) != null){
                sb.append(line).append("\r\n");
            }
            return sb.toString();
        } catch (IOException e){
            logger.error("read file failed : " + e.getMessage(), e);
        }
        return null;
    }

}
package cn.escheduler.server.utils;


import cn.escheduler.common.enums.AlertType;
import cn.escheduler.common.enums.CommandType;
import cn.escheduler.common.enums.ShowType;
import cn.escheduler.common.enums.WarningType;
import cn.escheduler.common.utils.DateUtils;
import cn.escheduler.common.utils.JSONUtils;
import cn.escheduler.dao.AlertDao;
import cn.escheduler.dao.DaoFactory;
import cn.escheduler.dao.model.Alert;
import cn.escheduler.dao.model.ProcessInstance;
import cn.escheduler.dao.model.TaskInstance;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.List;

/**
 * alert manager: builds alert content for process-instance completion and
 * worker fault-tolerance events and persists the alerts via AlertDao.
 * (Public method names with spelling quirks — sendWarnningWorkerleranceFault,
 * sendWarnningOfProcessInstance — are kept for caller compatibility.)
 */
public class AlertManager {

    private static final Logger logger = LoggerFactory.getLogger(AlertManager.class);

    private AlertDao alertDao = DaoFactory.getDaoInstance(AlertDao.class);


    /**
     * command type convert chinese
     * @param commandType the command type to describe
     * @return Chinese display name for the command type
     */
    private String getCommandCnName(CommandType commandType) {
        switch (commandType) {
            case RECOVER_TOLERANCE_FAULT_PROCESS:
                return "恢复容错";
            case RECOVER_SUSPENDED_PROCESS:
                return "恢复暂停流程";
            case START_CURRENT_TASK_PROCESS:
                return "从当前节点开始执行";
            case START_FAILURE_TASK_PROCESS:
                return "从失败节点开始执行";
            case START_PROCESS:
                return "启动工作流";
            case REPEAT_RUNNING:
                return "重跑";
            case SCHEDULER:
                return "定时执行";
            case COMPLEMENT_DATA:
                return "补数";
            case PAUSE:
                return "暂停工作流";
            case STOP:
                return "停止工作流";
            default:
                return "未知的命令类型";
        }
    }

    /**
     * process instance format
     */
    private static final String PROCESS_INSTANCE_FORMAT =
            "\"Id:%d\"," +
                    "\"Name:%s\"," +
                    "\"Job type: %s\"," +
                    "\"State: %s\"," +
                    "\"Recovery:%s\"," +
                    "\"Run time: %d\"," +
                    "\"Start time: %s\"," +
                    "\"End time: %s\"," +
                    "\"Host: %s\"" ;

    /**
     * get process instance content: a one-element summary for a successful
     * instance, or a JSON table of the non-successful tasks for a failed one.
     *
     * @param processInstance the finished process instance
     * @param taskInstances   its task instances (scanned on failure)
     * @return alert body; empty string when the state is neither success nor failure
     */
    public String getContentProcessInstance(ProcessInstance processInstance,
                                            List<TaskInstance> taskInstances){

        String res = "";
        if(processInstance.getState().typeIsSuccess()){
            res = String.format(PROCESS_INSTANCE_FORMAT,
                    processInstance.getId(),
                    processInstance.getName(),
                    getCommandCnName(processInstance.getCommandType()),
                    processInstance.getState().toString(),
                    processInstance.getRecovery().toString(),
                    processInstance.getRunTimes(),
                    DateUtils.dateToString(processInstance.getStartTime()),
                    DateUtils.dateToString(processInstance.getEndTime()),
                    processInstance.getHost()
            );
            res = "[" + res + "]";
        }else if(processInstance.getState().typeIsFailure()){

            // list every task that did not succeed, one map per task
            List<LinkedHashMap<String, String>> failedTaskList = new ArrayList<>();

            for(TaskInstance task : taskInstances){
                if(task.getState().typeIsSuccess()){
                    continue;
                }
                LinkedHashMap<String, String> failedTaskMap = new LinkedHashMap<>();
                failedTaskMap.put("任务id", String.valueOf(task.getId()));
                failedTaskMap.put("任务名称", task.getName());
                failedTaskMap.put("任务类型", task.getTaskType());
                failedTaskMap.put("任务状态", task.getState().toString());
                failedTaskMap.put("任务开始时间", DateUtils.dateToString(task.getStartTime()));
                failedTaskMap.put("任务结束时间", DateUtils.dateToString(task.getEndTime()));
                failedTaskMap.put("host", task.getHost());
                failedTaskMap.put("日志路径", task.getLogPath());
                failedTaskList.add(failedTaskMap);
            }
            res = JSONUtils.toJson(failedTaskList);
        }

        return res;
    }

    /**
     * getting worker fault tolerant content
     * @param processInstance   process instance the tolerated tasks belong to
     * @param toleranceTaskList tasks taken over from a failed worker
     * @return JSON table describing the tolerated tasks
     */
    private String getWorkerToleranceContent(ProcessInstance processInstance, List<TaskInstance> toleranceTaskList){

        List<LinkedHashMap<String, String>> toleranceTaskInstanceList = new ArrayList<>();

        for(TaskInstance taskInstance: toleranceTaskList){
            LinkedHashMap<String, String> toleranceWorkerContentMap = new LinkedHashMap<>();
            toleranceWorkerContentMap.put("工作流程名称", processInstance.getName());
            toleranceWorkerContentMap.put("容错任务名称", taskInstance.getName());
            toleranceWorkerContentMap.put("容错机器IP", taskInstance.getHost());
            toleranceWorkerContentMap.put("任务失败次数", String.valueOf(taskInstance.getRetryTimes()));
            toleranceTaskInstanceList.add(toleranceWorkerContentMap);
        }
        return JSONUtils.toJson(toleranceTaskInstanceList);
    }

    /**
     * send worker alert fault tolerance
     * @param processInstance   process instance the tolerated tasks belong to
     * @param toleranceTaskList tasks taken over from a failed worker
     */
    public void sendWarnningWorkerleranceFault(ProcessInstance processInstance, List<TaskInstance> toleranceTaskList){
        Alert alert = new Alert();
        alert.setTitle("worker容错报警");
        alert.setShowType(ShowType.TABLE);
        String content = getWorkerToleranceContent(processInstance, toleranceTaskList);
        alert.setContent(content);
        alert.setAlertType(AlertType.EMAIL);
        alert.setCreateTime(new Date());
        alert.setAlertGroupId(processInstance.getWarningGroupId());
        alert.setReceivers(processInstance.getProcessDefinition().getReceivers());
        alert.setReceiversCc(processInstance.getProcessDefinition().getReceiversCc());

        alertDao.addAlert(alert);
        logger.info("add alert to db , alert : {}", alert.toString());
    }

    /**
     * send process instance alert, honoring the instance's warning type
     * (ALL / SUCCESS / FAILURE — anything else sends nothing).
     *
     * @param processInstance finished process instance
     * @param taskInstances   its task instances (used for failure content)
     */
    public void sendWarnningOfProcessInstance(ProcessInstance processInstance,
                                              List<TaskInstance> taskInstances){

        boolean sendWarnning = false;
        WarningType warningType = processInstance.getWarningType();
        switch (warningType){
            case ALL:
                if(processInstance.getState().typeIsFinished()){
                    sendWarnning = true;
                }
                break;
            case SUCCESS:
                if(processInstance.getState().typeIsSuccess()){
                    sendWarnning = true;
                }
                break;
            case FAILURE:
                if(processInstance.getState().typeIsFailure()){
                    sendWarnning = true;
                }
                break;
            default:
        }
        if(!sendWarnning){
            return;
        }
        Alert alert = new Alert();


        String cmdName = getCommandCnName(processInstance.getCommandType());
        String success = processInstance.getState().typeIsSuccess() ? "成功" :"失败";
        alert.setTitle(cmdName + success);
        // success alerts are short text; failure alerts carry a task table
        ShowType showType = processInstance.getState().typeIsSuccess() ? ShowType.TEXT : ShowType.TABLE;
        alert.setShowType(showType);
        String content = getContentProcessInstance(processInstance, taskInstances);
        alert.setContent(content);
        alert.setAlertType(AlertType.EMAIL);
        alert.setAlertGroupId(processInstance.getWarningGroupId());
        alert.setCreateTime(new Date());
        alert.setReceivers(processInstance.getProcessDefinition().getReceivers());
        alert.setReceiversCc(processInstance.getProcessDefinition().getReceiversCc());

        alertDao.addAlert(alert);
        logger.info("add alert to db , alert: {}", alert.toString());
    }

}
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package cn.escheduler.server.utils; + +import org.slf4j.Logger; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * logger utils + */ +public class LoggerUtils { + + /** + * rules for extracting application ID + */ + private static final Pattern APPLICATION_REGEX = Pattern.compile("\\d+_\\d+"); + + /** + * build job id + * @param affix + * @param processDefId + * @param processInstId + * @param taskId + * @return + */ + public static String buildTaskId(String affix, + int processDefId, + int processInstId, + int taskId){ + return String.format("%s_%s_%s_%s",affix, + processDefId, + processInstId, + taskId); + } + + + /** + * processing log + * get yarn application id list + * @param log + * @param logger + * @return + */ + public static List getAppIds(String log, Logger logger) { + + List appIds = new ArrayList(); + + Matcher matcher = APPLICATION_REGEX.matcher(log); + + // analyse logs to get all submit yarn application id + while (matcher.find()) { + String appId = matcher.group(); + if(!appIds.contains(appId)){ + logger.info("find app id: {}", appId); + appIds.add(appId); + } + } + return appIds; + } +} \ No newline at end of file diff --git a/escheduler-server/src/main/java/cn/escheduler/server/utils/ParamUtils.java b/escheduler-server/src/main/java/cn/escheduler/server/utils/ParamUtils.java new file mode 100644 index 0000000000..e3041ffd9d --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/utils/ParamUtils.java @@ -0,0 +1,103 @@ +/* + * 
package cn.escheduler.server.utils;

import cn.escheduler.common.enums.CommandType;
import cn.escheduler.common.process.Property;
import cn.escheduler.common.utils.ParameterUtils;
import cn.escheduler.common.utils.placeholder.BusinessTimeUtils;

import java.util.Date;
import java.util.HashMap;
import java.util.Map;

/**
 * param utils
 */
public class ParamUtils {

    /** utility class, no instances */
    private ParamUtils() {
    }

    /**
     * merge global and local parameters and resolve "$..." placeholder
     * references against the business-time variables.
     *
     * @param globalParams    solidified global parameters of the process instance (name -> Property)
     * @param globalParamsMap plain global parameter values used for placeholder substitution
     * @param localParams     task-level parameters; they override globals with the same name
     * @param commandType     command type, decides how business time is derived
     * @param scheduleTime    schedule time used to compute business-time variables
     * @return merged parameter map, or null when neither global nor local parameters exist
     */
    public static Map<String, Property> convert(Map<String, Property> globalParams,
                                                Map<String, String> globalParamsMap,
                                                Map<String, Property> localParams,
                                                CommandType commandType,
                                                Date scheduleTime){
        // BUGFIX: the original condition tested "globalParams == null" twice;
        // there is nothing to merge when both global and local params are absent
        if (globalParams == null && localParams == null){
            return null;
        }
        // if it is a complement, the task instance id locates the time
        // of the process instance complement
        Map<String, String> timeParams = BusinessTimeUtils
                .getBusinessTime(commandType,
                        scheduleTime);

        if (globalParamsMap != null){
            timeParams.putAll(globalParamsMap);
        }

        // local parameters take precedence over globals with the same name
        if (globalParams != null && localParams != null){
            globalParams.putAll(localParams);
        } else if (globalParams == null && localParams != null){
            globalParams = localParams;
        }

        for (Map.Entry<String, Property> en : globalParams.entrySet()){
            Property property = en.getValue();

            if (property.getValue() != null && property.getValue().length() > 0
                    && property.getValue().startsWith("$")){
                /**
                 * local parameter refers to global parameter with the same name
                 * note: the global parameters of the process instance here are solidified parameters,
                 * and there are no variables in them.
                 */
                String val = property.getValue();
                val = ParameterUtils.convertParameterPlaceholders(val, timeParams);
                property.setValue(val);
            }
        }

        return globalParams;
    }

    /**
     * flatten a name -> Property map into a plain name -> value map.
     * @param paramsMap name -> Property
     * @return name -> property value
     */
    public static Map<String, String> convert(Map<String, Property> paramsMap){
        Map<String, String> map = new HashMap<>();
        for (Map.Entry<String, Property> en : paramsMap.entrySet()){
            map.put(en.getKey(), en.getValue().getValue());
        }
        return map;
    }
}
package cn.escheduler.server.utils;

import cn.escheduler.common.Constants;
import cn.escheduler.common.utils.CommonUtils;
import cn.escheduler.dao.model.TaskInstance;
import cn.escheduler.server.rpc.LogClient;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * mainly used to get the start command line of a process.
 *
 * The quoting/escaping helpers below mirror the command-line verification
 * logic of the JDK's Windows ProcessImpl: arguments that contain special
 * characters are wrapped in double quotes, with rules that differ for
 * legacy mode, win32 executables and CMD/BAT shell files.
 */
public class ProcessUtils {
    /**
     * logger
     */
    private final static Logger logger = LoggerFactory.getLogger(ProcessUtils.class);

    /**
     * build a single command-line string from the argument list, quoting
     * arguments as required.
     * @param commandList executable followed by its arguments
     * @return the assembled command-line string
     * @throws IOException declared for callers; quoting itself does not do I/O
     */
    public static String buildCommandStr(List commandList) throws IOException {
        String cmdstr;
        String[] cmd = commandList.toArray(new String[commandList.size()]);
        SecurityManager security = System.getSecurityManager();
        boolean allowAmbiguousCommands = false;
        // without a security manager, ambiguous (legacy) quoting is allowed
        // unless explicitly disabled via the system property
        if (security == null) {
            allowAmbiguousCommands = true;
            String value = System.getProperty("jdk.lang.Process.allowAmbiguousCommands");
            if (value != null) {
                allowAmbiguousCommands = !"false".equalsIgnoreCase(value);
            }
        }
        if (allowAmbiguousCommands) {

            // legacy mode: quote the executable only if it needs escaping
            String executablePath = new File(cmd[0]).getPath();

            if (needsEscaping(VERIFICATION_LEGACY, executablePath)) {
                executablePath = quoteString(executablePath);
            }

            cmdstr = createCommandLine(
                    VERIFICATION_LEGACY, executablePath, cmd);
        } else {
            String executablePath;
            try {
                executablePath = getExecutablePath(cmd[0]);
            } catch (IllegalArgumentException e) {

                // executable name contains an embedded quote: re-tokenize the
                // whole command line and try again
                StringBuilder join = new StringBuilder();
                for (String s : cmd) {
                    join.append(s).append(' ');
                }

                cmd = getTokensFromCommand(join.toString());
                executablePath = getExecutablePath(cmd[0]);

                // Check new executable name once more
                if (security != null) {
                    security.checkExec(executablePath);
                }
            }

            // CMD/BAT files get the stricter quoting rules
            cmdstr = createCommandLine(

                    isShellFile(executablePath) ? VERIFICATION_CMD_BAT : VERIFICATION_WIN32, quoteString(executablePath), cmd);
        }
        return cmdstr;
    }

    /**
     * strip surrounding quotes (if any) from the executable path; rejects
     * paths with embedded quotes.
     */
    private static String getExecutablePath(String path) throws IOException {
        boolean pathIsQuoted = isQuoted(true, path, "Executable name has embedded quote, split the arguments");

        File fileToRun = new File(pathIsQuoted ? path.substring(1, path.length() - 1) : path);
        return fileToRun.getPath();
    }

    /** whether the executable is a CMD/BAT shell file (case-insensitive). */
    private static boolean isShellFile(String executablePath) {
        String upPath = executablePath.toUpperCase();
        return (upPath.endsWith(".CMD") || upPath.endsWith(".BAT"));
    }

    /** wrap the argument in double quotes. */
    private static String quoteString(String arg) {
        StringBuilder argbuf = new StringBuilder(arg.length() + 2);
        return argbuf.append('"').append(arg).append('"').toString();
    }

    /** split a command line into tokens: quoted segments or whitespace-free runs. */
    private static String[] getTokensFromCommand(String command) {
        ArrayList matchList = new ArrayList<>(8);
        Matcher regexMatcher = LazyPattern.PATTERN.matcher(command);
        while (regexMatcher.find()) {
            matchList.add(regexMatcher.group());
        }
        return matchList.toArray(new String[matchList.size()]);
    }

    /** holder class so the pattern is compiled lazily, once. */
    private static class LazyPattern {
        // Escape-support version:
        // "(\")((?:\\\\\\1|.)+?)\\1|([^\\s\"]+)";
        private static final Pattern PATTERN = Pattern.compile("[^\\s\"]+|\"[^\"]*\"");
    }

    // verification modes, indexing into ESCAPE_VERIFICATION below
    private static final int VERIFICATION_CMD_BAT = 0;

    private static final int VERIFICATION_WIN32 = 1;

    private static final int VERIFICATION_LEGACY = 2;

    // characters that force quoting, per verification mode
    private static final char[][] ESCAPE_VERIFICATION = {{' ', '\t', '<', '>', '&', '|', '^'},

            {' ', '\t', '<', '>'}, {' ', '\t'}};

    /**
     * join executable and arguments into one command line, quoting each
     * argument that needs it for the given verification mode.
     */
    private static String createCommandLine(int verificationType, final String executablePath, final String[] cmd) {
        StringBuilder cmdbuf = new StringBuilder(80);

        cmdbuf.append(executablePath);

        for (int i = 1; i < cmd.length; ++i) {
            cmdbuf.append(' ');
            String s = cmd[i];
            if (needsEscaping(verificationType, s)) {
                cmdbuf.append('"').append(s);

                // a trailing backslash before the closing quote must be doubled
                // (except in CMD/BAT mode)
                if ((verificationType != VERIFICATION_CMD_BAT) && s.endsWith("\\")) {
                    cmdbuf.append('\\');
                }
                cmdbuf.append('"');
            } else {
                cmdbuf.append(s);
            }
        }
        return cmdbuf.toString();
    }

    /**
     * whether the argument is already wrapped in quotes; optionally rejects
     * arguments with quotes inside.
     */
    private static boolean isQuoted(boolean noQuotesInside, String arg, String errorMessage) {
        int lastPos = arg.length() - 1;
        if (lastPos >= 1 && arg.charAt(0) == '"' && arg.charAt(lastPos) == '"') {
            // The argument has already been quoted.
            if (noQuotesInside) {
                if (arg.indexOf('"', 1) != lastPos) {
                    // There is ["] inside.
                    throw new IllegalArgumentException(errorMessage);
                }
            }
            return true;
        }
        if (noQuotesInside) {
            if (arg.indexOf('"') >= 0) {
                // There is ["] inside.
                throw new IllegalArgumentException(errorMessage);
            }
        }
        return false;
    }

    /**
     * whether the (unquoted) argument contains any character that requires
     * quoting in the given verification mode.
     */
    private static boolean needsEscaping(int verificationType, String arg) {

        boolean argIsQuoted = isQuoted((verificationType == VERIFICATION_CMD_BAT), arg, "Argument has embedded quote, use the explicit CMD.EXE call.");

        if (!argIsQuoted) {
            char[] testEscape = ESCAPE_VERIFICATION[verificationType];
            for (int i = 0; i < testEscape.length; ++i) {
                if (arg.indexOf(testEscape[i]) >= 0) {
                    return true;
                }
            }
        }
        return false;
    }


    /**
     * kill the last submitted yarn application by generating a kill script
     * in the task work dir and executing it (via sudo -u tenant when a
     * tenant code is given).
     * NOTE(review): only the LAST application id is killed — presumably the
     * one still running; earlier ids are ignored.
     * @param appIds     yarn application ids extracted from the task log
     * @param logger     task logger
     * @param tenantCode tenant to run the kill command as (may be empty)
     * @param workDir    task execution directory where the script is written
     * @throws IOException declared for callers; failures are logged, not thrown
     */
    public static void cancelApplication(List appIds, Logger logger, String tenantCode,String workDir)
            throws IOException {
        if (appIds.size() > 0) {
            String appid = appIds.get(appIds.size() - 1);
            String commandFile = String
                    .format("%s/%s.kill", workDir, appid);
            String cmd = "yarn application -kill " + appid;
            try {
                StringBuilder sb = new StringBuilder();
                sb.append("#!/bin/sh\n");
                sb.append("BASEDIR=$(cd `dirname $0`; pwd)\n");
                sb.append("cd $BASEDIR\n");
                // source the system environment so the yarn client is on PATH
                if (CommonUtils.getSystemEnvPath() != null) {
                    sb.append("source " + CommonUtils.getSystemEnvPath() + "\n");
                }
                sb.append("\n\n");
                sb.append(cmd);

                File f = new File(commandFile);

                if (!f.exists()) {
                    FileUtils.writeStringToFile(new File(commandFile), sb.toString(), Charset.forName("UTF-8"));
                }

                String runCmd = "sh " + commandFile;
                if (StringUtils.isNotEmpty(tenantCode)) {
                    runCmd = "sudo -u " + tenantCode + " " + runCmd;
                }

                logger.info("kill cmd:{}", runCmd);

                Runtime.getRuntime().exec(runCmd);
            } catch (Exception e) {
                logger.error("kill application failed : " + e.getMessage(), e);
            }
        }
    }

    /**
     * kill the task's OS process (kill -9) and then any yarn jobs it
     * submitted.
     * @param taskInstance task instance whose pid and log are used
     */
    public static void kill(TaskInstance taskInstance) {
        try {
            int processId = taskInstance.getPid();
            // pid 0 means the process was never started / already gone
            if(processId == 0 ){
                logger.error("process kill failed, process id :{}, task id:{}",
                        processId, taskInstance.getId());
                return ;
            }

            String cmd = String.format("sudo kill -9 %d", processId);

            logger.info("process id:{}, cmd:{}", processId, cmd);

            Runtime.getRuntime().exec(cmd);

            // find log and kill yarn job
            killYarnJob(taskInstance);

        } catch (Exception e) {
            logger.error("kill failed : " + e.getMessage(), e);
        }
    }

    /**
     * fetch the task log over RPC from the worker that ran the task,
     * extract the yarn application ids from it and kill them.
     * @param taskInstance task instance whose log is inspected
     * @throws Exception wrapped RuntimeException when the kill fails
     */
    public static void killYarnJob(TaskInstance taskInstance) throws Exception {
        try {
            // give the task a moment to flush its log before reading it
            Thread.sleep(Constants.SLEEP_TIME_MILLIS);
            LogClient logClient = new LogClient(taskInstance.getHost(), Constants.RPC_PORT);

            String log = logClient.viewLog(taskInstance.getLogPath());
            if (StringUtils.isNotEmpty(log)) {
                List appIds = LoggerUtils.getAppIds(log, logger);
                String workerDir = taskInstance.getExecutePath();
                if (StringUtils.isEmpty(workerDir)) {
                    logger.error("task instance work dir is empty");
                    throw new RuntimeException("task instance work dir is empty");
                }
                if (appIds.size() > 0) {
                    cancelApplication(appIds, logger, taskInstance.getProcessInstance().getTenantCode(), taskInstance.getExecutePath());
                }
            }

        } catch (Exception e) {
            logger.error("kill yarn job failed : " + e.getMessage(),e);
            throw new RuntimeException("kill yarn job fail");
        }
    }
}
package cn.escheduler.server.utils;


import cn.escheduler.common.Constants;
import cn.escheduler.common.enums.ProgramType;
import cn.escheduler.common.task.spark.SparkParameters;
import org.apache.commons.lang.StringUtils;

import java.util.ArrayList;
import java.util.List;


/**
 * spark args utils
 */
public class SparkArgsUtils {

    /** utility class, no instances */
    private SparkArgsUtils() {
    }

    /**
     * build the spark-submit argument list from the task parameters.
     * @param param spark task parameters
     * @return spark-submit arguments in submission order
     */
    public static List<String> buildArgs(SparkParameters param) {
        List<String> args = new ArrayList<>();
        // deploy mode defaults to cluster when not configured
        String deployMode = "cluster";

        args.add(Constants.MASTER);
        if (StringUtils.isNotEmpty(param.getDeployMode())) {
            deployMode = param.getDeployMode();
        }
        if (!"local".equals(deployMode)) {
            // non-local submissions run on yarn: --master yarn --deploy-mode <mode>
            args.add("yarn");
            args.add(Constants.DEPLOY_MODE);
        }
        // BUGFIX: append the defaulted local variable — the original appended
        // param.getDeployMode(), which can be null/empty when not configured
        args.add(deployMode);

        // a main class only applies to java/scala programs, never python
        if (param.getProgramType() != null
                && param.getProgramType() != ProgramType.PYTHON
                && StringUtils.isNotEmpty(param.getMainClass())) {
            args.add(Constants.CLASS);
            args.add(param.getMainClass());
        }

        if (param.getDriverCores() != 0) {
            args.add(Constants.DRIVER_CORES);
            args.add(String.format("%d", param.getDriverCores()));
        }

        if (StringUtils.isNotEmpty(param.getDriverMemory())) {
            args.add(Constants.DRIVER_MEMORY);
            args.add(param.getDriverMemory());
        }

        if (param.getNumExecutors() != 0) {
            args.add(Constants.NUM_EXECUTORS);
            args.add(String.format("%d", param.getNumExecutors()));
        }

        if (param.getExecutorCores() != 0) {
            args.add(Constants.EXECUTOR_CORES);
            args.add(String.format("%d", param.getExecutorCores()));
        }

        if (StringUtils.isNotEmpty(param.getExecutorMemory())) {
            args.add(Constants.EXECUTOR_MEMORY);
            args.add(param.getExecutorMemory());
        }

        // --files --conf --libjar ... ; only add --queue when the user
        // did not already pass one inside "others"
        if (StringUtils.isNotEmpty(param.getOthers())) {
            String others = param.getOthers();
            if (!others.contains("--queue")) {
                if (StringUtils.isNotEmpty(param.getQueue())) {
                    args.add(Constants.SPARK_QUEUE);
                    args.add(param.getQueue());
                }
            }
            args.add(param.getOthers());
        } else if (StringUtils.isNotEmpty(param.getQueue())) {
            args.add(Constants.SPARK_QUEUE);
            args.add(param.getQueue());

        }

        if (param.getMainJar() != null) {
            args.add(param.getMainJar().getRes());
        }

        if (StringUtils.isNotEmpty(param.getMainArgs())) {
            args.add(param.getMainArgs());
        }

        return args;
    }

}
package cn.escheduler.server.utils;

import cn.escheduler.common.Constants;
import cn.escheduler.common.utils.HadoopUtils;
import cn.escheduler.dao.model.UdfFunc;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;

import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import static cn.escheduler.common.utils.CollectionUtils.isNotEmpty;

/**
 * udf utils
 */
public class UDFUtils {

    /** utility class, no instances */
    private UDFUtils() {
    }

    /**
     * create function format
     */
    private static final String CREATE_FUNCTION_FORMAT = "create temporary function {0} as ''{1}''";


    /**
     * build the hive SQL statements that register the given UDFs:
     * first "add jar ..." for every referenced resource, then
     * "create temporary function ..." for every function.
     * @param udfFuncs   udf definitions to register
     * @param tenantCode tenant whose hdfs udf dir holds the jars
     * @param logger     task logger
     * @return SQL statements to run before the task query
     */
    public static List<String> createFuncs(List<UdfFunc> udfFuncs, String tenantCode, Logger logger){
        // get hive udf jar path
        String hiveUdfJarPath = HadoopUtils.getHdfsUdfDir(tenantCode);
        logger.info("hive udf jar path : {}" , hiveUdfJarPath);

        // is the root directory of udf defined
        if (StringUtils.isEmpty(hiveUdfJarPath)) {
            logger.error("not define hive udf jar path");
            throw new RuntimeException("hive udf jar base path not defined ");
        }
        Set<String> resources = getFuncResources(udfFuncs);
        List<String> funcList = new ArrayList<>();

        // build jar sql
        buildJarSql(funcList, resources, hiveUdfJarPath);

        // build temp function sql
        buildTempFuncSql(funcList, udfFuncs);

        return funcList;
    }

    /**
     * append one "add jar" statement per resource, prefixing the upload
     * path with the cluster's default FS when it is not already absolute.
     */
    private static void buildJarSql(List<String> sqls, Set<String> resources, String uploadPath) {
        String defaultFS = HadoopUtils.getInstance().getConfiguration().get(Constants.FS_DEFAULTFS);
        if (!uploadPath.startsWith("hdfs:")) {
            uploadPath = defaultFS + uploadPath;
        }

        for (String resource : resources) {
            sqls.add(String.format("add jar %s/%s", uploadPath, resource));
        }
    }

    /**
     * append one "create temporary function" statement per udf.
     */
    private static void buildTempFuncSql(List<String> sqls, List<UdfFunc> udfFuncs) {
        if (isNotEmpty(udfFuncs)) {
            for (UdfFunc udfFunc : udfFuncs) {
                sqls.add(MessageFormat
                        .format(CREATE_FUNCTION_FORMAT, udfFunc.getFuncName(), udfFunc.getClassName()));
            }
        }
    }

    /**
     * collect the distinct resource (jar) names referenced by the functions.
     * (renamed from the misspelled private helper "getFuncResouces")
     */
    private static Set<String> getFuncResources(List<UdfFunc> udfFuncs) {
        Set<String> resources = new HashSet<>();

        for (UdfFunc udfFunc : udfFuncs) {
            resources.add(udfFunc.getResourceName());
        }

        return resources;
    }


}
package cn.escheduler.server.worker;

import cn.escheduler.common.Constants;
import cn.escheduler.common.IStoppable;
import cn.escheduler.common.enums.ExecutionStatus;
import cn.escheduler.common.enums.TaskType;
import cn.escheduler.common.queue.ITaskQueue;
import cn.escheduler.common.queue.TaskQueueFactory;
import cn.escheduler.common.thread.Stopper;
import cn.escheduler.common.thread.ThreadPoolExecutors;
import cn.escheduler.common.thread.ThreadUtils;
import cn.escheduler.common.utils.CollectionUtils;
import cn.escheduler.common.utils.OSUtils;
import cn.escheduler.dao.AlertDao;
import cn.escheduler.dao.DaoFactory;
import cn.escheduler.dao.ProcessDao;
import cn.escheduler.dao.ServerDao;
import cn.escheduler.dao.model.ProcessInstance;
import cn.escheduler.dao.model.TaskInstance;
import cn.escheduler.server.utils.ProcessUtils;
import cn.escheduler.server.worker.runner.FetchTaskThread;
import cn.escheduler.server.zk.ZKWorkerClient;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

/**
 * worker server: registers itself in zookeeper, sends heartbeats,
 * fetches tasks from the task queue and kills tasks on request.
 */
public class WorkerServer implements IStoppable {

    private static final Logger logger = LoggerFactory.getLogger(WorkerServer.class);

    /**
     * worker configuration, loaded once from worker.properties
     */
    private static Configuration conf;

    /**
     * object lock guarding {@link #terminated} for awaitTermination/stop
     */
    private final Object lock = new Object();

    /**
     * whether the server has been stopped (guarded by {@link #lock})
     */
    private boolean terminated = false;

    /**
     * zk worker client
     */
    private static ZKWorkerClient zkWorkerClient = null;

    /**
     * worker dao database access
     */
    private ServerDao serverDao = null;

    /**
     * process database access
     */
    private final ProcessDao processDao;

    /**
     * alert database access
     */
    private final AlertDao alertDao;

    /**
     * heartbeat thread pool
     * (renamed from the misspelled private field "heartbeatWorerService")
     */
    private ScheduledExecutorService heartbeatWorkerService;

    /**
     * heartbeat interval, unit second
     */
    private int heartBeatInterval;

    /**
     * task queue impl
     */
    protected ITaskQueue taskQueue;

    /**
     * kill executor service
     */
    private ExecutorService killExecutorService;

    /**
     * fetch task executor service
     */
    private ExecutorService fetchTaskExecutorService;

    static {
        // load worker.properties once; the worker cannot run without it
        try {
            conf = new PropertiesConfiguration(Constants.WORKER_PROPERTIES_PATH);
        }catch (ConfigurationException e){
            logger.error("load configuration failed : " + e.getMessage(),e);
            System.exit(1);
        }
    }

    public WorkerServer(){
        zkWorkerClient = ZKWorkerClient.getZKWorkerClient();
        this.serverDao = zkWorkerClient.getServerDao();
        this.alertDao = DaoFactory.getDaoInstance(AlertDao.class);
        this.processDao = DaoFactory.getDaoInstance(ProcessDao.class);
        taskQueue = TaskQueueFactory.getTaskQueueInstance();

        killExecutorService = ThreadUtils.newDaemonSingleThreadExecutor("Worker-Kill-Thread-Executor");

        fetchTaskExecutorService = ThreadUtils.newDaemonSingleThreadExecutor("Worker-Fetch-Thread-Executor");

    }

    /**
     * start heartbeat, kill-watcher and fetch-task threads, and register a
     * shutdown hook that cleans up the worker registration.
     */
    public void run(){

        // heartbeat interval
        heartBeatInterval = conf.getInt(Constants.WORKER_HEARTBEAT_INTERVAL,
                Constants.defaultWorkerHeartbeatInterval);

        heartbeatWorkerService = ThreadUtils.newDaemonThreadScheduledExecutor("Worker-Heartbeat-Thread-Executor", Constants.defaulWorkerHeartbeatThreadNum);

        // heartbeat thread implement
        Runnable heartBeatThread = heartBeatThread();

        zkWorkerClient.setStoppable(this);

        // regular heartbeat: initial delay 5 seconds, then every
        // heartBeatInterval seconds
        heartbeatWorkerService.
                scheduleAtFixedRate(heartBeatThread, 5, heartBeatInterval, TimeUnit.SECONDS);

        // kill process thread implement
        Runnable killProcessThread = getKillProcessThread();

        // submit kill process thread
        killExecutorService.execute(killProcessThread);

        /**
         * register hooks, which are called before the process exits
         */
        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
            @Override
            public void run() {
                String host = OSUtils.getHost();
                // clear worker table register info
                serverDao.deleteWorker(host);
                logger.info("worker server stopped");
                if (zkWorkerClient.getActiveMasterNum() <= 1) {
                    for (int i = 0; i < Constants.ESCHEDULER_WARN_TIMES_FAILOVER;i++) {
                        alertDao.sendServerStopedAlert(1, host, "Worker-Server");
                    }
                }

            }
        }));

        // get worker number of concurrent tasks
        int taskNum = conf.getInt(Constants.WORKER_FETCH_TASK_NUM,Constants.defaultWorkerFetchTaskNum);

        // new fetch task thread
        FetchTaskThread fetchTaskThread = new FetchTaskThread(taskNum,zkWorkerClient, processDao,conf, taskQueue);

        // submit fetch task thread
        fetchTaskExecutorService.execute(fetchTaskThread);


    }

    public static void main(String[] args)throws Exception{

        // set the name of the current thread
        Thread.currentThread().setName("Worker-Main-Thread");

        WorkerServer workerServer = new WorkerServer();

        workerServer.run();

        logger.info("worker server started");

        // blocking
        workerServer.awaitTermination();


    }


    /**
     * block the calling thread until {@link #stop(String)} sets
     * {@link #terminated} and notifies the lock.
     * @throws InterruptedException if interrupted while waiting
     */
    public void awaitTermination() throws InterruptedException {
        synchronized (lock) {
            while (!terminated) {
                lock.wait();
            }
        }
    }

    /**
     * heartbeat runnable: refreshes this worker's znode in zookeeper.
     * @return the heartbeat runnable
     */
    public Runnable heartBeatThread(){
        Runnable heartBeatThread = new Runnable() {
            @Override
            public void run() {
                // send heartbeat to zk
                if (StringUtils.isBlank(zkWorkerClient.getWorkerZNode())){
                    logger.error("worker send heartbeat to zk failed");
                }

                zkWorkerClient.heartBeatForZk(zkWorkerClient.getWorkerZNode() , Constants.WORKER_PREFIX);
            }
        };
        return heartBeatThread;
    }

    /**
     * kill-watcher runnable: polls the SCHEDULER_TASKS_KILL queue and kills
     * the local tasks listed there (dependent tasks are just marked KILL).
     * @return the kill-watcher runnable
     */
    public Runnable getKillProcessThread(){
        Runnable killProcessThread = new Runnable() {
            @Override
            public void run() {
                Set<String> taskInfoSet = taskQueue.smembers(Constants.SCHEDULER_TASKS_KILL);
                while (Stopper.isRunning()){
                    try {
                        Thread.sleep(Constants.SLEEP_TIME_MILLIS);
                    } catch (InterruptedException e) {
                        logger.error("interrupted exception : " + e.getMessage(),e);
                    }
                    // if set is null , return
                    if (CollectionUtils.isNotEmpty(taskInfoSet)){
                        for (String taskInfo : taskInfoSet){
                            // only handle entries addressed to this host
                            // (format: host-taskInstanceId)
                            if (taskInfo.startsWith(OSUtils.getHost())){
                                String[] taskInfoArr = taskInfo.split("-");
                                if (taskInfoArr.length != 2){
                                    continue;
                                }else {
                                    int taskInstId=Integer.parseInt(taskInfoArr[1]);

                                    TaskInstance taskInstance = processDao.findTaskInstanceById(taskInstId);

                                    ProcessInstance instance = processDao.findProcessInstanceDetailById(taskInstance.getProcessInstanceId());

                                    if(instance != null){
                                        taskInstance.setProcessInstance(instance);
                                    }
                                    // dependent tasks have no OS process: just flip state
                                    if(taskInstance.getTaskType().equals(TaskType.DEPENDENT.toString())){
                                        taskInstance.setState(ExecutionStatus.KILL);
                                        processDao.saveTaskInstance(taskInstance);
                                    }else{
                                        ProcessUtils.kill(taskInstance);
                                    }
                                    taskQueue.srem(Constants.SCHEDULER_TASKS_KILL,taskInfo);
                                }
                            }
                        }
                    }

                    // refresh the kill set for the next round
                    taskInfoSet = taskQueue.smembers(Constants.SCHEDULER_TASKS_KILL);
                }
            }
        };
        return killProcessThread;
    }



    /**
     * stop the worker: signal Stopper, shut down all executors, close the
     * zookeeper client and wake up {@link #awaitTermination()}.
     * @param cause human-readable reason for stopping
     */
    @Override
    public synchronized void stop(String cause) {

        try {
            //execute only once
            if(Stopper.isStoped()){
                return;
            }

            logger.info("worker server is stopping ..., cause : {}", cause);

            // set stop signal is true
            Stopper.stop();

            try {
                //thread sleep 3 seconds for thread quitely stop
                Thread.sleep(3000L);
            }catch (Exception e){
                logger.warn("thread sleep exception:" + e.getMessage(), e);
            }

            try {
                heartbeatWorkerService.shutdownNow();
            }catch (Exception e){
                logger.warn("heartbeat service stopped exception");
            }
            logger.info("heartbeat service stopped");

            try {
                ThreadPoolExecutors.getInstance().shutdown();
            }catch (Exception e){
                logger.warn("threadpool service stopped exception:{}",e.getMessage());
            }

            logger.info("threadpool service stopped");

            try {
                killExecutorService.shutdownNow();
            }catch (Exception e){
                logger.warn("worker kill executor service stopped exception:{}",e.getMessage());
            }
            logger.info("worker kill executor service stopped");

            try {
                fetchTaskExecutorService.shutdownNow();
            }catch (Exception e){
                logger.warn("worker fetch task service stopped exception:{}",e.getMessage());
            }
            logger.info("worker fetch task service stopped");

            try{
                zkWorkerClient.close();
            }catch (Exception e){
                logger.warn("zookeeper service stopped exception:{}",e.getMessage());
            }
            logger.info("zookeeper service stopped");

            //notify
            synchronized (lock) {
                terminated = true;
                lock.notifyAll();
            }
        } catch (Exception e) {
            logger.error("worker server stop exception : " + e.getMessage(), e);
            System.exit(-1);
        }
    }
}
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package cn.escheduler.server.worker.log; + +import ch.qos.logback.classic.spi.ILoggingEvent; +import ch.qos.logback.core.FileAppender; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * task log appender + */ +public class TaskLogAppender extends FileAppender { + + private static final Logger logger = LoggerFactory.getLogger(TaskLogAppender.class); + + private String currentlyActiveFile; + + @Override + protected void append(ILoggingEvent event) { + + if (currentlyActiveFile == null){ + currentlyActiveFile = getFile(); + } + String activeFile = currentlyActiveFile; + // thread name: taskThreadName-processDefineId_processInstanceId_taskInstanceId + String threadName = event.getThreadName(); + String[] threadNameArr = threadName.split("-"); + // logId = processDefineId_processInstanceId_taskInstanceId + String logId = threadNameArr[1]; + // split logId + threadNameArr = logId.split("_"); + String processDefineId = threadNameArr[0]; + String processInstanceId = threadNameArr[1]; + String taskInstanceId = threadNameArr[2]; + + activeFile = activeFile.replace("{processDefinitionId}",processDefineId); + activeFile = activeFile.replace("{processInstanceId}",processInstanceId); + activeFile = activeFile.replace("{taskInstanceId}",taskInstanceId); + + setFile(activeFile); + start(); + super.subAppend(event); + } +} diff --git a/escheduler-server/src/main/java/cn/escheduler/server/worker/log/TaskLogFilter.java b/escheduler-server/src/main/java/cn/escheduler/server/worker/log/TaskLogFilter.java new file mode 100644 index 
0000000000..7edc532f8e --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/worker/log/TaskLogFilter.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package cn.escheduler.server.worker.log; + +import ch.qos.logback.classic.spi.ILoggingEvent; +import ch.qos.logback.core.filter.Filter; +import ch.qos.logback.core.spi.FilterReply; + +/** + * task log filter + */ +public class TaskLogFilter extends Filter { + + @Override + public FilterReply decide(ILoggingEvent event) { + if (event.getThreadName().startsWith("TaskLogInfo-")){ + return FilterReply.ACCEPT; + } + return FilterReply.DENY; + } +} \ No newline at end of file diff --git a/escheduler-server/src/main/java/cn/escheduler/server/worker/log/TaskLogger.java b/escheduler-server/src/main/java/cn/escheduler/server/worker/log/TaskLogger.java new file mode 100644 index 0000000000..a72f0620ce --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/worker/log/TaskLogger.java @@ -0,0 +1,345 @@ + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package cn.escheduler.server.worker.log; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.slf4j.Marker; + +/** + * custom task logger + */ +public class TaskLogger implements Logger { + + private static Logger logger = LoggerFactory.getLogger(TaskLogger.class); + + private String taskAppId; + + public TaskLogger(String taskAppId) { + this.taskAppId = taskAppId; + } + + private String addJobId(String msg) { + return String.format("[taskAppId=%s] %s", taskAppId, msg); + } + + @Override + public String getName() { + return logger.getName(); + } + + @Override + public boolean isTraceEnabled() { + return logger.isTraceEnabled(); + } + + @Override + public void trace(String msg) { + logger.trace(addJobId(msg)); + } + + @Override + public void trace(String format, Object arg) { + logger.trace(addJobId(format), arg); + } + + @Override + public void trace(String format, Object arg1, Object arg2) { + logger.trace(addJobId(format), arg1, arg2); + } + + @Override + public void trace(String format, Object... 
arguments) { + logger.trace(addJobId(format), arguments); + } + + @Override + public void trace(String msg, Throwable t) { + logger.trace(addJobId(msg), t); + } + + @Override + public boolean isTraceEnabled(Marker marker) { + return logger.isTraceEnabled(marker); + } + + @Override + public void trace(Marker marker, String msg) { + logger.trace(marker, addJobId(msg)); + } + + @Override + public void trace(Marker marker, String format, Object arg) { + logger.trace(marker, addJobId(format), arg); + } + + @Override + public void trace(Marker marker, String format, Object arg1, Object arg2) { + logger.trace(marker, addJobId(format), arg1, arg2); + } + + @Override + public void trace(Marker marker, String format, Object... argArray) { + logger.trace(marker, addJobId(format), argArray); + } + + @Override + public void trace(Marker marker, String msg, Throwable t) { + logger.trace(marker, addJobId(msg), t); + } + + @Override + public boolean isDebugEnabled() { + return logger.isDebugEnabled(); + } + + @Override + public void debug(String msg) { + logger.debug(addJobId(msg)); + } + + @Override + public void debug(String format, Object arg) { + logger.debug(addJobId(format), arg); + } + + @Override + public void debug(String format, Object arg1, Object arg2) { + logger.debug(addJobId(format), arg1, arg2); + } + + @Override + public void debug(String format, Object... 
arguments) { + logger.debug(addJobId(format), arguments); + } + + @Override + public void debug(String msg, Throwable t) { + logger.debug(addJobId(msg), t); + } + + @Override + public boolean isDebugEnabled(Marker marker) { + return logger.isDebugEnabled(); + } + + @Override + public void debug(Marker marker, String msg) { + logger.debug(marker, addJobId(msg)); + } + + @Override + public void debug(Marker marker, String format, Object arg) { + logger.debug(marker, addJobId(format), arg); + } + + @Override + public void debug(Marker marker, String format, Object arg1, Object arg2) { + logger.debug(marker, addJobId(format), arg1, arg2); + } + + @Override + public void debug(Marker marker, String format, Object... arguments) { + logger.debug(marker, addJobId(format), arguments); + } + + @Override + public void debug(Marker marker, String msg, Throwable t) { + logger.debug(marker, addJobId(msg), t); + } + + @Override + public boolean isInfoEnabled() { + return logger.isInfoEnabled(); + } + + @Override + public void info(String msg) { + logger.info(addJobId(msg)); + } + + @Override + public void info(String format, Object arg) { + logger.info(addJobId(format), arg); + } + + @Override + public void info(String format, Object arg1, Object arg2) { + logger.info(addJobId(format), arg1, arg2); + } + + @Override + public void info(String format, Object... 
arguments) { + logger.info(addJobId(format), arguments); + } + + @Override + public void info(String msg, Throwable t) { + logger.info(addJobId(msg), t); + } + + @Override + public boolean isInfoEnabled(Marker marker) { + return logger.isInfoEnabled(); + } + + @Override + public void info(Marker marker, String msg) { + logger.info(marker, addJobId(msg)); + } + + @Override + public void info(Marker marker, String format, Object arg) { + logger.info(marker, addJobId(format), arg); + } + + @Override + public void info(Marker marker, String format, Object arg1, Object arg2) { + logger.info(marker, addJobId(format), arg1, arg2); + } + + @Override + public void info(Marker marker, String format, Object... arguments) { + logger.info(marker, addJobId(format), arguments); + } + + @Override + public void info(Marker marker, String msg, Throwable t) { + logger.info(marker, addJobId(msg), t); + } + + @Override + public boolean isWarnEnabled() { + return logger.isWarnEnabled(); + } + + @Override + public void warn(String msg) { + logger.warn(addJobId(msg)); + } + + @Override + public void warn(String format, Object arg) { + logger.warn(addJobId(format), arg); + } + + @Override + public void warn(String format, Object arg1, Object arg2) { + logger.warn(addJobId(format), arg1, arg2); + } + + @Override + public void warn(String format, Object... 
arguments) { + logger.warn(addJobId(format), arguments); + } + + @Override + public void warn(String msg, Throwable t) { + logger.warn(addJobId(msg), t); + } + + @Override + public boolean isWarnEnabled(Marker marker) { + return logger.isWarnEnabled(); + } + + @Override + public void warn(Marker marker, String msg) { + logger.warn(marker, addJobId(msg)); + } + + @Override + public void warn(Marker marker, String format, Object arg) { + logger.warn(marker, addJobId(format), arg); + } + + @Override + public void warn(Marker marker, String format, Object arg1, Object arg2) { + logger.warn(marker, addJobId(format), arg1, arg2); + } + + @Override + public void warn(Marker marker, String format, Object... arguments) { + logger.warn(marker, addJobId(format), arguments); + } + + @Override + public void warn(Marker marker, String msg, Throwable t) { + logger.warn(marker, addJobId(msg), t); + } + + @Override + public boolean isErrorEnabled() { + return logger.isErrorEnabled(); + } + + @Override + public void error(String msg) { + logger.error(addJobId(msg)); + } + + @Override + public void error(String format, Object arg) { + logger.error(addJobId(format), arg); + } + + @Override + public void error(String format, Object arg1, Object arg2) { + logger.error(addJobId(format), arg1, arg2); + } + + @Override + public void error(String format, Object... 
arguments) { + logger.error(addJobId(format), arguments); + } + + @Override + public void error(String msg, Throwable t) { + logger.error(addJobId(msg), t); + } + + @Override + public boolean isErrorEnabled(Marker marker) { + return logger.isErrorEnabled(); + } + + @Override + public void error(Marker marker, String msg) { + logger.error(marker, addJobId(msg)); + } + + @Override + public void error(Marker marker, String format, Object arg) { + logger.error(marker, addJobId(format), arg); + } + + @Override + public void error(Marker marker, String format, Object arg1, Object arg2) { + logger.error(marker, addJobId(format), arg1, arg2); + } + + @Override + public void error(Marker marker, String format, Object... arguments) { + logger.error(marker, addJobId(format), arguments); + } + + @Override + public void error(Marker marker, String msg, Throwable t) { + logger.error(marker, addJobId(msg), t); + } +} diff --git a/escheduler-server/src/main/java/cn/escheduler/server/worker/log/WorkerLogFilter.java b/escheduler-server/src/main/java/cn/escheduler/server/worker/log/WorkerLogFilter.java new file mode 100644 index 0000000000..ca8ea3fc64 --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/worker/log/WorkerLogFilter.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package cn.escheduler.server.worker.log; + +import ch.qos.logback.classic.Level; +import ch.qos.logback.classic.spi.ILoggingEvent; +import ch.qos.logback.core.filter.Filter; +import ch.qos.logback.core.spi.FilterReply; + +/** + * worker log filter + */ +public class WorkerLogFilter extends Filter { + Level level; + + @Override + public FilterReply decide(ILoggingEvent event) { + if (event.getThreadName().startsWith("Worker-")){ + return FilterReply.ACCEPT; + } + return FilterReply.DENY; + } + public void setLevel(String level) { + this.level = Level.toLevel(level); + } +} \ No newline at end of file diff --git a/escheduler-server/src/main/java/cn/escheduler/server/worker/runner/FetchTaskThread.java b/escheduler-server/src/main/java/cn/escheduler/server/worker/runner/FetchTaskThread.java new file mode 100644 index 0000000000..50402d4a3b --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/worker/runner/FetchTaskThread.java @@ -0,0 +1,202 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package cn.escheduler.server.worker.runner; + +import cn.escheduler.common.Constants; +import cn.escheduler.common.queue.ITaskQueue; +import cn.escheduler.common.thread.Stopper; +import cn.escheduler.common.thread.ThreadUtils; +import cn.escheduler.common.utils.FileUtils; +import cn.escheduler.common.utils.OSUtils; +import cn.escheduler.dao.ProcessDao; +import cn.escheduler.dao.model.ProcessDefinition; +import cn.escheduler.dao.model.ProcessInstance; +import cn.escheduler.dao.model.TaskInstance; +import cn.escheduler.server.zk.ZKWorkerClient; +import com.cronutils.utils.StringUtils; +import org.apache.commons.configuration.Configuration; +import org.apache.curator.framework.recipes.locks.InterProcessMutex; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Date; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ThreadPoolExecutor; + +/** + * fetch task thread + */ +public class FetchTaskThread implements Runnable{ + + private static final Logger logger = LoggerFactory.getLogger(FetchTaskThread.class); + /** + * set worker concurrent tasks + */ + private final int taskNum; + + /** + * zkWorkerClient + */ + private final ZKWorkerClient zkWorkerClient; + + /** + * task queue impl + */ + protected ITaskQueue taskQueue; + + /** + * process database access + */ + private final ProcessDao processDao; + + /** + * worker thread pool executor + */ + private final ExecutorService workerExecService; + + /** + * worker exec nums + */ + private int workerExecNums; + + private Configuration conf; + + + public FetchTaskThread(int taskNum, ZKWorkerClient zkWorkerClient, + ProcessDao processDao, Configuration conf, + ITaskQueue taskQueue){ + this.taskNum = taskNum; + this.zkWorkerClient = zkWorkerClient; + this.processDao = processDao; + this.workerExecNums = conf.getInt(Constants.WORKER_EXEC_THREADS, + Constants.defaultWorkerExecThreadNum); + // worker thread pool executor + this.workerExecService = 
ThreadUtils.newDaemonFixedThreadExecutor("Worker-Fetch-Task-Thread",workerExecNums); + this.conf = conf; + this.taskQueue = taskQueue; + } + + + @Override + public void run() { + + while (Stopper.isRunning()){ + + InterProcessMutex mutex = null; + try { + if(OSUtils.checkResource(this.conf, false)) { + + // creating distributed locks, lock path /escheduler/lock/worker + String zNodeLockPath = zkWorkerClient.getWorkerLockPath(); + mutex = new InterProcessMutex(zkWorkerClient.getZkClient(), zNodeLockPath); + mutex.acquire(); + + ThreadPoolExecutor poolExecutor = (ThreadPoolExecutor) workerExecService; + + for (int i = 0; i < taskNum; i++) { + + int activeCount = poolExecutor.getActiveCount(); + if (activeCount >= workerExecNums) { + logger.info("thread insufficient , activeCount : {} , workerExecNums : {}",activeCount,workerExecNums); + continue; + } + + // task instance id str + String taskInstIdStr = taskQueue.poll(Constants.SCHEDULER_TASKS_QUEUE); + + if (!StringUtils.isEmpty(taskInstIdStr)) { + Date now = new Date(); + + Integer taskId = Integer.parseInt(taskInstIdStr); + + // find task instance by task id + TaskInstance taskInstance = processDao.findTaskInstanceById(taskId); + + logger.info("worker fetch taskId : {} from queue ", taskId); + + int retryTimes = 30; + // mainly to wait for the master insert task to succeed + while (taskInstance == null && retryTimes > 0) { + Thread.sleep(Constants.SLEEP_TIME_MILLIS); + taskInstance = processDao.findTaskInstanceById(taskId); + retryTimes--; + } + + if (taskInstance == null) { + logger.error("task instance is null. 
task id : {} ", taskId); + continue; + } + + // set execute task worker host + taskInstance.setHost(OSUtils.getHost()); + taskInstance.setStartTime(now); + + + // get process instance + ProcessInstance processInstance = processDao.findProcessInstanceDetailById(taskInstance.getProcessInstanceId()); + + // get process define + ProcessDefinition processDefine = processDao.findProcessDefineById(taskInstance.getProcessDefinitionId()); + + + taskInstance.setProcessInstance(processInstance); + taskInstance.setProcessDefine(processDefine); + + + // get local execute path + String execLocalPath = FileUtils.getProcessExecDir(processDefine.getProjectId(), + processDefine.getId(), + processInstance.getId(), + taskInstance.getId()); + logger.info("task instance local execute path : {} ", execLocalPath); + + + // set task execute path + taskInstance.setExecutePath(execLocalPath); + + // check and create Linux users + FileUtils.createWorkDirAndUserIfAbsent(execLocalPath, + processDefine.getUserName(), logger); + + + // submit task + workerExecService.submit(new TaskScheduleThread(taskInstance, processDao)); + } + } + } + + Thread.sleep(Constants.SLEEP_TIME_MILLIS); + + }catch (Exception e){ + logger.error("fetch task thread exception : " + e.getMessage(),e); + } + finally { + if (mutex != null){ + try { + mutex.release(); + } catch (Exception e) { + if(e.getMessage().equals("instance must be started before calling this method")){ + logger.warn("fetch task lock release"); + }else{ + logger.error("fetch task lock release failed : " + e.getMessage(),e); + } + } + } + } + } + } +} \ No newline at end of file diff --git a/escheduler-server/src/main/java/cn/escheduler/server/worker/runner/TaskScheduleThread.java b/escheduler-server/src/main/java/cn/escheduler/server/worker/runner/TaskScheduleThread.java new file mode 100644 index 0000000000..016607c79e --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/worker/runner/TaskScheduleThread.java @@ -0,0 +1,310 @@ +/* + 
* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package cn.escheduler.server.worker.runner; + + +import cn.escheduler.common.Constants; +import cn.escheduler.common.enums.ExecutionStatus; +import cn.escheduler.common.enums.TaskType; +import cn.escheduler.common.model.TaskNode; +import cn.escheduler.common.process.Property; +import cn.escheduler.common.task.AbstractParameters; +import cn.escheduler.common.task.TaskTimeoutParameter; +import cn.escheduler.common.utils.CommonUtils; +import cn.escheduler.common.utils.HadoopUtils; +import cn.escheduler.common.utils.TaskParametersUtils; +import cn.escheduler.dao.ProcessDao; +import cn.escheduler.dao.model.ProcessInstance; +import cn.escheduler.dao.model.TaskInstance; +import cn.escheduler.server.utils.LoggerUtils; +import cn.escheduler.server.worker.log.TaskLogger; +import cn.escheduler.server.worker.task.AbstractTask; +import cn.escheduler.server.worker.task.TaskManager; +import cn.escheduler.server.worker.task.TaskProps; +import com.alibaba.fastjson.JSONObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.util.*; +import java.util.concurrent.Callable; +import java.util.stream.Collectors; + + +/** 
+ * task scheduler thread + */ +public class TaskScheduleThread implements Callable { + + /** + * logger + */ + private final Logger logger = LoggerFactory.getLogger(TaskScheduleThread.class); + + private static final String TASK_PREFIX = "TASK"; + + /** + * task instance + */ + private TaskInstance taskInstance; + + /** + * process database access + */ + private final ProcessDao processDao; + + /** + * execute task info + */ + private AbstractTask task; + + public TaskScheduleThread(TaskInstance taskInstance, ProcessDao processDao){ + this.processDao = processDao; + this.taskInstance = taskInstance; + } + + @Override + public Boolean call() throws Exception { + + // get task type + String taskType = taskInstance.getTaskType(); + // set task state + taskInstance.setState(ExecutionStatus.RUNNING_EXEUTION); + + // update task state + if(taskType.equals(TaskType.SQL.name()) || taskType.equals(TaskType.PROCEDURE.name())){ + processDao.changeTaskState(taskInstance.getState(), + taskInstance.getStartTime(), + taskInstance.getHost(), + null, + System.getProperty("user.dir") + "/logs/" + + taskInstance.getProcessDefinitionId() +"/" + + taskInstance.getProcessInstanceId() +"/" + + taskInstance.getId() + ".log", + taskInstance.getId()); + }else{ + processDao.changeTaskState(taskInstance.getState(), + taskInstance.getStartTime(), + taskInstance.getHost(), + taskInstance.getExecutePath(), + System.getProperty("user.dir") + "/logs/" + + taskInstance.getProcessDefinitionId() +"/" + + taskInstance.getProcessInstanceId() +"/" + + taskInstance.getId() + ".log", + taskInstance.getId()); + } + + ExecutionStatus status = ExecutionStatus.SUCCESS; + + try { + + + // custom param str + String customParamStr = taskInstance.getProcessInstance().getGlobalParams(); + + + Map allParamMap = new HashMap<>(); + + + if (customParamStr != null) { + List customParamMap = JSONObject.parseArray(customParamStr, Property.class); + + Map userDefinedParamMap = 
customParamMap.stream().collect(Collectors.toMap(Property::getProp, Property::getValue)); + + allParamMap.putAll(userDefinedParamMap); + } + + logger.info("script path : {}",taskInstance.getExecutePath()); + + TaskProps taskProps = new TaskProps(); + + taskProps.setTaskDir(taskInstance.getExecutePath()); + + String taskJson = taskInstance.getTaskJson(); + + + TaskNode taskNode = JSONObject.parseObject(taskJson, TaskNode.class); + + List projectRes = createProjectResFiles(taskNode); + + // copy hdfs file to local + copyHdfsToLocal(processDao, + taskInstance.getExecutePath(), + projectRes, + logger); + + // set task params + taskProps.setTaskParams(taskNode.getParams()); + // set tenant code , execute task linux user + taskProps.setTenantCode(taskInstance.getProcessInstance().getTenantCode()); + + ProcessInstance processInstance = processDao.findProcessInstanceByTaskId(taskInstance.getId()); + taskProps.setScheduleTime(processInstance.getScheduleTime()); + taskProps.setNodeName(taskInstance.getName()); + taskProps.setTaskInstId(taskInstance.getId()); + taskProps.setEnvFile(CommonUtils.getSystemEnvPath()); + // set queue + taskProps.setQueue(taskInstance.getProcessInstance().getQueue()); + taskProps.setTaskStartTime(taskInstance.getStartTime()); + taskProps.setDefinedParams(allParamMap); + + // set task timeout + setTaskTimeout(taskProps, taskNode); + + taskProps.setDependence(taskInstance.getDependency()); + + taskProps.setTaskAppId(String.format("%s_%s_%s", + taskInstance.getProcessDefine().getId(), + taskInstance.getProcessInstance().getId(), + taskInstance.getId())); + + // custom logger + TaskLogger taskLogger = new TaskLogger(LoggerUtils.buildTaskId(TASK_PREFIX, + taskInstance.getProcessDefine().getId(), + taskInstance.getProcessInstance().getId(), + taskInstance.getId())); + + task = TaskManager.newTask(taskInstance.getTaskType(), taskProps, taskLogger); + + // job init + task.init(); + + // job handle + task.handle(); + + + logger.info("task : {} exit status 
code : {}",taskProps.getTaskAppId(),task.getExitStatusCode()); + + if (task.getExitStatusCode() == Constants.EXIT_CODE_SUCCESS){ + status = ExecutionStatus.SUCCESS; + }else if (task.getExitStatusCode() == Constants.EXIT_CODE_KILL){ + status = ExecutionStatus.KILL; + }else { + status = ExecutionStatus.FAILURE; + } + }catch (Exception e){ + logger.error("task escheduler failure : " + e.getMessage(),e); + status = ExecutionStatus.FAILURE ; + logger.error(String.format("task process exception, process id : %s , task : %s", + taskInstance.getProcessInstanceId(), + taskInstance.getName()),e); + kill(); + } + // update task instance state + processDao.changeTaskState(status, + new Date(), + taskInstance.getId()); + return task.getExitStatusCode() > Constants.EXIT_CODE_SUCCESS; + } + + /** + * set task time out + * @param taskProps + * @param taskNode + */ + private void setTaskTimeout(TaskProps taskProps, TaskNode taskNode) { + taskProps.setTaskTimeout(Integer.MAX_VALUE); + TaskTimeoutParameter taskTimeoutParameter = taskNode.getTaskTimeoutParameter(); + if (taskTimeoutParameter.getEnable()){ + taskProps.setTaskTimeoutStrategy(taskTimeoutParameter.getStrategy()); + switch (taskTimeoutParameter.getStrategy()){ + case WARN: + break; + case FAILED: + if (Integer.MAX_VALUE > taskTimeoutParameter.getInterval() * 60) { + taskProps.setTaskTimeout(taskTimeoutParameter.getInterval() * 60); + } + break; + case WARNFAILED: + if (Integer.MAX_VALUE > taskTimeoutParameter.getInterval() * 60) { + taskProps.setTaskTimeout(taskTimeoutParameter.getInterval() * 60); + } + break; + default: + logger.error("not support task timeout strategy: {}", taskTimeoutParameter.getStrategy()); + throw new IllegalArgumentException("not support task timeout strategy"); + + } + } + } + + + /** + * kill task + */ + public void kill(){ + if (task != null){ + try { + task.cancelApplication(true); + }catch (Exception e){ + logger.error(e.getMessage(),e); + } + } + } + + + /** + * create project resource files 
+ */ + private List createProjectResFiles(TaskNode taskNode) throws Exception{ + + Set projectFiles = new HashSet<>(); + AbstractParameters baseParam = TaskParametersUtils.getParameters(taskNode.getType(), taskNode.getParams()); + + if (baseParam != null) { + List projectResourceFiles = baseParam.getResourceFilesList(); + if (projectResourceFiles != null) { + projectFiles.addAll(projectResourceFiles); + } + } + + return new ArrayList<>(projectFiles); + } + + /** + * copy hdfs file to local + * + * @param processDao + * @param execLocalPath + * @param projectRes + * @param logger + */ + private void copyHdfsToLocal(ProcessDao processDao, String execLocalPath, List projectRes, Logger logger) throws IOException { + for (String res : projectRes) { + File resFile = new File(execLocalPath, res); + if (!resFile.exists()) { + try { + /** + * query the tenant code of the resource according to the name of the resource + */ + String tentnCode = processDao.queryTenantCodeByResName(res); + String resHdfsPath = HadoopUtils.getHdfsFilename(tentnCode,res); + + logger.info("get resource file from hdfs :{}", resHdfsPath); + HadoopUtils.getInstance().copyHdfsToLocal(resHdfsPath, execLocalPath + File.separator + res, false, true); + }catch (Exception e){ + logger.error(e.getMessage(),e); + throw new RuntimeException(e.getMessage()); + } + + } else { + logger.info("file : {} exists ", resFile.getName()); + } + } + } +} \ No newline at end of file diff --git a/escheduler-server/src/main/java/cn/escheduler/server/worker/task/AbstractCommandExecutor.java b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/AbstractCommandExecutor.java new file mode 100644 index 0000000000..9e617e65c0 --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/AbstractCommandExecutor.java @@ -0,0 +1,557 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package cn.escheduler.server.worker.task; + +import cn.escheduler.common.Constants; +import cn.escheduler.common.enums.ExecutionStatus; +import cn.escheduler.common.thread.ThreadUtils; +import cn.escheduler.common.utils.HadoopUtils; +import cn.escheduler.dao.ProcessDao; +import cn.escheduler.dao.model.TaskInstance; +import cn.escheduler.server.utils.ProcessUtils; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; + +import java.io.*; +import java.lang.reflect.Field; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Date; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * abstract command executor + */ +public abstract class AbstractCommandExecutor { + /** + * rules for extracting application ID + */ + protected static final Pattern APPLICATION_REGEX = Pattern.compile(Constants.APPLICATION_REGEX); + + /** + * process + */ + private Process process; + + /** + * log handler + */ + protected Consumer> logHandler; + + /** + * task dir + */ + protected final String taskDir; + + /** + * task appId + */ + protected final String taskAppId; + + /** + * tenant code , 
execute task linux user + */ + protected final String tenantCode; + + /** + * env file + */ + protected final String envFile; + + /** + * start time + */ + protected final Date startTime; + + /** + * timeout + */ + protected int timeout; + + /** + * logger + */ + protected Logger logger; + + /** + * log list + */ + protected final List logBuffer; + + + public AbstractCommandExecutor(Consumer> logHandler, + String taskDir, String taskAppId, String tenantCode, String envFile, + Date startTime, int timeout, Logger logger){ + this.logHandler = logHandler; + this.taskDir = taskDir; + this.taskAppId = taskAppId; + this.tenantCode = tenantCode; + this.envFile = envFile; + this.startTime = startTime; + this.timeout = timeout; + this.logger = logger; + this.logBuffer = Collections.synchronizedList(new ArrayList<>()); + } + + /** + * task specific execution logic + * + * @param execCommand + * @param processDao + * @return + */ + public int run(String execCommand, ProcessDao processDao) { + int exitStatusCode; + + try { + if (StringUtils.isEmpty(execCommand)) { + exitStatusCode = 0; + return exitStatusCode; + } + + String commandFilePath = buildCommandFilePath(); + + // create command file if not exists + createCommandFileIfNotExists(execCommand, commandFilePath); + + //build process + buildProcess(commandFilePath); + + // parse process output + parseProcessOutput(process); + + // get process id + int pid = getProcessId(process); + + // task instance id + int taskInstId = Integer.parseInt(taskAppId.split("_")[2]); + + processDao.updatePidByTaskInstId(taskInstId, pid); + + logger.info("process start, process id is: {}", pid); + + // if timeout occurs, exit directly + long remainTime = getRemaintime(); + + // waiting for the run to finish + boolean status = process.waitFor(remainTime, TimeUnit.SECONDS); + + if (status) { + exitStatusCode = process.exitValue(); + logger.info("process has exited, work dir:{}, pid:{} ,exitStatusCode:{}", taskDir, pid,exitStatusCode); + //update 
process state to db + exitStatusCode = updateState(processDao, exitStatusCode, pid, taskInstId); + + } else { + cancelApplication(); + exitStatusCode = -1; + logger.warn("process timeout, work dir:{}, pid:{}", taskDir, pid); + } + + } catch (InterruptedException e) { + exitStatusCode = -1; + logger.error(String.format("interrupt exception: {}, task may be cancelled or killed",e.getMessage()), e); + throw new RuntimeException("interrupt exception. exitCode is : " + exitStatusCode); + } catch (Exception e) { + exitStatusCode = -1; + logger.error(e.getMessage(), e); + throw new RuntimeException("process error . exitCode is : " + exitStatusCode); + } + + return exitStatusCode; + } + + /** + * build process + * + * @param commandFile + * @throws IOException + */ + private void buildProcess(String commandFile) throws IOException { + //init process builder + ProcessBuilder processBuilder = new ProcessBuilder(); + // setting up a working directory + processBuilder.directory(new File(taskDir)); + // merge error information to standard output stream + processBuilder.redirectErrorStream(true); + // setting up user to run commands + processBuilder.command("sudo", "-u", tenantCode, commandType(), commandFile); + + process = processBuilder.start(); + + // print command + printCommand(processBuilder); + } + + /** + * update process state to db + * + * @param processDao + * @param exitStatusCode + * @param pid + * @param taskInstId + * @return + */ + private int updateState(ProcessDao processDao, int exitStatusCode, int pid, int taskInstId) { + //get yarn state by log + if (exitStatusCode != -1) { + TaskInstance taskInstance = processDao.findTaskInstanceById(taskInstId); + logger.info("process id is {}", pid); + + List appIds = getAppLinks(taskInstance.getLogPath()); + if (appIds.size() > 0) { + String appUrl = String.join(Constants.COMMA, appIds); + logger.info("yarn log url:{}",appUrl); + processDao.updatePidByTaskInstId(taskInstId, pid, appUrl); + } + + // check if all 
operations are completed + if (!isSuccessOfYarnState(appIds)) { + exitStatusCode = -1; + } + } + return exitStatusCode; + } + + + /** + * cancel python task + */ + public void cancelApplication() throws Exception { + if (process == null) { + return; + } + + // clear log + clear(); + + int processId = getProcessId(process); + + logger.info("cancel process: {}", processId); + + // kill , waiting for completion + boolean killed = softKill(processId); + + if (!killed) { + // hard kill + hardKill(processId); + + // destory + process.destroy(); + + process = null; + } + } + + /** + * soft kill + * @param processId + * @return + * @throws InterruptedException + */ + private boolean softKill(int processId) { + + if (processId != 0 && process.isAlive()) { + try { + // sudo -u user command to run command + String cmd = String.format("sudo kill %d", processId); + + logger.info("soft kill task:{}, process id:{}, cmd:{}", taskAppId, processId, cmd); + + Runtime.getRuntime().exec(cmd); + } catch (IOException e) { + logger.info("kill attempt failed." + e.getMessage(), e); + } + } + + return process.isAlive(); + } + + /** + * hard kill + * @param processId + */ + private void hardKill(int processId) { + if (processId != 0 && process.isAlive()) { + try { + String cmd = String.format("sudo kill -9 %d", processId); + + logger.info("hard kill task:{}, process id:{}, cmd:{}", taskAppId, processId, cmd); + + Runtime.getRuntime().exec(cmd); + } catch (IOException e) { + logger.error("kill attempt failed." 
+ e.getMessage(), e); + } + } + } + + /** + * print command + * @param processBuilder + */ + private void printCommand(ProcessBuilder processBuilder) { + String cmdStr; + + try { + cmdStr = ProcessUtils.buildCommandStr(processBuilder.command()); + logger.info("task run command:\n{}", cmdStr); + } catch (IOException e) { + logger.error(e.getMessage(), e); + } + } + + /** + * clear + */ + private void clear() { + if (!logBuffer.isEmpty()) { + // log handle + logHandler.accept(logBuffer); + + logBuffer.clear(); + } + } + + /** + * get the standard output of the process + */ + private void parseProcessOutput(Process process) { + String threadLoggerInfoName = String.format("TaskLogInfo-%s", taskAppId); + ThreadUtils.newDaemonSingleThreadExecutor(threadLoggerInfoName).submit(new Runnable(){ + @Override + public void run() { + BufferedReader inReader = null; + + try { + inReader = new BufferedReader(new InputStreamReader(process.getInputStream())); + String line; + + long lastFlushTime = System.currentTimeMillis(); + + while ((line = inReader.readLine()) != null) { + if(checkShowLog(line)){ + logBuffer.add(line); + } + + lastFlushTime = flush(lastFlushTime); + } + } catch (Exception e) { + logger.error(e.getMessage(),e); + } finally { + clear(); + close(inReader); + } + } + }); + + } + + public int getPid() { + return getProcessId(process); + } + + /** + * check yarn state + * + * @param appIds + * @return + */ + public boolean isSuccessOfYarnState(List appIds) { + + boolean result = true; + try { + for (String appId : appIds) { + ExecutionStatus applicationStatus = HadoopUtils.getInstance().getApplicationStatus(appId); + logger.info("appId:{}, final state:{}",appId,applicationStatus.name()); + if (!applicationStatus.equals(ExecutionStatus.SUCCESS)) { + result = false; + } + } + } catch (Exception e) { + logger.error(String.format("mapreduce applications: %s status failed : " + e.getMessage(), appIds.toString()),e); + result = false; + } + return result; + + } + + /** + * 
get app links + * @param fileName + * @return + */ + private List getAppLinks(String fileName) { + List logs = convertFile2List(fileName); + + List appIds = new ArrayList(); + /** + * analysis log,get submited yarn application id + */ + for (String log : logs) { + + String appId = findAppId(log); + if (StringUtils.isNotEmpty(appId) && !appIds.contains(appId)) { + logger.info("find app id: {}", appId); + appIds.add(appId); + } + } + return appIds; + } + + /** + * convert file to list + * @param filename + * @return + */ + private List convertFile2List(String filename) { + List lineList = new ArrayList(100); + File file=new File(filename); + + if (!file.exists()){ + return lineList; + } + + BufferedReader br = null; + try { + br = new BufferedReader(new InputStreamReader(new FileInputStream(filename), StandardCharsets.UTF_8)); + String line = null; + while ((line = br.readLine()) != null) { + lineList.add(line); + } + } catch (Exception e) { + logger.error(String.format("read file: %s failed : ",filename),e); + } finally { + if(br != null){ + try { + br.close(); + } catch (IOException e) { + logger.error(e.getMessage(),e); + } + } + + } + return lineList; + } + + /** + * find app id + * + * @return appid + */ + private String findAppId(String line) { + Matcher matcher = APPLICATION_REGEX.matcher(line); + + if (matcher.find() && checkFindApp(line)) { + return matcher.group(); + } + + return null; + } + + + /** + * get remain time(s) + * + * @return + */ + private long getRemaintime() { + long usedTime = (System.currentTimeMillis() - startTime.getTime()) / 1000; + long remainTime = timeout - usedTime; + + if (remainTime < 0) { + throw new RuntimeException("task execution time out"); + } + + return remainTime; + } + + /** + * get process id + * + * @param process + * @return + */ + private int getProcessId(Process process) { + int processId = 0; + + try { + Field f = process.getClass().getDeclaredField(Constants.PID); + f.setAccessible(true); + + processId = 
f.getInt(process); + } catch (Throwable e) { + logger.error(e.getMessage(), e); + } + + return processId; + } + + /** + * when log buffer siz or flush time reach condition , then flush + * + * @param lastFlushTime last flush time + * @return + */ + private long flush(long lastFlushTime) { + long now = System.currentTimeMillis(); + + /** + * when log buffer siz or flush time reach condition , then flush + */ + if (logBuffer.size() >= Constants.defaultLogRowsNum || now - lastFlushTime > Constants.defaultLogFlushInterval) { + lastFlushTime = now; + /** log handle */ + logHandler.accept(logBuffer); + + logBuffer.clear(); + } + return lastFlushTime; + } + + /** + * close buffer reader + * + * @param inReader + */ + private void close(BufferedReader inReader) { + if (inReader != null) { + try { + inReader.close(); + } catch (IOException e) { + logger.error(e.getMessage(), e); + } + } + } + + + protected abstract String buildCommandFilePath(); + protected abstract String commandType(); + protected abstract boolean checkShowLog(String line); + protected abstract boolean checkFindApp(String line); + protected abstract void createCommandFileIfNotExists(String execCommand, String commandFile) throws IOException; + + + +// if(line.contains(taskAppId) || !line.contains("cn.escheduler.server.worker.log.TaskLogger")){ +// logs.add(line); +// } +} diff --git a/escheduler-server/src/main/java/cn/escheduler/server/worker/task/AbstractTask.java b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/AbstractTask.java new file mode 100644 index 0000000000..645a314a4c --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/AbstractTask.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package cn.escheduler.server.worker.task; + +import cn.escheduler.common.task.AbstractParameters; +import org.slf4j.Logger; + +import java.util.List; + +/** + * executive task + */ +public abstract class AbstractTask { + + /** + * task props + **/ + protected TaskProps taskProps; + + /** + * log record + */ + protected Logger logger; + + + /** + * cancel + */ + protected volatile boolean cancel = false; + + /** + * exit code + */ + protected volatile int exitStatusCode = -1; + + /** + * @param taskProps + * @param logger + */ + protected AbstractTask(TaskProps taskProps, Logger logger) { + this.taskProps = taskProps; + this.logger = logger; + } + + /** + * init task + */ + public void init() throws Exception { + } + + /** + * task handle + */ + public abstract void handle() throws Exception; + + + + public void cancelApplication(boolean status) throws Exception { + cancel = true; + } + + /** + * log process + */ + public void logHandle(List logs) { + // note that the "new line" is added here to facilitate log parsing + logger.info(" -> {}", String.join("\n\t", logs)); + } + + + /** + * exit code + */ + public int getExitStatusCode() { + return exitStatusCode; + } + + + /** + * get task parameters + */ + public abstract AbstractParameters getParameters(); + + +} \ No newline at end of file diff --git 
a/escheduler-server/src/main/java/cn/escheduler/server/worker/task/AbstractYarnTask.java b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/AbstractYarnTask.java new file mode 100644 index 0000000000..7559864bab --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/AbstractYarnTask.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package cn.escheduler.server.worker.task; + +import cn.escheduler.dao.DaoFactory; +import cn.escheduler.dao.ProcessDao; +import cn.escheduler.dao.model.ProcessInstance; +import cn.escheduler.dao.model.TaskInstance; +import cn.escheduler.server.utils.ProcessUtils; +import org.slf4j.Logger; + +import java.io.IOException; + +/** + * abstract yarn task + */ +public abstract class AbstractYarnTask extends AbstractTask { + + /** + * process instance + */ + protected ProcessInstance processInstance; + + /** + * process task + */ + private ShellCommandExecutor processTask; + + /** + * process database access + */ + protected ProcessDao processDao; + + /** + * @param taskProps + * @param logger + * @throws IOException + */ + public AbstractYarnTask(TaskProps taskProps, Logger logger) { + super(taskProps, logger); + this.processDao = DaoFactory.getDaoInstance(ProcessDao.class); + // find process instance by taskId + this.processInstance = processDao.findProcessInstanceByTaskId(taskProps.getTaskInstId()); + this.processTask = new ShellCommandExecutor(this::logHandle, + taskProps.getTaskDir(), taskProps.getTaskAppId(), + taskProps.getTenantCode(), taskProps.getEnvFile(), taskProps.getTaskStartTime(), + taskProps.getTaskTimeout(), logger); + } + + @Override + public void handle() throws Exception { + try { + // construct process + exitStatusCode = processTask.run(buildCommand(), processDao); + } catch (Exception e) { + logger.error("yarn process failed : " + e.getMessage(), e); + exitStatusCode = -1; + } + } + + @Override + public void cancelApplication(boolean status) throws Exception { + cancel = true; + // cancel process + processTask.cancelApplication(); + int taskInstId = taskProps.getTaskInstId(); + TaskInstance taskInstance = processDao.findTaskInstanceById(taskInstId); + if (status && taskInstance != null){ + ProcessUtils.killYarnJob(taskInstance); + } + } + + /** + * create command + */ + protected abstract String buildCommand() throws Exception; +} diff --git 
a/escheduler-server/src/main/java/cn/escheduler/server/worker/task/PythonCommandExecutor.java b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/PythonCommandExecutor.java new file mode 100644 index 0000000000..6e4d015734 --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/PythonCommandExecutor.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package cn.escheduler.server.worker.task; + +import cn.escheduler.common.utils.FileUtils; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Date; +import java.util.List; +import java.util.function.Consumer; + +/** + * python command executor + */ +public class PythonCommandExecutor extends AbstractCommandExecutor { + + public static final String PYTHON = "python"; + + + + public PythonCommandExecutor(Consumer> logHandler, + String taskDir, String taskAppId, String tenantCode, String envFile, + Date startTime, int timeout, Logger logger) { + super(logHandler,taskDir,taskAppId, tenantCode, envFile, startTime, timeout, logger); + } + + + /** + * build command file path + * + * @return + */ + @Override + protected String buildCommandFilePath() { + return String.format("%s/py_%s.command", taskDir, taskAppId); + } + + /** + * create command file if not exists + * + * @param commandFile + * @throws IOException + */ + @Override + protected void createCommandFileIfNotExists(String execCommand, String commandFile) throws IOException { + logger.info("proxy user:{}, work dir:{}", tenantCode, taskDir); + + if (!Files.exists(Paths.get(commandFile))) { + logger.info("generate command file:{}", commandFile); + + StringBuilder sb = new StringBuilder(200); + sb.append("#-*- encoding=utf8 -*-\n"); + sb.append("import os,sys\n"); + sb.append("BASEDIR = os.path.dirname(os.path.realpath(__file__))\n"); + sb.append("os.chdir(BASEDIR)\n"); + + if (StringUtils.isNotEmpty(envFile)) { + String[] envArray = envFile.split("\\."); + if(envArray.length == 2){ + String path = envArray[0]; + logger.info("path:"+path); + int index = path.lastIndexOf("/"); + sb.append(String.format("sys.path.append('%s')\n",path.substring(0,index))); + sb.append(String.format("import %s\n",path.substring(index+1))); + } 
+ } + + sb.append("\n\n"); + sb.append(String.format("import py_%s_node\n",taskAppId)); + logger.info(sb.toString()); + + // write data to file + FileUtils.writeStringToFile(new File(commandFile), sb.toString(), StandardCharsets.UTF_8); + } + } + + @Override + protected String commandType() { + return PYTHON; + } + + @Override + protected boolean checkShowLog(String line) { + return true; + } + + @Override + protected boolean checkFindApp(String line) { + return true; + } + +} diff --git a/escheduler-server/src/main/java/cn/escheduler/server/worker/task/ShellCommandExecutor.java b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/ShellCommandExecutor.java new file mode 100644 index 0000000000..b5e803ae80 --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/ShellCommandExecutor.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package cn.escheduler.server.worker.task; + +import org.apache.commons.io.FileUtils; +import org.slf4j.Logger; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Date; +import java.util.List; +import java.util.function.Consumer; + +/** + * command executor + * + * 进程,真正在worker服务器上执行的任务 + */ +public class ShellCommandExecutor extends AbstractCommandExecutor { + + public static final String SH = "sh"; + + + public ShellCommandExecutor(Consumer> logHandler, + String taskDir, String taskAppId, String tenantCode, String envFile, + Date startTime, int timeout, Logger logger) { + super(logHandler,taskDir,taskAppId, tenantCode, envFile, startTime, timeout, logger); + } + + + @Override + protected String buildCommandFilePath() { + // command file + return String.format("%s/%s.command", taskDir, taskAppId); + } + + @Override + protected String commandType() { + return SH; + } + + @Override + protected boolean checkShowLog(String line) { + return line.contains(taskAppId) || !line.contains("cn.escheduler.server.worker.log.TaskLogger"); + } + + @Override + protected boolean checkFindApp(String line) { + return line.contains(taskAppId); + } + + @Override + protected void createCommandFileIfNotExists(String execCommand, String commandFile) throws IOException { + logger.info("tenantCode user:{}, task dir:{}", tenantCode, taskAppId); + + // create if non existence + if (!Files.exists(Paths.get(commandFile))) { + logger.info("create command file:{}", commandFile); + + StringBuilder sb = new StringBuilder(); + sb.append("#!/bin/sh\n"); + sb.append("BASEDIR=$(cd `dirname $0`; pwd)\n"); + sb.append("cd $BASEDIR\n"); + + if (envFile != null) { + sb.append("source " + envFile + "\n"); + } + + sb.append("\n\n"); + sb.append(execCommand); + logger.info("command : {}",sb.toString()); + + // write data to file + FileUtils.writeStringToFile(new File(commandFile), 
sb.toString(), + Charset.forName("UTF-8")); + } + } + + +} diff --git a/escheduler-server/src/main/java/cn/escheduler/server/worker/task/TaskManager.java b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/TaskManager.java new file mode 100644 index 0000000000..e23a29ae08 --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/TaskManager.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package cn.escheduler.server.worker.task; + + +import cn.escheduler.common.enums.TaskType; +import cn.escheduler.server.worker.task.dependent.DependentTask; +import cn.escheduler.server.worker.task.mr.MapReduceTask; +import cn.escheduler.server.worker.task.processdure.ProcedureTask; +import cn.escheduler.server.worker.task.python.PythonTask; +import cn.escheduler.server.worker.task.shell.ShellTask; +import cn.escheduler.server.worker.task.spark.SparkTask; +import cn.escheduler.server.worker.task.sql.SqlTask; +import org.apache.commons.lang3.EnumUtils; +import org.slf4j.Logger; + +/** + * task manaster + */ +public class TaskManager { + + + /** + * create new task + * @param taskType + * @param props + * @param logger + * @return + * @throws IllegalArgumentException + */ + public static AbstractTask newTask(String taskType, TaskProps props, Logger logger) + throws IllegalArgumentException { + switch (EnumUtils.getEnum(TaskType.class,taskType)) { + case SHELL: + return new ShellTask(props, logger); + case PROCEDURE: + return new ProcedureTask(props, logger); + case SQL: + return new SqlTask(props, logger); + case MR: + return new MapReduceTask(props, logger); + case SPARK: + return new SparkTask(props, logger); + case PYTHON: + return new PythonTask(props, logger); + case DEPENDENT: + return new DependentTask(props, logger); + default: + logger.error("unsupport task type: {}", taskType); + throw new IllegalArgumentException("not support task type"); + } + } +} diff --git a/escheduler-server/src/main/java/cn/escheduler/server/worker/task/TaskProps.java b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/TaskProps.java new file mode 100644 index 0000000000..053b5bed24 --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/TaskProps.java @@ -0,0 +1,236 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package cn.escheduler.server.worker.task; + +import cn.escheduler.common.enums.DataType; +import cn.escheduler.common.enums.Direct; +import cn.escheduler.common.enums.TaskTimeoutStrategy; +import cn.escheduler.common.process.Property; + +import java.util.Date; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +/** + * task props + */ +public class TaskProps { + + /** + * task node name + **/ + private String nodeName; + + /** + * task instance id + **/ + private int taskInstId; + + /** + * tenant code , execute task linux user + **/ + private String tenantCode; + + /** + * task parameters + **/ + private String taskParams; + + /** + * task dir + **/ + private String taskDir; + + /** + * queue + **/ + private String queue; + + /** + * env file + **/ + private String envFile; + + /** + * defined params + **/ + private Map definedParams; + + /** + * task path + */ + private String taskAppId; + + /** + * task start time + */ + private Date taskStartTime; + + /** + * task timeout + */ + private int taskTimeout; + + /** + * task timeout strategy + */ + private TaskTimeoutStrategy taskTimeoutStrategy; + /** + * task dependence + */ + private String dependence; + + /** + * schedule time + * @return + */ + private Date scheduleTime; + + + public String 
getTenantCode() { + return tenantCode; + } + + public void setTenantCode(String tenantCode) { + this.tenantCode = tenantCode; + } + + public String getTaskParams() { + return taskParams; + } + + public void setTaskParams(String taskParams) { + this.taskParams = taskParams; + } + + public String getTaskDir() { + return taskDir; + } + + public void setTaskDir(String taskDir) { + this.taskDir = taskDir; + } + + public Map getDefinedParams() { + return definedParams; + } + + public void setDefinedParams(Map definedParams) { + this.definedParams = definedParams; + } + + public String getEnvFile() { + return envFile; + } + + public void setEnvFile(String envFile) { + this.envFile = envFile; + } + + + public String getNodeName() { + return nodeName; + } + + public void setNodeName(String nodeName) { + this.nodeName = nodeName; + } + + public int getTaskInstId() { + return taskInstId; + } + + public void setTaskInstId(int taskInstId) { + this.taskInstId = taskInstId; + } + + public String getQueue() { + return queue; + } + + public void setQueue(String queue) { + this.queue = queue; + } + + + public String getTaskAppId() { + return taskAppId; + } + + public void setTaskAppId(String taskAppId) { + this.taskAppId = taskAppId; + } + + public Date getTaskStartTime() { + return taskStartTime; + } + + public void setTaskStartTime(Date taskStartTime) { + this.taskStartTime = taskStartTime; + } + + public int getTaskTimeout() { + return taskTimeout; + } + + public void setTaskTimeout(int taskTimeout) { + this.taskTimeout = taskTimeout; + } + + public TaskTimeoutStrategy getTaskTimeoutStrategy() { + return taskTimeoutStrategy; + } + + public void setTaskTimeoutStrategy(TaskTimeoutStrategy taskTimeoutStrategy) { + this.taskTimeoutStrategy = taskTimeoutStrategy; + } + + /** + * get parameters map + * @return + */ + public Map getUserDefParamsMap() { + if (definedParams != null) { + Map userDefParamsMaps = new HashMap<>(); + Iterator> iter = definedParams.entrySet().iterator(); + 
while (iter.hasNext()){ + Map.Entry en = iter.next(); + Property property = new Property(en.getKey(), Direct.IN, DataType.VARCHAR , en.getValue()); + userDefParamsMaps.put(property.getProp(),property); + } + return userDefParamsMaps; + } + return null; + } + + public String getDependence() { + return dependence; + } + + public void setDependence(String dependence) { + this.dependence = dependence; + } + + public Date getScheduleTime() { + return scheduleTime; + } + + public void setScheduleTime(Date scheduleTime) { + this.scheduleTime = scheduleTime; + } +} diff --git a/escheduler-server/src/main/java/cn/escheduler/server/worker/task/dependent/DependentExecute.java b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/dependent/DependentExecute.java new file mode 100644 index 0000000000..1e46e3e38d --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/dependent/DependentExecute.java @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package cn.escheduler.server.worker.task.dependent; + +import cn.escheduler.common.Constants; +import cn.escheduler.common.enums.DependResult; +import cn.escheduler.common.enums.DependentRelation; +import cn.escheduler.common.enums.ExecutionStatus; +import cn.escheduler.common.model.DateInterval; +import cn.escheduler.common.model.DependentItem; +import cn.escheduler.common.utils.DependentUtils; +import cn.escheduler.dao.DaoFactory; +import cn.escheduler.dao.ProcessDao; +import cn.escheduler.dao.model.ProcessInstance; +import cn.escheduler.dao.model.TaskInstance; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; + +/** + * dependent item execute + */ +public class DependentExecute { + /** + * process dao + */ + private static final ProcessDao processDao = DaoFactory.getDaoInstance(ProcessDao.class); + + private List dependItemList; + private DependentRelation relation; + + private DependResult modelDependResult = DependResult.WAITING; + private Map dependResultMap = new HashMap<>(); + + private Logger logger = LoggerFactory.getLogger(DependentExecute.class); + + public DependentExecute(List itemList, DependentRelation relation){ + this.dependItemList = itemList; + this.relation = relation; + } + + /** + * get dependent item for one dependent item + * @param dependentItem + * @return + */ + public DependResult getDependentResultForItem(DependentItem dependentItem, Date currentTime){ + List dateIntervals = DependentUtils.getDateIntervalList(currentTime, dependentItem.getDateValue()); + return calculateResultForTasks(dependentItem, dateIntervals ); + } + + /** + * calculate dependent result for one dependent item. 
+ * @param dependentItem + * @param dateIntervals + * @return + */ + private DependResult calculateResultForTasks(DependentItem dependentItem, + List dateIntervals) { + DependResult result = DependResult.FAILED; + for(DateInterval dateInterval : dateIntervals){ + ProcessInstance processInstance = findLastProcessInterval(dependentItem.getDefinitionId(), + dateInterval); + if(processInstance == null){ + logger.error("cannot find the right process instance: definition id:{}, start:{}, end:{}", + dependentItem.getDefinitionId(), dateInterval.getStartTime(), dateInterval.getEndTime() ); + return DependResult.FAILED; + } + if(dependentItem.getDepTasks().equals(Constants.DEPENDENT_ALL)){ + result = getDependResultByState(processInstance.getState()); + }else{ + TaskInstance taskInstance = null; + List taskInstanceList = processDao.findValidTaskListByProcessId(processInstance.getId()); + + for(TaskInstance task : taskInstanceList){ + if(task.getName().equals(dependentItem.getDepTasks())){ + taskInstance = task; + break; + } + } + if(taskInstance == null){ + // cannot find task in the process instance + // maybe because process instance is running or failed. + result = getDependResultByState(processInstance.getState()); + }else{ + result = getDependResultByState(taskInstance.getState()); + } + } + if(result != DependResult.SUCCESS){ + break; + } + } + return result; + } + + /** + * find the last one process instance that : + * 1. manual run and finish between the interval + * 2. 
schedule run and schedule time between the interval + * @param definitionId + * @param dateInterval + * @return + */ + private ProcessInstance findLastProcessInterval(int definitionId, DateInterval dateInterval) { + + ProcessInstance runningProcess = processDao.findLastRunningProcess(definitionId, dateInterval); + if(runningProcess != null){ + return runningProcess; + } + + ProcessInstance lastSchedulerProcess = processDao.findLastSchedulerProcessInterval( + definitionId, dateInterval + ); + + ProcessInstance lastManualProcess = processDao.findLastManualProcessInterval( + definitionId, dateInterval + ); + + if(lastManualProcess ==null){ + return lastSchedulerProcess; + } + if(lastSchedulerProcess == null){ + return lastManualProcess; + } + + return (lastManualProcess.getEndTime().after(lastSchedulerProcess.getEndTime()))? + lastManualProcess : lastSchedulerProcess; + } + + /** + * get dependent result by task/process instance state + * @param state + * @return + */ + private DependResult getDependResultByState(ExecutionStatus state) { + + if(state.typeIsRunning() || state == ExecutionStatus.SUBMITTED_SUCCESS || state == ExecutionStatus.WAITTING_THREAD){ + return DependResult.WAITING; + }else if(state.typeIsSuccess()){ + return DependResult.SUCCESS; + }else{ + return DependResult.FAILED; + } + } + + /** + * judge depend item finished + * @return + */ + public boolean finish(Date currentTime){ + if(modelDependResult == DependResult.WAITING){ + modelDependResult = getModelDependResult(currentTime); + return false; + } + return true; + } + + /** + * get model depend result + * @return + */ + public DependResult getModelDependResult(Date currentTime){ + + List dependResultList = new ArrayList<>(); + + for(DependentItem dependentItem : dependItemList){ + DependResult dependResult = getDependResultForItem(dependentItem, currentTime); + if(dependResult != DependResult.WAITING){ + dependResultMap.put(dependentItem.getKey(), dependResult); + } + 
dependResultList.add(dependResult); + } + modelDependResult = DependentUtils.getDependResultForRelation( + this.relation, dependResultList + ); + return modelDependResult; + } + + /** + * get dependent item result + * @param item + * @return + */ + public DependResult getDependResultForItem(DependentItem item, Date currentTime){ + String key = item.getKey(); + if(dependResultMap.containsKey(key)){ + return dependResultMap.get(key); + } + return getDependentResultForItem(item, currentTime); + } + + public Map getDependResultMap(){ + return dependResultMap; + } + + + +} diff --git a/escheduler-server/src/main/java/cn/escheduler/server/worker/task/dependent/DependentTask.java b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/dependent/DependentTask.java new file mode 100644 index 0000000000..21e596f55f --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/dependent/DependentTask.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package cn.escheduler.server.worker.task.dependent; + +import cn.escheduler.common.Constants; +import cn.escheduler.common.enums.DependResult; +import cn.escheduler.common.enums.ExecutionStatus; +import cn.escheduler.common.model.DependentTaskModel; +import cn.escheduler.common.task.AbstractParameters; +import cn.escheduler.common.task.dependent.DependentParameters; +import cn.escheduler.common.utils.DependentUtils; +import cn.escheduler.common.utils.JSONUtils; +import cn.escheduler.dao.DaoFactory; +import cn.escheduler.dao.ProcessDao; +import cn.escheduler.dao.model.TaskInstance; +import cn.escheduler.server.worker.task.AbstractTask; +import cn.escheduler.server.worker.task.TaskProps; +import org.slf4j.Logger; + +import java.util.*; + +import static cn.escheduler.common.Constants.DEPENDENT_SPLIT; + +public class DependentTask extends AbstractTask { + + private List dependentTaskList = new ArrayList<>(); + + /** + * depend item result map + * save the result to log file + */ + private Map dependResultMap = new HashMap<>(); + + private DependentParameters dependentParameters; + + private Date dependentDate; + + private ProcessDao processDao; + + public DependentTask(TaskProps props, Logger logger) { + super(props, logger); + } + + @Override + public void init(){ + logger.info("dependent task initialize"); + + this.dependentParameters = JSONUtils.parseObject(this.taskProps.getDependence(), + DependentParameters.class); + + for(DependentTaskModel taskModel : dependentParameters.getDependTaskList()){ + this.dependentTaskList.add(new DependentExecute( + taskModel.getDependItemList(), taskModel.getRelation())); + } + + this.processDao = DaoFactory.getDaoInstance(ProcessDao.class); + + if(taskProps.getScheduleTime() != null){ + this.dependentDate = taskProps.getScheduleTime(); + }else{ + this.dependentDate = taskProps.getTaskStartTime(); + } + + } + + @Override + public void handle(){ + // set the name of the current thread + String threadLoggerInfoName = 
String.format("TaskLogInfo-%s", taskProps.getTaskAppId()); + Thread.currentThread().setName(threadLoggerInfoName); + + try{ + TaskInstance taskInstance = null; + while(true){ + taskInstance = processDao.findTaskInstanceById(this.taskProps.getTaskInstId()); + if(taskInstance.getState() == ExecutionStatus.KILL){ + this.cancel = true; + } + if(this.cancel || allDependentTaskFinish()){ + break; + } + Thread.sleep(Constants.SLEEP_TIME_MILLIS); + + } + if(cancel){ + exitStatusCode = Constants.EXIT_CODE_KILL; + }else{ + DependResult result = getTaskDependResult(); + exitStatusCode = (result == DependResult.SUCCESS) ? + Constants.EXIT_CODE_SUCCESS : Constants.EXIT_CODE_FAILURE; + } + }catch (Exception e){ + logger.error("Exception " + e); + exitStatusCode = -1; + } + } + + /** + * get dependent result + * @return + */ + private DependResult getTaskDependResult(){ + List dependResultList = new ArrayList<>(); + for(DependentExecute dependentExecute : dependentTaskList){ + DependResult dependResult = dependentExecute.getModelDependResult(dependentDate); + dependResultList.add(dependResult); + } + DependResult result = DependentUtils.getDependResultForRelation( + this.dependentParameters.getRelation(), dependResultList + ); + return result; + } + + /** + * judge all dependent tasks finish + * @return + */ + private boolean allDependentTaskFinish(){ + boolean finish = true; + for(DependentExecute dependentExecute : dependentTaskList){ + Map resultMap = dependentExecute.getDependResultMap(); + Set keySet = resultMap.keySet(); + for(String key : keySet){ + if(!dependResultMap.containsKey(key)){ + dependResultMap.put(key, resultMap.get(key)); + //save depend result to log + logger.info("dependent item complete {} {},{}", + DEPENDENT_SPLIT, key, resultMap.get(key).toString()); + } + } + if(!dependentExecute.finish(dependentDate)){ + finish = false; + } + } + return finish; + } + + + @Override + public void cancelApplication(boolean cancelApplication) throws Exception { + // cancel 
process + this.cancel = true; + } + + @Override + public AbstractParameters getParameters() { + return null; + } +} diff --git a/escheduler-server/src/main/java/cn/escheduler/server/worker/task/mr/MapReduceTask.java b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/mr/MapReduceTask.java new file mode 100644 index 0000000000..971ecd0b6d --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/mr/MapReduceTask.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package cn.escheduler.server.worker.task.mr; + +import cn.escheduler.common.Constants; +import cn.escheduler.common.enums.ProgramType; +import cn.escheduler.common.process.Property; +import cn.escheduler.common.task.AbstractParameters; +import cn.escheduler.common.task.mr.MapreduceParameters; +import cn.escheduler.common.utils.JSONUtils; +import cn.escheduler.common.utils.ParameterUtils; +import cn.escheduler.server.utils.ParamUtils; +import cn.escheduler.server.worker.task.AbstractYarnTask; +import cn.escheduler.server.worker.task.TaskProps; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/** + * mapreduce task + */ +public class MapReduceTask extends AbstractYarnTask { + + + /** + * mapreduce parameters + */ + private MapreduceParameters mapreduceParameters; + + /** + * @param props + * @param logger + */ + public MapReduceTask(TaskProps props, Logger logger) { + super(props, logger); + } + + @Override + public void init() { + + logger.info("mapreduce task params {}", taskProps.getTaskParams()); + + this.mapreduceParameters = JSONUtils.parseObject(taskProps.getTaskParams(), MapreduceParameters.class); + + // check parameters + if (!mapreduceParameters.checkParameters()) { + throw new RuntimeException("mapreduce task params is not valid"); + } + + mapreduceParameters.setQueue(taskProps.getQueue()); + + // replace placeholder + Map paramsMap = ParamUtils.convert(taskProps.getUserDefParamsMap(), + taskProps.getDefinedParams(), + mapreduceParameters.getLocalParametersMap(), + processInstance.getCmdTypeIfComplement(), + processInstance.getScheduleTime()); + if (paramsMap != null){ + String args = ParameterUtils.convertParameterPlaceholders(mapreduceParameters.getMainArgs(), ParamUtils.convert(paramsMap)); + mapreduceParameters.setMainArgs(args); + if(mapreduceParameters.getProgramType() != null && mapreduceParameters.getProgramType() == ProgramType.PYTHON){ + 
String others = ParameterUtils.convertParameterPlaceholders(mapreduceParameters.getOthers(), ParamUtils.convert(paramsMap)); + mapreduceParameters.setOthers(others); + } + } + } + + @Override + protected String buildCommand() throws Exception { + List parameterList = buildParameters(mapreduceParameters); + + String command = ParameterUtils.convertParameterPlaceholders(String.join(" ", parameterList), taskProps.getDefinedParams()); + logger.info("mapreduce task command: {}", command); + + return command; + } + + @Override + public AbstractParameters getParameters() { + return mapreduceParameters; + } + + + private List buildParameters(MapreduceParameters mapreduceParameters){ + + List result = new ArrayList<>(); + + result.add(Constants.HADOOP); + + // main jar + if(mapreduceParameters.getMainJar()!= null){ + result.add(Constants.JAR); + result.add(mapreduceParameters.getMainJar().getRes()); + } + + // main class + if(mapreduceParameters.getProgramType() !=null ){ + if(mapreduceParameters.getProgramType()!= ProgramType.PYTHON){ + if(StringUtils.isNotEmpty(mapreduceParameters.getMainClass())){ + result.add(mapreduceParameters.getMainClass()); + } + } + } + + // others + if (StringUtils.isNotEmpty(mapreduceParameters.getOthers())) { + String others = mapreduceParameters.getOthers(); + if(!others.contains(Constants.MR_QUEUE)){ + if (StringUtils.isNotEmpty(mapreduceParameters.getQueue())) { + result.add(String.format("%s %s=%s", Constants.D, Constants.MR_QUEUE, mapreduceParameters.getQueue())); + } + } + result.add(mapreduceParameters.getOthers()); + }else if (StringUtils.isNotEmpty(mapreduceParameters.getQueue())) { + result.add(String.format("%s %s=%s", Constants.D, Constants.MR_QUEUE, mapreduceParameters.getQueue())); + + } + + // command args + if(StringUtils.isNotEmpty(mapreduceParameters.getMainArgs())){ + result.add(mapreduceParameters.getMainArgs()); + } + return result; + } +} + diff --git 
a/escheduler-server/src/main/java/cn/escheduler/server/worker/task/processdure/ProcedureTask.java b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/processdure/ProcedureTask.java new file mode 100644 index 0000000000..2efe8cbf54 --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/processdure/ProcedureTask.java @@ -0,0 +1,328 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package cn.escheduler.server.worker.task.processdure; + +import cn.escheduler.common.Constants; +import cn.escheduler.common.enums.DataType; +import cn.escheduler.common.enums.DbType; +import cn.escheduler.common.enums.Direct; +import cn.escheduler.common.enums.TaskTimeoutStrategy; +import cn.escheduler.common.job.db.BaseDataSource; +import cn.escheduler.common.job.db.MySQLDataSource; +import cn.escheduler.common.job.db.PostgreDataSource; +import cn.escheduler.common.process.Property; +import cn.escheduler.common.task.AbstractParameters; +import cn.escheduler.common.task.procedure.ProcedureParameters; +import cn.escheduler.common.utils.CollectionUtils; +import cn.escheduler.common.utils.ParameterUtils; +import cn.escheduler.dao.DaoFactory; +import cn.escheduler.dao.ProcessDao; +import cn.escheduler.dao.model.DataSource; +import cn.escheduler.dao.model.ProcessInstance; +import cn.escheduler.server.utils.ParamUtils; +import cn.escheduler.server.worker.task.AbstractTask; +import cn.escheduler.server.worker.task.TaskProps; +import com.alibaba.fastjson.JSONObject; +import com.cronutils.utils.StringUtils; +import org.slf4j.Logger; + +import java.sql.*; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +/** + * procedure task + */ +public class ProcedureTask extends AbstractTask { + + /** + * procedure parameters + */ + private ProcedureParameters procedureParameters; + + /** + * process database access + */ + private ProcessDao processDao; + + public ProcedureTask(TaskProps taskProps, Logger logger) { + super(taskProps, logger); + + logger.info("procedure task params {}", taskProps.getTaskParams()); + + this.procedureParameters = JSONObject.parseObject(taskProps.getTaskParams(), ProcedureParameters.class); + + // check parameters + if (!procedureParameters.checkParameters()) { + throw new RuntimeException("procedure task params is not valid"); + } + + this.processDao = 
DaoFactory.getDaoInstance(ProcessDao.class); + } + + @Override + public void handle() throws Exception { + // set the name of the current thread + String threadLoggerInfoName = String.format("TaskLogInfo-%s", taskProps.getTaskAppId()); + Thread.currentThread().setName(threadLoggerInfoName); + + logger.info("processdure type : {}, datasource : {}, method : {} , localParams : {}", + procedureParameters.getType(), + procedureParameters.getDatasource(), + procedureParameters.getMethod(), + procedureParameters.getLocalParams()); + + // determine whether there is a data source + if (procedureParameters.getDatasource() == 0){ + logger.error("datasource is null"); + exitStatusCode = 0; + }else { + + DataSource dataSource = processDao.findDataSourceById(procedureParameters.getDatasource()); + logger.info("datasource name : {} , type : {} , desc : {} , user_id : {} , parameter : {}", + dataSource.getName(),dataSource.getType(),dataSource.getNote(), + dataSource.getUserId(),dataSource.getConnectionParams()); + + if (dataSource != null){ + Connection connection = null; + CallableStatement stmt = null; + try { + BaseDataSource baseDataSource = null; + + if (DbType.MYSQL.name().equals(dataSource.getType().name())){ + baseDataSource = JSONObject.parseObject(dataSource.getConnectionParams(),MySQLDataSource.class); + Class.forName(Constants.JDBC_MYSQL_CLASS_NAME); + }else if (DbType.POSTGRESQL.name().equals(dataSource.getType().name())){ + baseDataSource = JSONObject.parseObject(dataSource.getConnectionParams(),PostgreDataSource.class); + Class.forName(Constants.JDBC_POSTGRESQL_CLASS_NAME); + } + + // get jdbc connection + connection = DriverManager.getConnection(baseDataSource.getJdbcUrl(), + baseDataSource.getUser(), + baseDataSource.getPassword()); + + // get process instance by task instance id + ProcessInstance processInstance = processDao.findProcessInstanceByTaskId(taskProps.getTaskInstId()); + + // combining local and global parameters + Map paramsMap = 
ParamUtils.convert(taskProps.getUserDefParamsMap(), + taskProps.getDefinedParams(), + procedureParameters.getLocalParametersMap(), + processInstance.getCmdTypeIfComplement(), + processInstance.getScheduleTime()); + + + Collection userDefParamsList = null; + + if (procedureParameters.getLocalParametersMap() != null){ + userDefParamsList = procedureParameters.getLocalParametersMap().values(); + } + + String method = ""; + // no parameters + if (CollectionUtils.isEmpty(userDefParamsList)){ + method = "{call " + procedureParameters.getMethod() + "}"; + }else { // exists parameters + int size = userDefParamsList.size(); + StringBuilder parameter = new StringBuilder(); + parameter.append("("); + for (int i = 0 ;i < size - 1; i++){ + parameter.append("?,"); + } + parameter.append("?)"); + method = "{call " + procedureParameters.getMethod() + parameter.toString()+ "}"; + } + + logger.info("call method : {}",method); + // call method + stmt = connection.prepareCall(method); + if(taskProps.getTaskTimeoutStrategy() == TaskTimeoutStrategy.FAILED || taskProps.getTaskTimeoutStrategy() == TaskTimeoutStrategy.WARNFAILED){ + stmt.setQueryTimeout(taskProps.getTaskTimeout()); + } + Map outParameterMap = new HashMap<>(); + if (userDefParamsList != null && userDefParamsList.size() > 0){ + int index = 1; + for (Property property : userDefParamsList){ + logger.info("localParams : prop : {} , dirct : {} , type : {} , value : {}" + ,property.getProp(), + property.getDirect(), + property.getType(), + property.getValue()); + // set parameters + if (property.getDirect().equals(Direct.IN)){ + ParameterUtils.setInParameter(index,stmt,property.getType(),paramsMap.get(property.getProp()).getValue()); + }else if (property.getDirect().equals(Direct.OUT)){ + setOutParameter(index,stmt,property.getType(),paramsMap.get(property.getProp()).getValue()); + property.setValue(paramsMap.get(property.getProp()).getValue()); + outParameterMap.put(index,property); + } + index++; + } + } + + 
stmt.executeUpdate(); + + /** + * print the output parameters to the log + */ + Iterator> iter = outParameterMap.entrySet().iterator(); + while (iter.hasNext()){ + Map.Entry en = iter.next(); + + int index = en.getKey(); + Property property = en.getValue(); + String prop = property.getProp(); + DataType dataType = property.getType(); + + if (dataType.equals(DataType.VARCHAR)){ + String value = stmt.getString(index); + logger.info("out prameter key : {} , value : {}",prop,value); + }else if (dataType.equals(DataType.INTEGER)){ + int value = stmt.getInt(index); + logger.info("out prameter key : {} , value : {}",prop,value); + }else if (dataType.equals(DataType.LONG)){ + long value = stmt.getLong(index); + logger.info("out prameter key : {} , value : {}",prop,value); + }else if (dataType.equals(DataType.FLOAT)){ + float value = stmt.getFloat(index); + logger.info("out prameter key : {} , value : {}",prop,value); + }else if (dataType.equals(DataType.DOUBLE)){ + double value = stmt.getDouble(index); + logger.info("out prameter key : {} , value : {}",prop,value); + }else if (dataType.equals(DataType.DATE)){ + Date value = stmt.getDate(index); + logger.info("out prameter key : {} , value : {}",prop,value); + }else if (dataType.equals(DataType.TIME)){ + Time value = stmt.getTime(index); + logger.info("out prameter key : {} , value : {}",prop,value); + }else if (dataType.equals(DataType.TIMESTAMP)){ + Timestamp value = stmt.getTimestamp(index); + logger.info("out prameter key : {} , value : {}",prop,value); + }else if (dataType.equals(DataType.BOOLEAN)){ + boolean value = stmt.getBoolean(index); + logger.info("out prameter key : {} , value : {}",prop,value); + } + } + + exitStatusCode = 0; + }catch (Exception e){ + logger.error(e.getMessage(),e); + exitStatusCode = -1; + throw new RuntimeException("process interrupted. 
exit status code is : " + exitStatusCode); + } + finally { + if (stmt != null) { + try { + stmt.close(); + } catch (SQLException e) { + exitStatusCode = -1; + logger.error(e.getMessage(),e); + } + } + if (connection != null) { + try { + connection.close(); + } catch (SQLException e) { + exitStatusCode = -1; + logger.error(e.getMessage(), e); + } + } + } + } + } + } + + @Override + public AbstractParameters getParameters() { + return procedureParameters; + } + + /** + * set out parameter + * @param index + * @param stmt + * @param dataType + * @param value + * @throws Exception + */ + private void setOutParameter(int index,CallableStatement stmt,DataType dataType,String value)throws Exception{ + if (dataType.equals(DataType.VARCHAR)){ + if (StringUtils.isEmpty(value)){ + stmt.registerOutParameter(index, Types.VARCHAR); + }else { + stmt.registerOutParameter(index, Types.VARCHAR, value); + } + + }else if (dataType.equals(DataType.INTEGER)){ + if (StringUtils.isEmpty(value)){ + stmt.registerOutParameter(index, Types.INTEGER); + }else { + stmt.registerOutParameter(index, Types.INTEGER, value); + } + + }else if (dataType.equals(DataType.LONG)){ + if (StringUtils.isEmpty(value)){ + stmt.registerOutParameter(index,Types.INTEGER); + }else { + stmt.registerOutParameter(index,Types.INTEGER ,value); + } + }else if (dataType.equals(DataType.FLOAT)){ + if (StringUtils.isEmpty(value)){ + stmt.registerOutParameter(index, Types.FLOAT); + }else { + stmt.registerOutParameter(index, Types.FLOAT,value); + } + }else if (dataType.equals(DataType.DOUBLE)){ + if (StringUtils.isEmpty(value)){ + stmt.registerOutParameter(index, Types.DOUBLE); + }else { + stmt.registerOutParameter(index, Types.DOUBLE , value); + } + + }else if (dataType.equals(DataType.DATE)){ + if (StringUtils.isEmpty(value)){ + stmt.registerOutParameter(index, Types.DATE); + }else { + stmt.registerOutParameter(index, Types.DATE , value); + } + + }else if (dataType.equals(DataType.TIME)){ + if (StringUtils.isEmpty(value)){ + 
stmt.registerOutParameter(index, Types.TIME); + }else { + stmt.registerOutParameter(index, Types.TIME , value); + } + + }else if (dataType.equals(DataType.TIMESTAMP)){ + if (StringUtils.isEmpty(value)){ + stmt.registerOutParameter(index, Types.TIMESTAMP); + }else { + stmt.registerOutParameter(index, Types.TIMESTAMP , value); + } + + }else if (dataType.equals(DataType.BOOLEAN)){ + if (StringUtils.isEmpty(value)){ + stmt.registerOutParameter(index, Types.BOOLEAN); + }else { + stmt.registerOutParameter(index, Types.BOOLEAN , value); + } + } + } +} \ No newline at end of file diff --git a/escheduler-server/src/main/java/cn/escheduler/server/worker/task/python/PythonTask.java b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/python/PythonTask.java new file mode 100644 index 0000000000..2b7ae29b96 --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/python/PythonTask.java @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package cn.escheduler.server.worker.task.python; + + +import cn.escheduler.common.process.Property; +import cn.escheduler.common.task.AbstractParameters; +import cn.escheduler.common.task.python.PythonParameters; +import cn.escheduler.common.utils.CommonUtils; +import cn.escheduler.common.utils.JSONUtils; +import cn.escheduler.common.utils.ParameterUtils; +import cn.escheduler.dao.DaoFactory; +import cn.escheduler.dao.ProcessDao; +import cn.escheduler.dao.model.ProcessInstance; +import cn.escheduler.server.utils.ParamUtils; +import cn.escheduler.server.worker.task.AbstractTask; +import cn.escheduler.server.worker.task.PythonCommandExecutor; +import cn.escheduler.server.worker.task.TaskProps; +import org.slf4j.Logger; + +import java.io.File; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.nio.file.attribute.FileAttribute; +import java.nio.file.attribute.PosixFilePermission; +import java.nio.file.attribute.PosixFilePermissions; +import java.util.Map; +import java.util.Set; + +/** + * python task + */ +public class PythonTask extends AbstractTask { + + /** + * python parameters + */ + private PythonParameters pythonParameters; + + /** + * task dir + */ + private String taskDir; + + private PythonCommandExecutor pythonProcessTask; + + /** + * process database access + */ + private ProcessDao processDao; + + + public PythonTask(TaskProps taskProps, Logger logger) { + super(taskProps, logger); + + this.taskDir = taskProps.getTaskDir(); + + this.pythonProcessTask = new PythonCommandExecutor(this::logHandle, + taskProps.getTaskDir(), taskProps.getTaskAppId(), + taskProps.getTenantCode(), CommonUtils.getPythonSystemEnvPath(), taskProps.getTaskStartTime(), + taskProps.getTaskTimeout(), logger); + this.processDao = DaoFactory.getDaoInstance(ProcessDao.class); + } + + @Override + public void init() { + logger.info("python task params {}", taskProps.getTaskParams()); + + pythonParameters = 
JSONUtils.parseObject(taskProps.getTaskParams(), PythonParameters.class); + + if (!pythonParameters.checkParameters()) { + throw new RuntimeException("python task params is not valid"); + } + } + + @Override + public void handle() throws Exception { + try { + // construct process + exitStatusCode = pythonProcessTask.run(buildCommand(), processDao); + } catch (Exception e) { + logger.error("python process exception", e); + exitStatusCode = -1; + } + } + + @Override + public void cancelApplication(boolean cancelApplication) throws Exception { + // cancel process + pythonProcessTask.cancelApplication(); + } + + /** + * build command + * @return + * @throws Exception + */ + private String buildCommand() throws Exception { + // generate scripts + String fileName = String.format("%s/py_%s_node.py", taskDir, taskProps.getTaskAppId()); + Path path = new File(fileName).toPath(); + + + + if (Files.exists(path)) { + return fileName; + } + + String rawScript = pythonParameters.getRawScript().replaceAll("\\r\\n", "\n"); + + + // find process instance by task id + ProcessInstance processInstance = processDao.findProcessInstanceByTaskId(taskProps.getTaskInstId()); + + /** + * combining local and global parameters + */ + Map paramsMap = ParamUtils.convert(taskProps.getUserDefParamsMap(), + taskProps.getDefinedParams(), + pythonParameters.getLocalParametersMap(), + processInstance.getCmdTypeIfComplement(), + processInstance.getScheduleTime()); + if (paramsMap != null){ + rawScript = ParameterUtils.convertParameterPlaceholders(rawScript, ParamUtils.convert(paramsMap)); + } + + + pythonParameters.setRawScript(rawScript); + + logger.info("raw script : {}", pythonParameters.getRawScript()); + logger.info("task dir : {}", taskDir); + + Set perms = PosixFilePermissions.fromString("rwxr-xr-x"); + FileAttribute> attr = PosixFilePermissions.asFileAttribute(perms); + + Files.createFile(path, attr); + + Files.write(path, pythonParameters.getRawScript().getBytes(), StandardOpenOption.APPEND); 
+ + return fileName; + } + + @Override + public AbstractParameters getParameters() { + return pythonParameters; + } + + + +} diff --git a/escheduler-server/src/main/java/cn/escheduler/server/worker/task/shell/ShellTask.java b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/shell/ShellTask.java new file mode 100644 index 0000000000..b8564e8f95 --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/shell/ShellTask.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package cn.escheduler.server.worker.task.shell; + + +import cn.escheduler.common.Constants; +import cn.escheduler.common.process.Property; +import cn.escheduler.common.task.AbstractParameters; +import cn.escheduler.common.task.shell.ShellParameters; +import cn.escheduler.common.utils.JSONUtils; +import cn.escheduler.common.utils.ParameterUtils; +import cn.escheduler.dao.DaoFactory; +import cn.escheduler.dao.ProcessDao; +import cn.escheduler.dao.model.ProcessInstance; +import cn.escheduler.server.utils.ParamUtils; +import cn.escheduler.server.worker.task.AbstractTask; +import cn.escheduler.server.worker.task.ShellCommandExecutor; +import cn.escheduler.server.worker.task.TaskProps; +import org.slf4j.Logger; + +import java.io.File; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.nio.file.attribute.FileAttribute; +import java.nio.file.attribute.PosixFilePermission; +import java.nio.file.attribute.PosixFilePermissions; +import java.util.Map; +import java.util.Set; + +/** + * shell task + */ +public class ShellTask extends AbstractTask { + + private ShellParameters shellParameters; + + /** + * task dir + */ + private String taskDir; + + private ShellCommandExecutor processTask; + + /** + * process database access + */ + private ProcessDao processDao; + + + public ShellTask(TaskProps props, Logger logger) { + super(props, logger); + + this.taskDir = props.getTaskDir(); + + this.processTask = new ShellCommandExecutor(this::logHandle, + props.getTaskDir(), props.getTaskAppId(), + props.getTenantCode(), props.getEnvFile(), props.getTaskStartTime(), + props.getTaskTimeout(), logger); + this.processDao = DaoFactory.getDaoInstance(ProcessDao.class); + } + + @Override + public void init() { + logger.info("shell task params {}", taskProps.getTaskParams()); + + shellParameters = JSONUtils.parseObject(taskProps.getTaskParams(), ShellParameters.class); + + if (!shellParameters.checkParameters()) { + throw new 
RuntimeException("shell task params is not valid"); + } + } + + @Override + public void handle() throws Exception { + try { + // construct process + exitStatusCode = processTask.run(buildCommand(), processDao); + } catch (Exception e) { + logger.error(e.getMessage(), e); + exitStatusCode = -1; + } + } + + @Override + public void cancelApplication(boolean cancelApplication) throws Exception { + // cancel process + processTask.cancelApplication(); + } + + /** + * create command + * @return + * @throws Exception + */ + private String buildCommand() throws Exception { + // generate scripts + String fileName = String.format("%s/%s_node.sh", taskDir, taskProps.getTaskAppId()); + Path path = new File(fileName).toPath(); + + if (Files.exists(path)) { + return fileName; + } + + String script = shellParameters.getRawScript().replaceAll("\\r\\n", "\n"); + + // find process instance by task id + ProcessInstance processInstance = processDao.findProcessInstanceByTaskId(taskProps.getTaskInstId()); + + /** + * combining local and global parameters + */ + Map paramsMap = ParamUtils.convert(taskProps.getUserDefParamsMap(), + taskProps.getDefinedParams(), + shellParameters.getLocalParametersMap(), + processInstance.getCmdTypeIfComplement(), + processInstance.getScheduleTime()); + if (paramsMap != null){ + script = ParameterUtils.convertParameterPlaceholders(script, ParamUtils.convert(paramsMap)); + } + + + shellParameters.setRawScript(script); + + logger.info("raw script : {}", shellParameters.getRawScript()); + logger.info("task dir : {}", taskDir); + + Set perms = PosixFilePermissions.fromString(Constants.RWXR_XR_X); + FileAttribute> attr = PosixFilePermissions.asFileAttribute(perms); + + Files.createFile(path, attr); + + Files.write(path, shellParameters.getRawScript().getBytes(), StandardOpenOption.APPEND); + + return fileName; + } + + @Override + public AbstractParameters getParameters() { + return shellParameters; + } + + + +} diff --git 
a/escheduler-server/src/main/java/cn/escheduler/server/worker/task/spark/SparkTask.java b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/spark/SparkTask.java new file mode 100644 index 0000000000..4764a96a0c --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/spark/SparkTask.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package cn.escheduler.server.worker.task.spark; + +import cn.escheduler.common.process.Property; +import cn.escheduler.common.task.AbstractParameters; +import cn.escheduler.common.task.spark.SparkParameters; +import cn.escheduler.common.utils.JSONUtils; +import cn.escheduler.common.utils.ParameterUtils; +import cn.escheduler.dao.model.ProcessInstance; +import cn.escheduler.server.utils.ParamUtils; +import cn.escheduler.server.utils.SparkArgsUtils; +import cn.escheduler.server.worker.task.AbstractYarnTask; +import cn.escheduler.server.worker.task.TaskProps; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/** + * spark task + */ +public class SparkTask extends AbstractYarnTask { + + /** + * spark command + */ + private static final String SPARK_COMMAND = "spark-submit"; + + /** + * spark parameters + */ + private SparkParameters sparkParameters; + + public SparkTask(TaskProps props, Logger logger) { + super(props, logger); + } + + @Override + public void init() { + + logger.info("spark task params {}", taskProps.getTaskParams()); + + sparkParameters = JSONUtils.parseObject(taskProps.getTaskParams(), SparkParameters.class); + + if (!sparkParameters.checkParameters()) { + throw new RuntimeException("spark task params is not valid"); + } + sparkParameters.setQueue(taskProps.getQueue()); + + if (StringUtils.isNotEmpty(sparkParameters.getMainArgs())) { + String args = sparkParameters.getMainArgs(); + // get process instance by task instance id + ProcessInstance processInstance = processDao.findProcessInstanceByTaskId(taskProps.getTaskInstId()); + + /** + * combining local and global parameters + */ + Map paramsMap = ParamUtils.convert(taskProps.getUserDefParamsMap(), + taskProps.getDefinedParams(), + sparkParameters.getLocalParametersMap(), + processInstance.getCmdTypeIfComplement(), + processInstance.getScheduleTime()); + if (paramsMap != null ){ + args = 
ParameterUtils.convertParameterPlaceholders(args, ParamUtils.convert(paramsMap)); + } + sparkParameters.setMainArgs(args); + } + } + + /** + * create command + * @return + */ + @Override + protected String buildCommand() { + List args = new ArrayList<>(); + + args.add(SPARK_COMMAND); + + // other parameters + args.addAll(SparkArgsUtils.buildArgs(sparkParameters)); + + String command = ParameterUtils + .convertParameterPlaceholders(String.join(" ", args), taskProps.getDefinedParams()); + + logger.info("spark task command : {}", command); + + return command; + } + + @Override + public AbstractParameters getParameters() { + return sparkParameters; + } +} diff --git a/escheduler-server/src/main/java/cn/escheduler/server/worker/task/sql/SqlTask.java b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/sql/SqlTask.java new file mode 100644 index 0000000000..36d92d71b5 --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/worker/task/sql/SqlTask.java @@ -0,0 +1,378 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package cn.escheduler.server.worker.task.sql; + +import cn.escheduler.alert.utils.MailUtils; +import cn.escheduler.common.Constants; +import cn.escheduler.common.enums.DbType; +import cn.escheduler.common.enums.ShowType; +import cn.escheduler.common.enums.TaskTimeoutStrategy; +import cn.escheduler.common.enums.UdfType; +import cn.escheduler.common.job.db.*; +import cn.escheduler.common.process.Property; +import cn.escheduler.common.task.AbstractParameters; +import cn.escheduler.common.task.sql.SqlParameters; +import cn.escheduler.common.task.sql.SqlType; +import cn.escheduler.common.utils.CollectionUtils; +import cn.escheduler.common.utils.ParameterUtils; +import cn.escheduler.dao.AlertDao; +import cn.escheduler.dao.DaoFactory; +import cn.escheduler.dao.ProcessDao; +import cn.escheduler.dao.model.*; +import cn.escheduler.server.utils.ParamUtils; +import cn.escheduler.server.utils.UDFUtils; +import cn.escheduler.server.worker.task.AbstractTask; +import cn.escheduler.server.worker.task.TaskProps; +import com.alibaba.fastjson.JSONArray; +import com.alibaba.fastjson.JSONObject; +import com.alibaba.fastjson.serializer.SerializerFeature; +import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.EnumUtils; +import org.slf4j.Logger; + +import java.sql.*; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * sql task + */ +public class SqlTask extends AbstractTask { + + /** + * sql parameters + */ + private SqlParameters sqlParameters; + + /** + * process database access + */ + private ProcessDao processDao; + + /** + * alert dao + */ + private AlertDao alertDao; + + + public SqlTask(TaskProps props, Logger logger) { + super(props, logger); + + logger.info("sql task params {}", taskProps.getTaskParams()); + this.sqlParameters = JSONObject.parseObject(props.getTaskParams(), SqlParameters.class); + + if (!sqlParameters.checkParameters()) { + throw new RuntimeException("sql task params is not valid"); + } + 
this.processDao = DaoFactory.getDaoInstance(ProcessDao.class); + this.alertDao = DaoFactory.getDaoInstance(AlertDao.class); + } + + @Override + public void handle() throws Exception { + // set the name of the current thread + String threadLoggerInfoName = String.format("TaskLogInfo-%s", taskProps.getTaskAppId()); + Thread.currentThread().setName(threadLoggerInfoName); + logger.info(sqlParameters.toString()); + logger.info("sql type : {}, datasource : {}, sql : {} , localParams : {},udfs : {},showType : {},connParams : {}", + sqlParameters.getType(), sqlParameters.getDatasource(), sqlParameters.getSql(), + sqlParameters.getLocalParams(), sqlParameters.getUdfs(), sqlParameters.getShowType(), sqlParameters.getConnParams()); + + // determine whether there is a data source + if (sqlParameters.getDatasource() == 0){ + logger.error("datasource is null"); + exitStatusCode = -1; + }else { + List createFuncs = null; + DataSource dataSource = processDao.findDataSourceById(sqlParameters.getDatasource()); + logger.info("datasource name : {} , type : {} , desc : {} , user_id : {} , parameter : {}", + dataSource.getName(),dataSource.getType(),dataSource.getNote(), + dataSource.getUserId(),dataSource.getConnectionParams()); + + if (dataSource != null){ + Connection con = null; + try { + BaseDataSource baseDataSource = null; + if (DbType.MYSQL.name().equals(dataSource.getType().name())){ + baseDataSource = JSONObject.parseObject(dataSource.getConnectionParams(),MySQLDataSource.class); + Class.forName(Constants.JDBC_MYSQL_CLASS_NAME); + }else if (DbType.POSTGRESQL.name().equals(dataSource.getType().name())){ + baseDataSource = JSONObject.parseObject(dataSource.getConnectionParams(),PostgreDataSource.class); + Class.forName(Constants.JDBC_POSTGRESQL_CLASS_NAME); + }else if (DbType.HIVE.name().equals(dataSource.getType().name())){ + baseDataSource = JSONObject.parseObject(dataSource.getConnectionParams(),HiveDataSource.class); + Class.forName(Constants.JDBC_HIVE_CLASS_NAME); + }else 
if (DbType.SPARK.name().equals(dataSource.getType().name())){ + baseDataSource = JSONObject.parseObject(dataSource.getConnectionParams(),SparkDataSource.class); + Class.forName(Constants.JDBC_SPARK_CLASS_NAME); + } + + Map sqlParamMap = new HashMap(); + StringBuilder sqlBuilder = new StringBuilder(); + + // ready to execute SQL and parameter entity Map + setSqlAndSqlParamsMap(sqlBuilder,sqlParamMap); + + if(EnumUtils.isValidEnum(UdfType.class, sqlParameters.getType()) && StringUtils.isNotEmpty(sqlParameters.getUdfs())){ + List udfFuncList = processDao.queryUdfFunListByids(sqlParameters.getUdfs()); + createFuncs = UDFUtils.createFuncs(udfFuncList, taskProps.getTenantCode(), logger); + } + + // execute sql task + con = executeFuncAndSql(baseDataSource,sqlBuilder.toString(),sqlParamMap,createFuncs); + + } finally { + if (con != null) { + try { + con.close(); + } catch (SQLException e) { + throw e; + } + } + } + } + } + } + + /** + * ready to execute SQL and parameter entity Map + * @return + */ + private void setSqlAndSqlParamsMap(StringBuilder sqlBuilder,Map sqlParamsMap) { + + String sql = sqlParameters.getSql(); + + // find process instance by task id + ProcessInstance processInstance = processDao.findProcessInstanceByTaskId(taskProps.getTaskInstId()); + + Map paramsMap = ParamUtils.convert(taskProps.getUserDefParamsMap(), + taskProps.getDefinedParams(), + sqlParameters.getLocalParametersMap(), + processInstance.getCmdTypeIfComplement(), + processInstance.getScheduleTime()); + + // spell SQL according to the final user-defined variable + if(paramsMap == null){ + sqlBuilder.append(sql); + return; + } + + // special characters need to be escaped, ${} needs to be escaped + String rgex = "'?\\$\\{(.*?)\\}'?"; + setSqlParamsMap(sql,rgex,sqlParamsMap,paramsMap); + + // replace the ${} of the SQL statement with the Placeholder + String formatSql = sql.replaceAll(rgex,"?"); + sqlBuilder.append(formatSql); + + // print repalce sql + 
printReplacedSql(sql,formatSql,rgex,sqlParamsMap); + } + + @Override + public AbstractParameters getParameters() { + return this.sqlParameters; + } + + /** + * execute sql + * @param baseDataSource + * @param sql + * @param params + */ + public Connection executeFuncAndSql(BaseDataSource baseDataSource, String sql, Map params, List createFuncs){ + Connection connection = null; + try { + + if (DbType.HIVE.name().equals(sqlParameters.getType())) { + Properties paramProp = new Properties(); + paramProp.setProperty("user", baseDataSource.getUser()); + paramProp.setProperty("password", baseDataSource.getPassword()); + Map connParamMap = CollectionUtils.stringToMap(sqlParameters.getConnParams(), Constants.SEMICOLON,"hiveconf:"); + if(connParamMap != null){ + paramProp.putAll(connParamMap); + } + + connection = DriverManager.getConnection(baseDataSource.getJdbcUrl(),paramProp); + }else{ + connection = DriverManager.getConnection(baseDataSource.getJdbcUrl(), + baseDataSource.getUser(), baseDataSource.getPassword()); + } + + Statement funcStmt = connection.createStatement(); + // create temp function + if (createFuncs != null) { + for (String createFunc : createFuncs) { + logger.info("hive create function sql: {}", createFunc); + funcStmt.execute(createFunc); + } + } + + PreparedStatement stmt = connection.prepareStatement(sql); + if(taskProps.getTaskTimeoutStrategy() == TaskTimeoutStrategy.FAILED || taskProps.getTaskTimeoutStrategy() == TaskTimeoutStrategy.WARNFAILED){ + stmt.setQueryTimeout(taskProps.getTaskTimeout()); + } + if(params != null){ + for(Integer key : params.keySet()){ + Property prop = params.get(key); + ParameterUtils.setInParameter(key,stmt,prop.getType(),prop.getValue()); + } + } + // decide whether to executeQuery or executeUpdate based on sqlType + if(sqlParameters.getSqlType() == SqlType.QUERY.ordinal()){ + // query statements need to be convert to JsonArray and inserted into Alert to send + JSONArray array=new JSONArray(); + ResultSet resultSet = 
stmt.executeQuery(); + ResultSetMetaData md=resultSet.getMetaData(); + int num=md.getColumnCount(); + + while(resultSet.next()){ + JSONObject mapOfColValues=new JSONObject(true); + for(int i=1;i<=num;i++){ + mapOfColValues.put(md.getColumnName(i), resultSet.getObject(i)); + } + array.add(mapOfColValues); + } + + logger.info("execute sql : {}",JSONObject.toJSONString(array, SerializerFeature.WriteMapNullValue)); + + // send as an attachment + if(StringUtils.isEmpty(sqlParameters.getShowType())){ + logger.info("showType is empty,don't need send email"); + }else{ + if(array.size() > 0 ){ + sendAttachment(taskProps.getNodeName() + " query resultsets ",JSONObject.toJSONString(array, SerializerFeature.WriteMapNullValue)); + } + } + + exitStatusCode = 0; + + }else if(sqlParameters.getSqlType() == SqlType.NON_QUERY.ordinal()){ + // non query statement + int result = stmt.executeUpdate(); + exitStatusCode = 0; + } + + } catch (Exception e) { + logger.error(e.getMessage(),e); + } + return connection; + } + + + /** + * send mail as an attachment + * @param title + * @param content + */ + public void sendAttachment(String title,String content){ + + // process instance + ProcessInstance instance = processDao.findProcessInstanceByTaskId(taskProps.getTaskInstId()); + + // process define + ProcessDefinition processDefine = processDao.findProcessDefineById(instance.getProcessDefinitionId()); + + List users = alertDao.queryUserByAlertGroupId(instance.getWarningGroupId()); + + // receiving group list + List receviersList = new ArrayList(); + for(User user:users){ + receviersList.add(user.getEmail()); + } + // custom receiver + String receivers = processDefine.getReceivers(); + if (StringUtils.isNotEmpty(receivers)){ + String[] splits = receivers.split(Constants.COMMA); + for (String receiver : splits){ + receviersList.add(receiver); + } + } + + // copy list + List receviersCcList = new ArrayList(); + + + // Custom Copier + String receiversCc = processDefine.getReceiversCc(); + + if 
(StringUtils.isNotEmpty(receiversCc)){ + String[] splits = receiversCc.split(Constants.COMMA); + for (String receiverCc : splits){ + receviersCcList.add(receiverCc); + } + } + + String showTypeName = sqlParameters.getShowType().replace(Constants.COMMA,"").trim(); + if(EnumUtils.isValidEnum(ShowType.class,showTypeName)){ + MailUtils.sendMails(receviersList,receviersCcList,title, content, ShowType.valueOf(showTypeName)); + }else{ + logger.error("showType: {} is not valid " ,showTypeName); + } + } + + /** + * regular expressions match the contents between two specified strings + * @param content + * @return + */ + public void setSqlParamsMap(String content, String rgex, Map sqlParamsMap, Map paramsPropsMap){ + Pattern pattern = Pattern.compile(rgex); + Matcher m = pattern.matcher(content); + int index = 1; + while (m.find()) { + + String paramName = m.group(1); + Property prop = paramsPropsMap.get(paramName); + + sqlParamsMap.put(index,prop); + index ++; + } + } + + /** + * print replace sql + * @param content + * @param formatSql + * @param rgex + * @param sqlParamsMap + */ + public void printReplacedSql(String content, String formatSql,String rgex, Map sqlParamsMap){ + //parameter print style + logger.info("after replace sql , preparing : {}" , formatSql); + StringBuffer logPrint = new StringBuffer("replaced sql , parameters:"); + for(int i=1;i<=sqlParamsMap.size();i++){ + logPrint.append(sqlParamsMap.get(i).getValue()+"("+sqlParamsMap.get(i).getType()+")"); + } + logger.info(logPrint.toString()); + + //direct print style + Pattern pattern = Pattern.compile(rgex); + Matcher m = pattern.matcher(content); + int index = 1; + StringBuffer sb = new StringBuffer("replaced sql , direct:"); + while (m.find()) { + + m.appendReplacement(sb, sqlParamsMap.get(index).getValue()); + + index ++; + } + m.appendTail(sb); + logger.info(sb.toString()); + } +} diff --git a/escheduler-server/src/main/java/cn/escheduler/server/zk/ZKMasterClient.java 
b/escheduler-server/src/main/java/cn/escheduler/server/zk/ZKMasterClient.java new file mode 100644 index 0000000000..113ef5de9f --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/zk/ZKMasterClient.java @@ -0,0 +1,465 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package cn.escheduler.server.zk; + +import cn.escheduler.common.Constants; +import cn.escheduler.common.enums.ExecutionStatus; +import cn.escheduler.common.utils.CollectionUtils; +import cn.escheduler.common.utils.DateUtils; +import cn.escheduler.common.utils.OSUtils; +import cn.escheduler.common.zk.AbstractZKClient; +import cn.escheduler.dao.AlertDao; +import cn.escheduler.dao.DaoFactory; +import cn.escheduler.dao.ProcessDao; +import cn.escheduler.dao.ServerDao; +import cn.escheduler.dao.model.ProcessInstance; +import cn.escheduler.dao.model.TaskInstance; +import cn.escheduler.server.ResInfo; +import cn.escheduler.server.utils.ProcessUtils; +import org.apache.curator.framework.CuratorFramework; +import org.apache.curator.framework.recipes.cache.PathChildrenCache; +import org.apache.curator.framework.recipes.cache.PathChildrenCacheEvent; +import org.apache.curator.framework.recipes.cache.PathChildrenCacheListener; +import org.apache.curator.framework.recipes.locks.InterProcessMutex; +import org.apache.curator.utils.ThreadUtils; +import org.apache.zookeeper.CreateMode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Date; +import java.util.List; +import java.util.concurrent.ThreadFactory; + + +/** + * zookeeper master client + * + * single instance + */ +public class ZKMasterClient extends AbstractZKClient { + + private static final Logger logger = LoggerFactory.getLogger(ZKMasterClient.class); + + private static final ThreadFactory defaultThreadFactory = ThreadUtils.newGenericThreadFactory("Master-Main-Thread"); + + /** + * master znode + */ + private String masterZNode = null; + + /** + * master database access + */ + private ServerDao serverDao = null; + /** + * alert database access + */ + private AlertDao alertDao = null; + /** + * flow database access + */ + private ProcessDao processDao; + + + private Date createTime = null; + + /** + * zkMasterClient + */ + private static ZKMasterClient zkMasterClient = null; + + + 
private ZKMasterClient(ProcessDao processDao){ + this.processDao = processDao; + init(); + } + + private ZKMasterClient(){} + + /** + * get zkMasterClient + * @param processDao + * @return + */ + public static synchronized ZKMasterClient getZKMasterClient(ProcessDao processDao){ + if(zkMasterClient == null){ + zkMasterClient = new ZKMasterClient(processDao); + } + zkMasterClient.processDao = processDao; + + return zkMasterClient; + } + + /** + * init + */ + public void init(){ + // init dao + this.initDao(); + + // init system znode + this.initSystemZNode(); + + // monitor master + this.listenerMaster(); + + // monitor worker + this.listenerWorker(); + + // register master + this.registMaster(); + + // check if fault tolerance is required + if (getActiveMasterNum() == 1) { + processDao.selfFaultTolerant(ExecutionStatus.RUNNING_EXEUTION.ordinal()); + } + } + + + /** + * init dao + */ + public void initDao(){ + this.serverDao = DaoFactory.getDaoInstance(ServerDao.class); + this.alertDao = DaoFactory.getDaoInstance(AlertDao.class); + this.processDao = DaoFactory.getDaoInstance(ProcessDao.class); + } + + /** + * get maste dao + * @return + */ + public ServerDao getServerDao(){ + return serverDao; + } + + /** + * get alert dao + * @return + */ + public AlertDao getAlertDao() { + return alertDao; + } + + /** + * register master znode + */ + public void registMaster(){ + + // get current date + Date now = new Date(); + createTime = now ; + try { + + // encapsulation master znnode + masterZNode = masterZNodeParentPath + "/" + OSUtils.getHost() + "_"; + List masterZNodeList = zkClient.getChildren().forPath(masterZNodeParentPath); + + if (CollectionUtils.isNotEmpty(masterZNodeList)){ + boolean flag = false; + for (String masterZNode : masterZNodeList){ + if (masterZNode.startsWith(OSUtils.getHost())){ + flag = true; + break; + } + } + + if (flag){ + logger.error("register failure , master already started on host : {}" , OSUtils.getHost()); + // exit system + System.exit(-1); 
+ } + } + + // specify the format of stored data in ZK nodes + String heartbeatZKInfo = getOsInfo(now); + // create temporary sequence nodes for master znode + masterZNode = zkClient.create().withMode(CreateMode.EPHEMERAL_SEQUENTIAL).forPath(masterZNode, heartbeatZKInfo.getBytes()); + + logger.info("register master node {} success" , masterZNode); + + // handle dead server + handleDeadServer(masterZNode, Constants.MASTER_PREFIX, Constants.DELETE_ZK_OP); + + // delete master server from database + serverDao.deleteMaster(OSUtils.getHost()); + + // register master znode + serverDao.registerMaster(OSUtils.getHost(), + OSUtils.getProcessID(), + masterZNode, + ResInfo.getResInfoJson(), + createTime, + createTime); + + } catch (Exception e) { + logger.error("register master failure : " + e.getMessage(),e); + } + } + + + /** + * monitor master + */ + public void listenerMaster(){ + PathChildrenCache masterPc = new PathChildrenCache(zkClient, masterZNodeParentPath, true ,defaultThreadFactory); + + try { + Date now = new Date(); + createTime = now ; + masterPc.start(); + masterPc.getListenable().addListener(new PathChildrenCacheListener() { + @Override + public void childEvent(CuratorFramework client, PathChildrenCacheEvent event) throws Exception { + switch (event.getType()) { + case CHILD_ADDED: + logger.info("master node added : {}",event.getData().getPath()); + break; + case CHILD_REMOVED: + String path = event.getData().getPath(); + logger.info("master node deleted : {}",event.getData().getPath()); + + InterProcessMutex mutexLock = null; + try { + // handle dead server, add to zk dead server pth + handleDeadServer(path, Constants.MASTER_PREFIX, Constants.ADD_ZK_OP); + + if(masterZNode.equals(path)){ + logger.error("master server({}) of myself dead , stopping...", path); + stoppable.stop(String.format("master server(%s) of myself dead , stopping...", path)); + break; + } + + // create a distributed lock, and the root node path of the lock space is 
/escheduler/lock/failover/master + String znodeLock = zkMasterClient.getMasterFailoverLockPath(); + mutexLock = new InterProcessMutex(zkMasterClient.getZkClient(), znodeLock); + mutexLock.acquire(); + + String masterHost = getHostByEventDataPath(path); + for (int i = 0; i < Constants.ESCHEDULER_WARN_TIMES_FAILOVER;i++) { + alertDao.sendServerStopedAlert(1, masterHost, "Master-Server"); + } + + logger.info("start master failover ..."); + + List needFailoverProcessInstanceList = processDao.queryNeddFailoverProcessInstances(masterHost); + + //updateProcessInstance host is null and insert into command + for(ProcessInstance processInstance : needFailoverProcessInstanceList){ + processDao.processNeedFailoverProcessInstances(processInstance); + } + + logger.info("master failover end"); + }catch (Exception e){ + logger.error("master failover failed : " + e.getMessage(),e); + }finally { + if (mutexLock != null){ + try { + mutexLock.release(); + } catch (Exception e) { + logger.error("lock relase failed : " + e.getMessage(),e); + } + } + } + break; + case CHILD_UPDATED: + if (event.getData().getPath().contains(OSUtils.getHost())){ + byte[] bytes = zkClient.getData().forPath(event.getData().getPath()); + String resInfoStr = new String(bytes); + String[] splits = resInfoStr.split(Constants.COMMA); + if (splits.length != Constants.HEARTBEAT_FOR_ZOOKEEPER_INFO_LENGTH) { + return; + } + // updateProcessInstance Master information in database according to host + serverDao.updateMaster(OSUtils.getHost(), + OSUtils.getProcessID(), + ResInfo.getResInfoJson(Double.parseDouble(splits[2]), + Double.parseDouble(splits[3])), + DateUtils.stringToDate(splits[5])); + + logger.debug("master zk node updated : {}",event.getData().getPath()); + } + break; + default: + break; + } + } + }); + }catch (Exception e){ + logger.error("monitor master failed : " + e.getMessage(),e); + } + + } + + /** + * monitor worker + */ + public void listenerWorker(){ + + PathChildrenCache workerPc = new 
PathChildrenCache(zkClient,workerZNodeParentPath,true ,defaultThreadFactory); + try { + Date now = new Date(); + createTime = now ; + workerPc.start(); + workerPc.getListenable().addListener(new PathChildrenCacheListener() { + @Override + public void childEvent(CuratorFramework client, PathChildrenCacheEvent event) { + switch (event.getType()) { + case CHILD_ADDED: + logger.info("node added : {}" ,event.getData().getPath()); + break; + case CHILD_REMOVED: + String path = event.getData().getPath(); + + logger.info("node deleted : {}",event.getData().getPath()); + + InterProcessMutex mutex = null; + try { + + // handle dead server + handleDeadServer(path, Constants.WORKER_PREFIX, Constants.ADD_ZK_OP); + + // create a distributed lock, and the root node path of the lock space is /escheduler/lock/failover/worker + String znodeLock = zkMasterClient.getWorkerFailoverLockPath(); + mutex = new InterProcessMutex(zkMasterClient.getZkClient(), znodeLock); + mutex.acquire(); + + String workerHost = getHostByEventDataPath(path); + for (int i = 0; i < Constants.ESCHEDULER_WARN_TIMES_FAILOVER;i++) { + alertDao.sendServerStopedAlert(1, workerHost, "Worker-Server"); + } + + logger.info("start worker failover ..."); + + + List needFailoverTaskInstanceList = processDao.queryNeedFailoverTaskInstances(workerHost); + for(TaskInstance taskInstance : needFailoverTaskInstanceList){ + ProcessInstance instance = processDao.findProcessInstanceDetailById(taskInstance.getProcessInstanceId()); + if(instance!=null){ + taskInstance.setProcessInstance(instance); + } + // only kill yarn job if exists , the local thread has exited + ProcessUtils.killYarnJob(taskInstance); + } + + //updateProcessInstance state value is NEED_FAULT_TOLERANCE + processDao.updateNeedFailoverTaskInstances(workerHost); + + logger.info("worker failover end"); + }catch (Exception e){ + logger.error("worker failover failed : " + e.getMessage(),e); + } + finally { + if (mutex != null){ + try { + mutex.release(); + } catch 
(Exception e) { + logger.error("lock relase failed : " + e.getMessage(),e); + } + } + } + break; + default: + break; + } + } + }); + }catch (Exception e){ + logger.error("listener worker failed : " + e.getMessage(),e); + } + + } + + + /** + * get os info + * @param now + * @return + */ + private String getOsInfo(Date now) { + return ResInfo.buildHeartbeatForZKInfo(OSUtils.getHost(), + OSUtils.getProcessID(), + OSUtils.cpuUsage(), + OSUtils.memoryUsage(), + DateUtils.dateToString(now), + DateUtils.dateToString(now)); + } + + + /** + * get master znode + * @return + */ + public String getMasterZNode() { + return masterZNode; + } + + + /** + * get master lock path + * @return + */ + public String getMasterLockPath(){ + return conf.getString(Constants.ZOOKEEPER_ESCHEDULER_LOCK_MASTERS); + } + + /** + * get master failover lock path + * @return + */ + public String getMasterFailoverLockPath(){ + return conf.getString(Constants.ZOOKEEPER_ESCHEDULER_LOCK_FAILOVER_MASTERS); + } + + /** + * get worker failover lock path + * @return + */ + public String getWorkerFailoverLockPath(){ + return conf.getString(Constants.ZOOKEEPER_ESCHEDULER_LOCK_FAILOVER_WORKERS); + } + + /** + * get zkclient + * @return + */ + public CuratorFramework getZkClient() { + return zkClient; + } + + + + /** + * get host ip + * @param path + * @return + */ + private String getHostByEventDataPath(String path) { + int startIndex = path.lastIndexOf("/")+1; + int endIndex = path.lastIndexOf("_"); + + if(startIndex >= endIndex){ + logger.error("parse ip error"); + } + return path.substring(startIndex, endIndex); + } + + + + + +} diff --git a/escheduler-server/src/main/java/cn/escheduler/server/zk/ZKWorkerClient.java b/escheduler-server/src/main/java/cn/escheduler/server/zk/ZKWorkerClient.java new file mode 100644 index 0000000000..7cf899d832 --- /dev/null +++ b/escheduler-server/src/main/java/cn/escheduler/server/zk/ZKWorkerClient.java @@ -0,0 +1,286 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package cn.escheduler.server.zk; + +import cn.escheduler.common.Constants; +import cn.escheduler.common.utils.CollectionUtils; +import cn.escheduler.common.utils.DateUtils; +import cn.escheduler.common.utils.OSUtils; +import cn.escheduler.common.zk.AbstractZKClient; +import cn.escheduler.dao.DaoFactory; +import cn.escheduler.dao.ServerDao; +import cn.escheduler.server.ResInfo; +import org.apache.curator.framework.CuratorFramework; +import org.apache.curator.framework.recipes.cache.PathChildrenCache; +import org.apache.curator.framework.recipes.cache.PathChildrenCacheEvent; +import org.apache.curator.framework.recipes.cache.PathChildrenCacheListener; +import org.apache.curator.utils.ThreadUtils; +import org.apache.zookeeper.CreateMode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Date; +import java.util.List; +import java.util.concurrent.ThreadFactory; + + +/** + * zookeeper worker client + * single instance + */ +public class ZKWorkerClient extends AbstractZKClient { + + private static final Logger logger = LoggerFactory.getLogger(ZKWorkerClient.class); + + + private static final ThreadFactory defaultThreadFactory = ThreadUtils.newGenericThreadFactory("Worker-Main-Thread"); + + + /** 
+ * worker znode + */ + private String workerZNode = null; + + /** + * worker database access + */ + private ServerDao serverDao = null; + + /** + * create time + */ + private Date createTime = null; + + /** + * zkWorkerClient + */ + private static ZKWorkerClient zkWorkerClient = null; + + private ZKWorkerClient(){ + init(); + } + + /** + * init + */ + private void init(){ + // init worker dao + serverDao = DaoFactory.getDaoInstance(ServerDao.class); + + // init system znode + this.initSystemZNode(); + + // monitor worker + this.listenerWorker(); + + // register worker + this.registWorker(); + } + + + /** + * get zkWorkerClient + * + * @return + */ + public static synchronized ZKWorkerClient getZKWorkerClient(){ + if(zkWorkerClient == null){ + zkWorkerClient = new ZKWorkerClient(); + } + + return zkWorkerClient; + } + + /** + * get worker dao + * @return + */ + public ServerDao getServerDao(){ + return serverDao; + } + + + public String initWorkZNode() throws Exception { + + Date now = new Date(); + String heartbeatZKInfo = getOsInfo(now); + + + workerZNode = workerZNodeParentPath + "/" + OSUtils.getHost() + "_"; + workerZNode = zkClient.create().withMode(CreateMode.EPHEMERAL_SEQUENTIAL).forPath(workerZNode, + heartbeatZKInfo.getBytes()); + logger.info("register worker node {} success", workerZNode); + return workerZNode; + } + + /** + * register worker + */ + private void registWorker(){ + + // get current date + Date now = new Date(); + createTime = now ; + try { + + // encapsulation worker znnode + workerZNode = workerZNodeParentPath + "/" + OSUtils.getHost() + "_"; + List workerZNodeList = zkClient.getChildren().forPath(workerZNodeParentPath); + + + if (CollectionUtils.isNotEmpty(workerZNodeList)){ + boolean flag = false; + for (String workerZNode : workerZNodeList){ + if (workerZNode.startsWith(OSUtils.getHost())){ + flag = true; + break; + } + } + + if (flag){ + logger.info("register failure , worker already started on : {}, please wait for a moment and try 
again" , OSUtils.getHost()); + // exit system + System.exit(-1); + } + } + +// String heartbeatZKInfo = getOsInfo(now); +// workerZNode = zkClient.create().withMode(CreateMode.EPHEMERAL_SEQUENTIAL).forPath(workerZNode, +// heartbeatZKInfo.getBytes()); + + initWorkZNode(); + // handle dead server + handleDeadServer(workerZNode, Constants.WORKER_PREFIX, Constants.DELETE_ZK_OP); + + // delete worker server from database + serverDao.deleteWorker(OSUtils.getHost()); + + // register worker znode + serverDao.registerWorker(OSUtils.getHost(), + OSUtils.getProcessID(), + workerZNode, + ResInfo.getResInfoJson(), + createTime, + createTime); + } catch (Exception e) { + logger.error("register worker failure : " + e.getMessage(),e); + } + } + + /** + * monitor worker + */ + private void listenerWorker(){ + PathChildrenCache workerPc = new PathChildrenCache(zkClient, workerZNodeParentPath, true, defaultThreadFactory); + try { + + Date now = new Date(); + createTime = now ; + workerPc.start(); + workerPc.getListenable().addListener(new PathChildrenCacheListener() { + @Override + public void childEvent(CuratorFramework client, PathChildrenCacheEvent event) throws Exception { + switch (event.getType()) { + case CHILD_ADDED: + logger.info("node added : {}" ,event.getData().getPath()); + break; + case CHILD_REMOVED: + String path = event.getData().getPath(); + + // handle dead server, add to zk dead server path + handleDeadServer(path, Constants.WORKER_PREFIX, Constants.ADD_ZK_OP); + + //find myself dead + if(workerZNode.equals(path)){ + + logger.warn(" worker server({}) of myself dead , stopping...", path); + stoppable.stop(String.format("worker server(%s) of myself dead , stopping",path)); + } + logger.info("node deleted : {}", event.getData().getPath()); + break; + case CHILD_UPDATED: + if (event.getData().getPath().contains(OSUtils.getHost())){ + byte[] bytes = zkClient.getData().forPath(event.getData().getPath()); + String resInfoStr = new String(bytes); + String[] splits = 
resInfoStr.split(Constants.COMMA); + if (splits.length != Constants.HEARTBEAT_FOR_ZOOKEEPER_INFO_LENGTH) { + return; + } + + // updateProcessInstance master info in database according to host + serverDao.updateWorker(OSUtils.getHost(), + OSUtils.getProcessID(), + ResInfo.getResInfoJson(Double.parseDouble(splits[2]) + ,Double.parseDouble(splits[3])), + DateUtils.stringToDate(splits[5])); + logger.debug("node updated : {}",event.getData().getPath()); + } + break; + default: + break; + } + } + }); + }catch (Exception e){ + logger.error("monitor worker failed : " + e.getMessage(),e); + } + + } + + /** + * get os info + * @param now + * @return + */ + private String getOsInfo(Date now) { + return ResInfo.buildHeartbeatForZKInfo(OSUtils.getHost(), + OSUtils.getProcessID(), + OSUtils.cpuUsage(), + OSUtils.memoryUsage(), + DateUtils.dateToString(now), + DateUtils.dateToString(now)); + } + + + /** + * get worker znode + * @return + */ + public String getWorkerZNode() { + return workerZNode; + } + + + /** + * get zkclient + * @return + */ + public CuratorFramework getZkClient() { + return zkClient; + } + + + /** + * get worker lock path + * @return + */ + public String getWorkerLockPath(){ + return conf.getString(Constants.ZOOKEEPER_ESCHEDULER_LOCK_WORKERS); + } + + +} diff --git a/escheduler-server/src/main/resources/application_master.properties b/escheduler-server/src/main/resources/application_master.properties new file mode 100644 index 0000000000..cc4774ae94 --- /dev/null +++ b/escheduler-server/src/main/resources/application_master.properties @@ -0,0 +1 @@ +logging.config=classpath:master_logback.xml diff --git a/escheduler-server/src/main/resources/master.properties b/escheduler-server/src/main/resources/master.properties new file mode 100644 index 0000000000..9080defc7b --- /dev/null +++ b/escheduler-server/src/main/resources/master.properties @@ -0,0 +1,21 @@ +# master execute thread num +master.exec.threads=100 + +# master execute task number in parallel 
+master.exec.task.number=20 + +# master heartbeat interval +master.heartbeat.interval=10 + +# master commit task retry times +master.task.commit.retryTimes=5 + +# master commit task interval +master.task.commit.interval=100 + + +# only less than cpu avg load, master server can work. default value : the number of cpu cores * 2 +master.max.cpuload.avg=10 + +# only larger than reserved memory, master server can work. default value : physical memory * 1/10, unit is G. +master.reserved.memory=1 diff --git a/escheduler-server/src/main/resources/master_logback.xml b/escheduler-server/src/main/resources/master_logback.xml new file mode 100644 index 0000000000..d93878218e --- /dev/null +++ b/escheduler-server/src/main/resources/master_logback.xml @@ -0,0 +1,34 @@ + + + + + + + [%level] %date{yyyy-MM-dd HH:mm:ss.SSS} %logger{96}:[%line] - %msg%n + + UTF-8 + + + + + ${log.base}/escheduler-master.log + + INFO + + + ${log.base}/escheduler-master.%d{yyyy-MM-dd_HH}.%i.log + 168 + 200MB + + + + [%level] %date{yyyy-MM-dd HH:mm:ss.SSS} %logger{96}:[%line] - %msg%n + + UTF-8 + + + + + + + \ No newline at end of file diff --git a/escheduler-server/src/main/resources/worker.properties b/escheduler-server/src/main/resources/worker.properties new file mode 100644 index 0000000000..d1e1dca98a --- /dev/null +++ b/escheduler-server/src/main/resources/worker.properties @@ -0,0 +1,15 @@ +# worker execute thread num +worker.exec.threads=100 + +# worker heartbeat interval +worker.heartbeat.interval=10 + +# submit the number of tasks at a time +worker.fetch.task.num = 10 + + +# only less than cpu avg load, worker server can work. default value : the number of cpu cores * 2 +worker.max.cpuload.avg=10 + +# only larger than reserved memory, worker server can work. default value : physical memory * 1/6, unit is G. 
+worker.reserved.memory=1 \ No newline at end of file diff --git a/escheduler-server/src/main/resources/worker_logback.xml b/escheduler-server/src/main/resources/worker_logback.xml new file mode 100644 index 0000000000..32914ec84f --- /dev/null +++ b/escheduler-server/src/main/resources/worker_logback.xml @@ -0,0 +1,53 @@ + + + + + + + [%level] %date{yyyy-MM-dd HH:mm:ss.SSS} %logger{96}:[%line] - %msg%n + + UTF-8 + + + + + INFO + + + ${log.base}/{processDefinitionId}/{processInstanceId}/{taskInstanceId}.log + + + [%level] %date{yyyy-MM-dd HH:mm:ss.SSS} %logger{96}:[%line] - %msg%n + + UTF-8 + + true + + + + ${log.base}/escheduler-worker.log + + INFO + + + + ${log.base}/escheduler-worker.%d{yyyy-MM-dd_HH}.%i.log + 168 + 200MB + +       + + + [%level] %date{yyyy-MM-dd HH:mm:ss.SSS} %logger{96}:[%line] - %msg%n + + UTF-8 + +    + + + + + + + + \ No newline at end of file diff --git a/escheduler-server/src/test/java/cn/escheduler/server/master/AlertManagerTest.java b/escheduler-server/src/test/java/cn/escheduler/server/master/AlertManagerTest.java new file mode 100644 index 0000000000..e6881a3afc --- /dev/null +++ b/escheduler-server/src/test/java/cn/escheduler/server/master/AlertManagerTest.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package cn.escheduler.server.master; + +import cn.escheduler.common.enums.ExecutionStatus; +import cn.escheduler.dao.datasource.ConnectionFactory; +import cn.escheduler.dao.mapper.ProcessDefinitionMapper; +import cn.escheduler.dao.mapper.ProcessInstanceMapper; +import cn.escheduler.dao.mapper.TaskInstanceMapper; +import cn.escheduler.dao.model.ProcessDefinition; +import cn.escheduler.dao.model.ProcessInstance; +import cn.escheduler.dao.model.TaskInstance; +import cn.escheduler.server.utils.AlertManager; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; + + +/** + * alert manager test + */ +public class AlertManagerTest { + + private static final Logger logger = LoggerFactory.getLogger(AlertManagerTest.class); + + ProcessDefinitionMapper processDefinitionMapper; + ProcessInstanceMapper processInstanceMapper; + TaskInstanceMapper taskInstanceMapper; + + AlertManager alertManager; + + @Before + public void before(){ + processDefinitionMapper = ConnectionFactory.getSqlSession().getMapper(ProcessDefinitionMapper.class); + processInstanceMapper = ConnectionFactory.getSqlSession().getMapper(ProcessInstanceMapper.class); + taskInstanceMapper = ConnectionFactory.getSqlSession().getMapper(TaskInstanceMapper.class); + alertManager = new AlertManager(); + } + + /** + * send worker alert fault tolerance + */ + @Test + public void sendWarnningWorkerleranceFaultTest(){ + // process instance + ProcessInstance processInstance = processInstanceMapper.queryDetailById(13028); + + // set process definition + ProcessDefinition processDefinition = processDefinitionMapper.queryByDefineId(47); + processInstance.setProcessDefinition(processDefinition); + + + // fault task instance + TaskInstance toleranceTask1 = taskInstanceMapper.queryById(5038); + TaskInstance 
toleranceTask2 = taskInstanceMapper.queryById(5039); + + List toleranceTaskList = new ArrayList<>(2); + toleranceTaskList.add(toleranceTask1); + toleranceTaskList.add(toleranceTask2); + + alertManager.sendWarnningWorkerleranceFault(processInstance, toleranceTaskList); + } + + + /** + * send worker alert fault tolerance + */ + @Test + public void sendWarnningOfProcessInstanceTest(){ + // process instance + ProcessInstance processInstance = processInstanceMapper.queryDetailById(13028); + + // set process definition + ProcessDefinition processDefinition = processDefinitionMapper.queryByDefineId(47); + processInstance.setProcessDefinition(processDefinition); + + + // fault task instance + TaskInstance toleranceTask1 = taskInstanceMapper.queryById(5038); + toleranceTask1.setState(ExecutionStatus.FAILURE); + TaskInstance toleranceTask2 = taskInstanceMapper.queryById(5039); + toleranceTask2.setState(ExecutionStatus.FAILURE); + + List toleranceTaskList = new ArrayList<>(2); + toleranceTaskList.add(toleranceTask1); + toleranceTaskList.add(toleranceTask2); + + alertManager.sendWarnningOfProcessInstance(processInstance, toleranceTaskList); + } + +} diff --git a/escheduler-server/src/test/java/cn/escheduler/server/master/MasterCommandTest.java b/escheduler-server/src/test/java/cn/escheduler/server/master/MasterCommandTest.java new file mode 100644 index 0000000000..3aed212618 --- /dev/null +++ b/escheduler-server/src/test/java/cn/escheduler/server/master/MasterCommandTest.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package cn.escheduler.server.master; + +import cn.escheduler.common.enums.CommandType; +import cn.escheduler.common.enums.FailureStrategy; +import cn.escheduler.common.enums.WarningType; +import cn.escheduler.dao.datasource.ConnectionFactory; +import cn.escheduler.dao.mapper.CommandMapper; +import cn.escheduler.dao.model.Command; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * master test + */ +public class MasterCommandTest { + + private final Logger logger = LoggerFactory.getLogger(MasterCommandTest.class); + + private CommandMapper commandMapper; + + @Before + public void before(){ + commandMapper = ConnectionFactory.getSqlSession().getMapper(CommandMapper.class); + } + + + + + @Test + public void StartFromFailedCommand(){ + Command cmd = new Command(); + cmd.setCommandType(CommandType.START_FAILURE_TASK_PROCESS); + cmd.setCommandParam("{\"ProcessInstanceId\":325}"); + cmd.setProcessDefinitionId(63); + + commandMapper.insert(cmd); + + } + + @Test + public void RecoverSuspendCommand(){ + + Command cmd = new Command(); + cmd.setProcessDefinitionId(44); + cmd.setCommandParam("{\"ProcessInstanceId\":290}"); + cmd.setCommandType(CommandType.RECOVER_SUSPENDED_PROCESS); + + commandMapper.insert(cmd); + } + + + + + @Test + public void startNewProcessCommand(){ + Command cmd = new Command(); + cmd.setCommandType(CommandType.START_PROCESS); + cmd.setProcessDefinitionId(167); + cmd.setFailureStrategy(FailureStrategy.CONTINUE); + cmd.setWarningType(WarningType.NONE); + 
cmd.setWarningGroupId(4); + cmd.setExecutorId(19); + + commandMapper.insert(cmd); + } + + @Test + public void ToleranceCommand(){ + Command cmd = new Command(); + cmd.setCommandType(CommandType.RECOVER_TOLERANCE_FAULT_PROCESS); + cmd.setCommandParam("{\"ProcessInstanceId\":816}"); + cmd.setProcessDefinitionId(15); + + commandMapper.insert(cmd); + } + + @Test + public void insertCommand(){ + Command cmd = new Command(); + cmd.setCommandType(CommandType.START_PROCESS); + cmd.setFailureStrategy(FailureStrategy.CONTINUE); + cmd.setWarningType(WarningType.ALL); + cmd.setProcessDefinitionId(72); + cmd.setExecutorId(10); + commandMapper.insert(cmd); + } + + +} diff --git a/escheduler-server/src/test/java/cn/escheduler/server/master/ParamsTest.java b/escheduler-server/src/test/java/cn/escheduler/server/master/ParamsTest.java new file mode 100644 index 0000000000..c3f8ab99e3 --- /dev/null +++ b/escheduler-server/src/test/java/cn/escheduler/server/master/ParamsTest.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package cn.escheduler.server.master; + +import cn.escheduler.common.enums.CommandType; +import cn.escheduler.common.enums.DataType; +import cn.escheduler.common.enums.Direct; +import cn.escheduler.common.process.Property; +import cn.escheduler.common.utils.ParameterUtils; +import cn.escheduler.common.utils.placeholder.BusinessTimeUtils; +import cn.escheduler.server.utils.ParamUtils; +import com.alibaba.fastjson.JSON; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Calendar; +import java.util.Date; +import java.util.HashMap; +import java.util.Map; + + +/** + * user define param + */ +public class ParamsTest { + + private static final Logger logger = LoggerFactory.getLogger(ParamsTest.class); + + + @Test + public void systemParamsTest()throws Exception{ + String command = "${system.biz.date}"; + + // start process + Map timeParams = BusinessTimeUtils + .getBusinessTime(CommandType.START_PROCESS, + new Date()); + + command = ParameterUtils.convertParameterPlaceholders(command, timeParams); + + logger.info("start process : {}",command); + + + Calendar calendar = Calendar.getInstance(); + calendar.setTime(new Date()); + calendar.add(Calendar.DAY_OF_MONTH, -5); + + + command = "${system.biz.date}"; + // complement data + timeParams = BusinessTimeUtils + .getBusinessTime(CommandType.COMPLEMENT_DATA, + calendar.getTime()); + command = ParameterUtils.convertParameterPlaceholders(command, timeParams); + logger.info("complement data : {}",command); + + } + + @Test + public void convertTest()throws Exception{ + Map globalParams = new HashMap<>(); + Property property = new Property(); + property.setProp("global_param"); + property.setDirect(Direct.IN); + property.setType(DataType.VARCHAR); + property.setValue("${system.biz.date}"); + globalParams.put("global_param",property); + + Map globalParamsMap = new HashMap<>(); + globalParamsMap.put("global_param","${system.biz.date}"); + + + Map localParams = new HashMap<>(); 
+ Property localProperty = new Property(); + localProperty.setProp("local_param"); + localProperty.setDirect(Direct.IN); + localProperty.setType(DataType.VARCHAR); + localProperty.setValue("${global_param}"); + localParams.put("local_param", localProperty); + + Map paramsMap = ParamUtils.convert(globalParams, globalParamsMap, + localParams, CommandType.START_PROCESS, new Date()); + logger.info(JSON.toJSONString(paramsMap)); + + + } +} \ No newline at end of file diff --git a/escheduler-server/src/test/java/cn/escheduler/server/worker/shell/ShellCommandExecutorTest.java b/escheduler-server/src/test/java/cn/escheduler/server/worker/shell/ShellCommandExecutorTest.java new file mode 100644 index 0000000000..0f947dac8a --- /dev/null +++ b/escheduler-server/src/test/java/cn/escheduler/server/worker/shell/ShellCommandExecutorTest.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package cn.escheduler.server.worker.shell; + +import cn.escheduler.common.Constants; +import cn.escheduler.common.enums.ExecutionStatus; +import cn.escheduler.common.model.TaskNode; +import cn.escheduler.dao.DaoFactory; +import cn.escheduler.dao.ProcessDao; +import cn.escheduler.dao.model.TaskInstance; +import cn.escheduler.server.utils.LoggerUtils; +import cn.escheduler.server.worker.log.TaskLogger; +import cn.escheduler.server.worker.task.AbstractTask; +import cn.escheduler.server.worker.task.TaskManager; +import cn.escheduler.server.worker.task.TaskProps; +import com.alibaba.fastjson.JSONObject; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Date; + +/** + * python shell command executor test + */ +public class ShellCommandExecutorTest { + + private static final Logger logger = LoggerFactory.getLogger(ShellCommandExecutorTest.class); + private static final String TASK_PREFIX = "TASK"; + + private ProcessDao processDao = null; + + @Before + public void before(){ + processDao = DaoFactory.getDaoInstance(ProcessDao.class); + } + + @Test + public void test() throws Exception { + + TaskProps taskProps = new TaskProps(); + // processDefineId_processInstanceId_taskInstanceId + taskProps.setTaskDir("/opt/soft/program/tmp/escheduler/exec/flow/5/36/2864/7657"); + taskProps.setTaskAppId("36_2864_7657"); + // set tenant -> task execute linux user + taskProps.setTenantCode("hdfs"); + taskProps.setTaskStartTime(new Date()); + taskProps.setTaskTimeout(360000); + taskProps.setTaskInstId(7657); + + + + TaskInstance taskInstance = processDao.findTaskInstanceById(7657); + + String taskJson = taskInstance.getTaskJson(); + TaskNode taskNode = JSONObject.parseObject(taskJson, TaskNode.class); + taskProps.setTaskParams(taskNode.getParams()); + + + // custom logger + TaskLogger taskLogger = new TaskLogger(LoggerUtils.buildTaskId(TASK_PREFIX, + taskInstance.getProcessDefinitionId(), + 
taskInstance.getProcessInstanceId(), + taskInstance.getId())); + + + AbstractTask task = TaskManager.newTask(taskInstance.getTaskType(), taskProps, taskLogger); + + logger.info("task info : {}", task); + + // job init + task.init(); + + // job handle + task.handle(); + ExecutionStatus status = ExecutionStatus.SUCCESS; + + if (task.getExitStatusCode() == Constants.EXIT_CODE_SUCCESS){ + status = ExecutionStatus.SUCCESS; + }else if (task.getExitStatusCode() == Constants.EXIT_CODE_KILL){ + status = ExecutionStatus.KILL; + }else { + status = ExecutionStatus.FAILURE; + } + + logger.info(status.toString()); + } +} \ No newline at end of file diff --git a/escheduler-server/src/test/java/cn/escheduler/server/worker/sql/SqlExecutorTest.java b/escheduler-server/src/test/java/cn/escheduler/server/worker/sql/SqlExecutorTest.java new file mode 100644 index 0000000000..aecf3e9230 --- /dev/null +++ b/escheduler-server/src/test/java/cn/escheduler/server/worker/sql/SqlExecutorTest.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package cn.escheduler.server.worker.sql; + +import cn.escheduler.common.Constants; +import cn.escheduler.common.enums.ExecutionStatus; +import cn.escheduler.common.model.TaskNode; +import cn.escheduler.dao.DaoFactory; +import cn.escheduler.dao.ProcessDao; +import cn.escheduler.dao.model.TaskInstance; +import cn.escheduler.server.utils.LoggerUtils; +import cn.escheduler.server.worker.log.TaskLogger; +import cn.escheduler.server.worker.task.AbstractTask; +import cn.escheduler.server.worker.task.TaskManager; +import cn.escheduler.server.worker.task.TaskProps; +import com.alibaba.fastjson.JSONObject; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Date; + +/** + * python shell command executor test + */ +public class SqlExecutorTest { + + private static final Logger logger = LoggerFactory.getLogger(SqlExecutorTest.class); + private static final String TASK_PREFIX = "TASK"; + + private ProcessDao processDao = null; + + @Before + public void before(){ + processDao = DaoFactory.getDaoInstance(ProcessDao.class); + } + + @Test + public void test() throws Exception { + + TaskProps taskProps = new TaskProps(); + taskProps.setTaskDir(""); + // processDefineId_processInstanceId_taskInstanceId + taskProps.setTaskAppId("51_11282_263978"); + // set tenant -> task execute linux user + taskProps.setTenantCode("hdfs"); + taskProps.setTaskStartTime(new Date()); + taskProps.setTaskTimeout(360000); + taskProps.setTaskInstId(263978); + taskProps.setNodeName("mysql sql test"); + + + + TaskInstance taskInstance = processDao.findTaskInstanceById(263978); + + String taskJson = taskInstance.getTaskJson(); + TaskNode taskNode = JSONObject.parseObject(taskJson, TaskNode.class); + taskProps.setTaskParams(taskNode.getParams()); + + + // custom logger + TaskLogger taskLogger = new TaskLogger(LoggerUtils.buildTaskId(TASK_PREFIX, + taskInstance.getProcessDefinitionId(), + taskInstance.getProcessInstanceId(), + 
taskInstance.getId())); + + + AbstractTask task = TaskManager.newTask(taskInstance.getTaskType(), taskProps, taskLogger); + + logger.info("task info : {}", task); + + // job init + task.init(); + + // job handle + task.handle(); + ExecutionStatus status = ExecutionStatus.SUCCESS; + + if (task.getExitStatusCode() == Constants.EXIT_CODE_SUCCESS){ + status = ExecutionStatus.SUCCESS; + }else if (task.getExitStatusCode() == Constants.EXIT_CODE_KILL){ + status = ExecutionStatus.KILL; + }else { + status = ExecutionStatus.FAILURE; + } + + logger.info(status.toString()); + } +} \ No newline at end of file diff --git a/escheduler-server/src/test/java/cn/escheduler/server/worker/task/dependent/DependentTaskTest.java b/escheduler-server/src/test/java/cn/escheduler/server/worker/task/dependent/DependentTaskTest.java new file mode 100644 index 0000000000..fae7ccc5d0 --- /dev/null +++ b/escheduler-server/src/test/java/cn/escheduler/server/worker/task/dependent/DependentTaskTest.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package cn.escheduler.server.worker.task.dependent; + +import cn.escheduler.common.Constants; +import cn.escheduler.server.worker.task.TaskProps; +import org.junit.Assert; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DependentTaskTest { + + private static final Logger logger = LoggerFactory.getLogger(DependentTaskTest.class); + + @Test + public void testDependInit(){ + + + TaskProps taskProps = new TaskProps(); + + + + String dependString = "{\n" + + "\"dependTaskList\":[\n" + + " {\n" + + " \"dependItemList\":[\n" + + " {\n" + + " \"definitionId\": 101,\n" + + " \"depTasks\": \"ALL\",\n" + + " \"cycle\": \"day\",\n" + + " \"dateValue\": \"last1Day\"\n" + + " }\n" + + " ],\n" + + " \"relation\": \"AND\"\n" + + " }\n" + + " ],\n" + + "\"relation\":\"OR\"\n" + + "}"; + + taskProps.setTaskInstId(252612); + taskProps.setDependence(dependString); + DependentTask dependentTask = new DependentTask(taskProps, logger); + dependentTask.init(); + dependentTask.handle(); + Assert.assertEquals(dependentTask.getExitStatusCode(), Constants.EXIT_CODE_FAILURE ); + } + + + +} \ No newline at end of file diff --git a/escheduler-server/src/test/java/cn/escheduler/server/zk/ZKWorkerClientTest.java b/escheduler-server/src/test/java/cn/escheduler/server/zk/ZKWorkerClientTest.java new file mode 100644 index 0000000000..04e8b8d606 --- /dev/null +++ b/escheduler-server/src/test/java/cn/escheduler/server/zk/ZKWorkerClientTest.java @@ -0,0 +1,20 @@ +package cn.escheduler.server.zk; + +import cn.escheduler.common.Constants; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class ZKWorkerClientTest { + + @Test + public void getZKWorkerClient() throws Exception { + + + ZKWorkerClient zkWorkerClient = ZKWorkerClient.getZKWorkerClient(); + zkWorkerClient.removeDeadServerByHost("127.0.0.1", Constants.WORKER_PREFIX); + + + } + +} \ No newline at end of file