From de50f43de63e42488cb38169cd56fba77dd24fc6 Mon Sep 17 00:00:00 2001 From: Jiajie Zhong Date: Mon, 25 Apr 2022 15:35:43 +0800 Subject: [PATCH] [common] Make dolphinscheduler_env.sh work when start server (#9726) * [common] Make dolphinscheduler_env.sh work * Change dist tarball `dolphinscheduler_env.sh` location from `bin/` to `conf/`, which users could finish their change configuration operation in one single directory. and we only need to add `$DOLPHINSCHEDULER_HOME/conf` when we start our sever instead of adding both `$DOLPHINSCHEDULER_HOME/conf` and `$DOLPHINSCHEDULER_HOME/bin` * Change the `start.sh`'s path of `dolphinscheduler_env.sh` * Change the setting order of `dolphinscheduler_env.sh` * `bin/env/dolphinscheduler_env.sh` will overwrite the `/conf/dolphinscheduler_env.sh` when start the server using `bin/dolphinsceduler_daemon.sh` or `bin/install.sh` * Change the related docs --- docs/docs/en/architecture/configuration.md | 48 +++++++++----- docs/docs/en/faq.md | 2 +- docs/docs/en/guide/expansion-reduction.md | 4 +- docs/docs/en/guide/installation/cluster.md | 6 +- .../en/guide/installation/pseudo-cluster.md | 58 ++++++++++------- docs/docs/en/guide/task/datax.md | 2 +- docs/docs/en/guide/task/flink.md | 2 +- docs/docs/en/guide/task/map-reduce.md | 2 +- docs/docs/en/guide/task/spark.md | 2 +- docs/docs/zh/architecture/configuration.md | 49 ++++++++++----- docs/docs/zh/faq.md | 2 +- docs/docs/zh/guide/expansion-reduction.md | 2 +- docs/docs/zh/guide/installation/cluster.md | 4 +- .../zh/guide/installation/pseudo-cluster.md | 62 +++++++++++-------- docs/docs/zh/guide/task/datax.md | 2 +- docs/docs/zh/guide/task/flink.md | 2 +- docs/docs/zh/guide/task/map-reduce.md | 2 +- docs/docs/zh/guide/task/spark.md | 2 +- docs/docs/zh/guide/upgrade.md | 2 +- .../dolphinscheduler-alert-server.xml | 2 +- .../src/main/bin/start.sh | 2 +- .../assembly/dolphinscheduler-api-server.xml | 2 +- dolphinscheduler-api/src/main/bin/start.sh | 2 +- 
.../dolphinscheduler/common/Constants.java | 2 +- .../common/utils/CommonUtils.java | 2 +- .../src/main/resources/common.properties | 2 +- .../common/utils/CommonUtilsTest.java | 5 +- .../docker/file-manage/common.properties | 2 +- .../dolphinscheduler-master-server.xml | 2 +- dolphinscheduler-master/src/main/bin/start.sh | 2 +- .../dolphinscheduler-standalone-server.xml | 15 ++--- .../src/main/bin/start.sh | 2 +- .../src/main/dist-bin/start.sh | 2 +- .../main/assembly/dolphinscheduler-tools.xml | 2 +- .../src/main/bin/upgrade-schema.sh | 2 +- .../dolphinscheduler-worker-server.xml | 2 +- dolphinscheduler-worker/src/main/bin/start.sh | 2 + script/dolphinscheduler-daemon.sh | 16 +++++ script/env/dolphinscheduler_env.sh | 30 +++++---- script/scp-hosts.sh | 4 +- 40 files changed, 221 insertions(+), 136 deletions(-) diff --git a/docs/docs/en/architecture/configuration.md b/docs/docs/en/architecture/configuration.md index 5caf1fc9d5..b56ed1b119 100644 --- a/docs/docs/en/architecture/configuration.md +++ b/docs/docs/en/architecture/configuration.md @@ -397,21 +397,41 @@ apiServers="ds1" ### dolphinscheduler_env.sh [load environment variables configs] -When using shell to commit tasks, DS will load environment variables inside dolphinscheduler_env.sh into the host. -Types of tasks involved are: Shell, Python, Spark, Flink, DataX, etc. -```bash -export HADOOP_HOME=/opt/soft/hadoop -export HADOOP_CONF_DIR=/opt/soft/hadoop/etc/hadoop -export SPARK_HOME1=/opt/soft/spark1 -export SPARK_HOME2=/opt/soft/spark2 -export PYTHON_HOME=/opt/soft/python -export JAVA_HOME=/opt/soft/java -export HIVE_HOME=/opt/soft/hive -export FLINK_HOME=/opt/soft/flink -export DATAX_HOME=/opt/soft/datax/bin/datax.py - -export PATH=$HADOOP_HOME/bin:$SPARK_HOME1/bin:$SPARK_HOME2/bin:$PYTHON_HOME:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH:$FLINK_HOME/bin:$DATAX_HOME:$PATH +When using shell to commit tasks, DolphinScheduler will export environment variables from `bin/env/dolphinscheduler_env.sh`. 
The +main configuration includes `JAVA_HOME`, the metadata database, the registry center, and task configuration. +```bash +# JAVA_HOME, will use it to start DolphinScheduler server +export JAVA_HOME=${JAVA_HOME:-/opt/soft/java} + +# Database related configuration, set database type, username and password +export DATABASE=${DATABASE:-postgresql} +export SPRING_PROFILES_ACTIVE=${DATABASE} +export SPRING_DATASOURCE_DRIVER_CLASS_NAME +export SPRING_DATASOURCE_URL +export SPRING_DATASOURCE_USERNAME +export SPRING_DATASOURCE_PASSWORD + +# DolphinScheduler server related configuration +export SPRING_CACHE_TYPE=${SPRING_CACHE_TYPE:-none} +export SPRING_JACKSON_TIME_ZONE=${SPRING_JACKSON_TIME_ZONE:-UTC} +export MASTER_FETCH_COMMAND_NUM=${MASTER_FETCH_COMMAND_NUM:-10} + +# Registry center configuration, determines the type and link of the registry center +export REGISTRY_TYPE=${REGISTRY_TYPE:-zookeeper} +export REGISTRY_ZOOKEEPER_CONNECT_STRING=${REGISTRY_ZOOKEEPER_CONNECT_STRING:-localhost:2181} + +# Tasks related configurations, need to change the configuration if you use the related tasks. +export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop} +export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop} +export SPARK_HOME1=${SPARK_HOME1:-/opt/soft/spark1} +export SPARK_HOME2=${SPARK_HOME2:-/opt/soft/spark2} +export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python} +export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive} +export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink} +export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax} + +export PATH=$HADOOP_HOME/bin:$SPARK_HOME1/bin:$SPARK_HOME2/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH ``` ### Services logback configs diff --git a/docs/docs/en/faq.md b/docs/docs/en/faq.md index 05d8be7dc1..1b4984485b 100644 --- a/docs/docs/en/faq.md +++ b/docs/docs/en/faq.md @@ -218,7 +218,7 @@ A: 1, in **the process definition list**, click the **Start** button. 
## Q : Python task setting Python version -A: 1,**for the version after 1.0.3** only need to modify PYTHON_HOME in conf/env/.dolphinscheduler_env.sh +A: 1,**for the version after 1.0.3** only need to modify PYTHON_HOME in `bin/env/dolphinscheduler_env.sh` ``` export PYTHON_HOME=/bin/python diff --git a/docs/docs/en/guide/expansion-reduction.md b/docs/docs/en/guide/expansion-reduction.md index 5cb4ad821f..309ac6418b 100644 --- a/docs/docs/en/guide/expansion-reduction.md +++ b/docs/docs/en/guide/expansion-reduction.md @@ -73,10 +73,10 @@ sed -i 's/Defaults requirett/#Defaults requirett/g' /etc/sudoers datasource.properties: database connection information zookeeper.properties: information for connecting zk common.properties: Configuration information about the resource store (if hadoop is set up, please check if the core-site.xml and hdfs-site.xml configuration files exist). - env/dolphinscheduler_env.sh: environment Variables + dolphinscheduler_env.sh: environment Variables ```` -- Modify the `dolphinscheduler_env.sh` environment variable in the `conf/env` directory according to the machine configuration (the following is the example that all the used software install under `/opt/soft`) +- Modify the environment variables in `dolphinscheduler_env.sh` under the `bin/env` directory according to the machine configuration (the following example assumes that all the software used is installed under `/opt/soft`) ```shell export HADOOP_HOME=/opt/soft/hadoop diff --git a/docs/docs/en/guide/installation/cluster.md b/docs/docs/en/guide/installation/cluster.md index 4f1ead96a2..37032b85a0 100644 --- a/docs/docs/en/guide/installation/cluster.md +++ b/docs/docs/en/guide/installation/cluster.md @@ -6,7 +6,7 @@ If you are a new hand and want to experience DolphinScheduler functions, we reco ## Deployment Steps -Cluster deployment uses the same scripts and configuration files as [pseudo-cluster deployment](pseudo-cluster.md), so the preparation and deployment steps are 
the same as pseudo-cluster deployment. The difference is that [pseudo-cluster deployment](pseudo-cluster.md) is for one machine, while cluster deployment (Cluster) is for multiple machines. And steps of "Modify Configuration" are quite different between pseudo-cluster deployment and cluster deployment. +Cluster deployment uses the same scripts and configuration files as [pseudo-cluster deployment](pseudo-cluster.md), so the preparation and deployment steps are the same as pseudo-cluster deployment. The difference is that pseudo-cluster deployment is for one machine, while cluster deployment (Cluster) is for multiple machines. And steps of "Modify Configuration" are quite different between pseudo-cluster deployment and cluster deployment. ### Prerequisites and DolphinScheduler Startup Environment Preparations @@ -32,8 +32,8 @@ apiServers="ds5" ## Start and Login DolphinScheduler -Same as pseudo-cluster.md](pseudo-cluster.md) +Same as [pseudo-cluster](pseudo-cluster.md) ## Start and Stop Server -Same as pseudo-cluster.md](pseudo-cluster.md) \ No newline at end of file +Same as [pseudo-cluster](pseudo-cluster.md) \ No newline at end of file diff --git a/docs/docs/en/guide/installation/pseudo-cluster.md b/docs/docs/en/guide/installation/pseudo-cluster.md index d4191ef5fa..ee29a04407 100644 --- a/docs/docs/en/guide/installation/pseudo-cluster.md +++ b/docs/docs/en/guide/installation/pseudo-cluster.md @@ -87,7 +87,13 @@ sh script/create-dolphinscheduler.sh ## Modify Configuration -After completing the preparation of the basic environment, you need to modify the configuration file according to your environment. The configuration file is in the path of `conf/config/install_config.conf`. 
Generally, you just need to modify the **INSTALL MACHINE, DolphinScheduler ENV, Database, Registry Server** part to complete the deployment, the following describes the parameters that must be modified: +After completing the preparation of the basic environment, you need to modify the configuration file according to the +environment you used. The configuration files are both in directory `bin/env` and named `install_env.sh` and `dolphinscheduler_env.sh`. + +### Modify `install_env.sh` + +File `install_env.sh` describes which machines will be installed DolphinScheduler and what server will be installed on +each machine. You could find this file in the path `bin/env/install_env.sh` and the detail of the configuration as below. ```shell # --------------------------------------------------------- @@ -105,29 +111,30 @@ installPath="~/dolphinscheduler" # Deploy user, use the user you create in section **Configure machine SSH password-free login** deployUser="dolphinscheduler" +``` -# --------------------------------------------------------- -# DolphinScheduler ENV -# --------------------------------------------------------- -# The path of JAVA_HOME, which JDK install path in section **Preparation** -javaHome="/your/java/home/here" +### Modify `dolphinscheduler_env.sh` -# --------------------------------------------------------- -# Database -# --------------------------------------------------------- -# Database type, username, password, IP, port, metadata. For now `dbtype` supports `mysql` and `postgresql` -dbtype="mysql" -dbhost="localhost:3306" -# Need to modify if you are not using `dolphinscheduler/dolphinscheduler` as your username and password -username="dolphinscheduler" -password="dolphinscheduler" -dbname="dolphinscheduler" +File `dolphinscheduler_env.sh` describes the database configuration of DolphinScheduler, which in the path `bin/env/dolphinscheduler_env.sh` +and some tasks which need external dependencies or libraries such as `JAVA_HOME` and `SPARK_HOME`. 
You could ignore the +task external dependencies if you do not use those tasks, but you have to change `JAVA_HOME`, registry center and database +related configurations based on your environment. -# --------------------------------------------------------- -# Registry Server -# --------------------------------------------------------- -# Registration center address, the address of ZooKeeper service -registryServers="localhost:2181" +```sh +# JAVA_HOME, will use it to start DolphinScheduler server +export JAVA_HOME=${JAVA_HOME:-/custom/path} + +# Database related configuration, set database type, username and password +export DATABASE=${DATABASE:-postgresql} +export SPRING_PROFILES_ACTIVE=${DATABASE} +export SPRING_DATASOURCE_DRIVER_CLASS_NAME=org.postgresql.Driver +export SPRING_DATASOURCE_URL="jdbc:postgresql://127.0.0.1:5432/dolphinscheduler" +export SPRING_DATASOURCE_USERNAME="username" +export SPRING_DATASOURCE_PASSWORD="password" + +# Registry center configuration, determines the type and link of the registry center +export REGISTRY_TYPE=${REGISTRY_TYPE:-zookeeper} +export REGISTRY_ZOOKEEPER_CONNECT_STRING=${REGISTRY_ZOOKEEPER_CONNECT_STRING:-localhost:2181} ``` ## Initialize the Database @@ -178,7 +185,7 @@ sh tools/bin/create-schema.sh Use **deployment user** you created above, running the following command to complete the deployment, and the server log will be stored in the logs folder. ```shell -sh install.sh +sh ./bin/install.sh ``` > **_Note:_** For the first time deployment, there maybe occur five times of `sh: bin/dolphinscheduler-daemon.sh: No such file or directory` in the terminal, @@ -214,7 +221,12 @@ sh ./bin/dolphinscheduler-daemon.sh start alert-server sh ./bin/dolphinscheduler-daemon.sh stop alert-server ``` -> **_Note:_**: Please refer to the section of "System Architecture Design" for service usage. 
Python gateway service is +> **_Note1:_**: Each server has a `dolphinscheduler_env.sh` file in path `<server-name>/conf/dolphinscheduler_env.sh`, which +> is provided for micro-service deployments. It means that you could start each server by command `<server-name>/bin/start.sh` with +> environment variables different from those in `bin/env/dolphinscheduler_env.sh`. But the file `bin/env/dolphinscheduler_env.sh` will overwrite +> `<server-name>/conf/dolphinscheduler_env.sh` if you start the server with command `/bin/dolphinscheduler-daemon.sh <start|stop> <server-name>`. + +> **_Note2:_**: Please refer to the section of "System Architecture Design" for service usage. Python gateway service is > started along with the api-server, and if you do not want to start Python gateway service please disabled it by changing > the yaml config `python-gateway.enabled : false` in api-server's configuration path `api-server/conf/application.yaml` diff --git a/docs/docs/en/guide/task/datax.md b/docs/docs/en/guide/task/datax.md index 8f43510e20..20cdec8588 100644 --- a/docs/docs/en/guide/task/datax.md +++ b/docs/docs/en/guide/task/datax.md @@ -40,7 +40,7 @@ This example demonstrates how to import data from Hive into MySQL. ### Configure the DataX environment in DolphinScheduler -If you are using the DataX task type in a production environment, it is necessary to configure the required environment first. The following is the configuration file: `/dolphinscheduler/conf/env/dolphinscheduler_env.sh`. +If you are using the DataX task type in a production environment, it is necessary to configure the required environment first. The following is the configuration file: `bin/env/dolphinscheduler_env.sh`. 
![datax_task01](/img/tasks/demo/datax_task01.png) diff --git a/docs/docs/en/guide/task/flink.md b/docs/docs/en/guide/task/flink.md index 8199ddbbe3..a8205b1851 100644 --- a/docs/docs/en/guide/task/flink.md +++ b/docs/docs/en/guide/task/flink.md @@ -46,7 +46,7 @@ This is a common introductory case in the big data ecosystem, which often apply #### Configure the flink environment in DolphinScheduler -If you are using the flink task type in a production environment, it is necessary to configure the required environment first. The following is the configuration file: `/dolphinscheduler/conf/env/dolphinscheduler_env.sh`. +If you are using the flink task type in a production environment, it is necessary to configure the required environment first. The following is the configuration file: `bin/env/dolphinscheduler_env.sh`. ![demo-flink-simple](/img/tasks/demo/flink_task01.png) diff --git a/docs/docs/en/guide/task/map-reduce.md b/docs/docs/en/guide/task/map-reduce.md index 5ea69bab90..b1ffea4fee 100644 --- a/docs/docs/en/guide/task/map-reduce.md +++ b/docs/docs/en/guide/task/map-reduce.md @@ -54,7 +54,7 @@ This example is a common introductory type of MapReduce application, which used #### Configure the MapReduce Environment in DolphinScheduler -If you are using the MapReduce task type in a production environment, it is necessary to configure the required environment first. The following is the configuration file: `/dolphinscheduler/conf/env/dolphinscheduler_env.sh`. +If you are using the MapReduce task type in a production environment, it is necessary to configure the required environment first. The following is the configuration file: `bin/env/dolphinscheduler_env.sh`. 
![mr_configure](/img/tasks/demo/mr_task01.png) diff --git a/docs/docs/en/guide/task/spark.md b/docs/docs/en/guide/task/spark.md index 1e9f31524f..e536d6d3e6 100644 --- a/docs/docs/en/guide/task/spark.md +++ b/docs/docs/en/guide/task/spark.md @@ -45,7 +45,7 @@ This is a common introductory case in the big data ecosystem, which often apply #### Configure the Spark Environment in DolphinScheduler -If you are using the Spark task type in a production environment, it is necessary to configure the required environment first. The following is the configuration file: `/dolphinscheduler/conf/env/dolphinscheduler_env.sh`. +If you are using the Spark task type in a production environment, it is necessary to configure the required environment first. The following is the configuration file: `bin/env/dolphinscheduler_env.sh`. ![spark_configure](/img/tasks/demo/spark_task01.png) diff --git a/docs/docs/zh/architecture/configuration.md b/docs/docs/zh/architecture/configuration.md index ceb99a21b8..7eba76ea2c 100644 --- a/docs/docs/zh/architecture/configuration.md +++ b/docs/docs/zh/architecture/configuration.md @@ -380,21 +380,42 @@ apiServers="ds1" ``` ## 11.dolphinscheduler_env.sh [环境变量配置] -通过类似shell方式提交任务的的时候,会加载该配置文件中的环境变量到主机中. -涉及到的任务类型有: Shell任务、Python任务、Spark任务、Flink任务、Datax任务等等 -```bash -export HADOOP_HOME=/opt/soft/hadoop -export HADOOP_CONF_DIR=/opt/soft/hadoop/etc/hadoop -export SPARK_HOME1=/opt/soft/spark1 -export SPARK_HOME2=/opt/soft/spark2 -export PYTHON_HOME=/opt/soft/python -export JAVA_HOME=/opt/soft/java -export HIVE_HOME=/opt/soft/hive -export FLINK_HOME=/opt/soft/flink -export DATAX_HOME=/opt/soft/datax/bin/datax.py - -export PATH=$HADOOP_HOME/bin:$SPARK_HOME1/bin:$SPARK_HOME2/bin:$PYTHON_HOME:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH:$FLINK_HOME/bin:$DATAX_HOME:$PATH +通过类似shell方式提交任务的的时候,会加载该配置文件中的环境变量到主机中. 
涉及到的 `JAVA_HOME`、元数据库、注册中心和任务类型配置,其中任务 +类型主要有: Shell任务、Python任务、Spark任务、Flink任务、Datax任务等等 + +```bash +# JAVA_HOME, will use it to start DolphinScheduler server +export JAVA_HOME=${JAVA_HOME:-/opt/soft/java} + +# Database related configuration, set database type, username and password +export DATABASE=${DATABASE:-postgresql} +export SPRING_PROFILES_ACTIVE=${DATABASE} +export SPRING_DATASOURCE_DRIVER_CLASS_NAME +export SPRING_DATASOURCE_URL +export SPRING_DATASOURCE_USERNAME +export SPRING_DATASOURCE_PASSWORD + +# DolphinScheduler server related configuration +export SPRING_CACHE_TYPE=${SPRING_CACHE_TYPE:-none} +export SPRING_JACKSON_TIME_ZONE=${SPRING_JACKSON_TIME_ZONE:-UTC} +export MASTER_FETCH_COMMAND_NUM=${MASTER_FETCH_COMMAND_NUM:-10} + +# Registry center configuration, determines the type and link of the registry center +export REGISTRY_TYPE=${REGISTRY_TYPE:-zookeeper} +export REGISTRY_ZOOKEEPER_CONNECT_STRING=${REGISTRY_ZOOKEEPER_CONNECT_STRING:-localhost:2181} + +# Tasks related configurations, need to change the configuration if you use the related tasks. 
+export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop} +export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop} +export SPARK_HOME1=${SPARK_HOME1:-/opt/soft/spark1} +export SPARK_HOME2=${SPARK_HOME2:-/opt/soft/spark2} +export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python} +export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive} +export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink} +export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax} + +export PATH=$HADOOP_HOME/bin:$SPARK_HOME1/bin:$SPARK_HOME2/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH ``` ## 12.各服务日志配置文件 diff --git a/docs/docs/zh/faq.md b/docs/docs/zh/faq.md index 8d152aae40..563b90b53f 100644 --- a/docs/docs/zh/faq.md +++ b/docs/docs/zh/faq.md @@ -203,7 +203,7 @@ A: 1,在 **流程定义列表**,点击 **启动** 按钮 ## Q:Python 任务设置 Python 版本 -A: 只需要修改 conf/env/dolphinscheduler_env.sh 中的 PYTHON_HOME +A: 只需要修改 `bin/env/dolphinscheduler_env.sh` 中的 PYTHON_HOME ``` export PYTHON_HOME=/bin/python diff --git a/docs/docs/zh/guide/expansion-reduction.md b/docs/docs/zh/guide/expansion-reduction.md index 1ed6a5b818..3467581d3d 100644 --- a/docs/docs/zh/guide/expansion-reduction.md +++ b/docs/docs/zh/guide/expansion-reduction.md @@ -71,7 +71,7 @@ sed -i 's/Defaults requirett/#Defaults requirett/g' /etc/sudoers datasource.properties 中的数据库连接信息. zookeeper.properties 中的连接zk的信息. common.properties 中关于资源存储的配置信息(如果设置了hadoop,请检查是否存在core-site.xml和hdfs-site.xml配置文件). 
- env/dolphinscheduler_env.sh 中的环境变量 + dolphinscheduler_env.sh 中的环境变量 ```` - 根据机器配置,修改 conf/env 目录下的 `dolphinscheduler_env.sh` 环境变量(以相关用到的软件都安装在/opt/soft下为例) diff --git a/docs/docs/zh/guide/installation/cluster.md b/docs/docs/zh/guide/installation/cluster.md index 276a330043..194a411ca0 100644 --- a/docs/docs/zh/guide/installation/cluster.md +++ b/docs/docs/zh/guide/installation/cluster.md @@ -6,7 +6,7 @@ ## 部署步骤 -集群部署(Cluster)使用的脚本和配置文件与[伪集群部署](pseudo-cluster.md)中的配置一样,所以所需要的步骤也与[伪集群部署](pseudo-cluster.md)大致一样。区别就是[伪集群部署](pseudo-cluster.md)针对的是一台机器,而集群部署(Cluster)需要针对多台机器,且两者“修改相关配置”步骤区别较大 +集群部署(Cluster)使用的脚本和配置文件与[伪集群部署](pseudo-cluster.md)中的配置一样,所以所需要的步骤也与伪集群部署大致一样。区别就是伪集群部署针对的是一台机器,而集群部署(Cluster)需要针对多台机器,且两者“修改相关配置”步骤区别较大 ### 前置准备工作 && 准备 DolphinScheduler 启动环境 @@ -14,7 +14,7 @@ ### 修改相关配置 -这个是与[伪集群部署](pseudo-cluster.md)差异较大的一步,因为部署脚本会通过 `scp` 的方式将安装需要的资源传输到各个机器上,所以这一步我们仅需要修改运行`install.sh`脚本的所在机器的配置即可。配置文件在路径在`conf/config/install_config.conf`下,此处我们仅需修改**INSTALL MACHINE**,**DolphinScheduler ENV、Database、Registry Server**与[伪集群部署](pseudo-cluster.md)保持一致,下面对必须修改参数进行说明 +这个是与[伪集群部署](pseudo-cluster.md)差异较大的一步,因为部署脚本会通过 `scp` 的方式将安装需要的资源传输到各个机器上,所以这一步我们仅需要修改运行`install.sh`脚本的所在机器的配置即可。配置文件在路径在`conf/config/install_config.conf`下,此处我们仅需修改**INSTALL MACHINE**,**DolphinScheduler ENV、Database、Registry Server**与伪集群部署保持一致,下面对必须修改参数进行说明 ```shell # --------------------------------------------------------- diff --git a/docs/docs/zh/guide/installation/pseudo-cluster.md b/docs/docs/zh/guide/installation/pseudo-cluster.md index 43e904bc22..bb3fe816a1 100644 --- a/docs/docs/zh/guide/installation/pseudo-cluster.md +++ b/docs/docs/zh/guide/installation/pseudo-cluster.md @@ -87,47 +87,51 @@ sh script/create-dolphinscheduler.sh ## 修改相关配置 -完成了基础环境的准备后,在运行部署命令前,还需要根据环境修改配置文件。配置文件在路径在`conf/config/install_config.conf`下,一般部署只需要修改**INSTALL MACHINE、DolphinScheduler ENV、Database、Registry Server**部分即可完成部署,下面对必须修改参数进行说明 +完成基础环境的准备后,需要根据你的机器环境修改配置文件。配置文件可以在目录 `bin/env` 中找到,他们分别是 并命名为 `install_env.sh` 和 
`dolphinscheduler_env.sh`。 + +### 修改 `install_env.sh` 文件 + +文件 `install_env.sh` 描述了哪些机器将被安装 DolphinScheduler 以及每台机器对应安装哪些服务。您可以在路径 `bin/env/install_env.sh` 中找到此文件,配置详情如下。 ```shell # --------------------------------------------------------- # INSTALL MACHINE # --------------------------------------------------------- -# 因为是在单节点上部署master、worker、API server,所以服务器的IP均为机器IP或者localhost +# Due to the master, worker, and API server being deployed on a single node, the IP of the server is the machine IP or localhost ips="localhost" masters="localhost" workers="localhost:default" alertServer="localhost" apiServers="localhost" -# DolphinScheduler安装路径,如果不存在会创建 +# DolphinScheduler installation path, it will auto-create if not exists installPath="~/dolphinscheduler" -# 部署用户,填写在 **配置用户免密及权限** 中创建的用户 +# Deploy user, use the user you create in section **Configure machine SSH password-free login** deployUser="dolphinscheduler" +``` -# --------------------------------------------------------- -# DolphinScheduler ENV -# --------------------------------------------------------- -# JAVA_HOME 的路径,是在 **前置准备工作** 安装的JDK中 JAVA_HOME 所在的位置 -javaHome="/your/java/home/here" +### 修改 `dolphinscheduler_env.sh` 文件 -# --------------------------------------------------------- -# Database -# --------------------------------------------------------- -# 数据库的类型,用户名,密码,IP,端口,元数据库db。其中dbtype目前支持 mysql 和 postgresql -dbtype="mysql" -dbhost="localhost:3306" -# 如果你不是以 dolphinscheduler/dolphinscheduler 作为用户名和密码的,需要进行修改 -username="dolphinscheduler" -password="dolphinscheduler" -dbname="dolphinscheduler" +文件 `dolphinscheduler_env.sh` 描述了 DolphinScheduler 的数据库配置,一些任务类型外部依赖路径或库文件,注册中心,其中 `JAVA_HOME` +和 `SPARK_HOME`都是在这里定义的,其路径是 `bin/env/dolphinscheduler_env.sh`。如果您不使用某些任务类型,您可以忽略任务外部依赖项, +但您必须根据您的环境更改 `JAVA_HOME`、注册中心和数据库相关配置。 -# --------------------------------------------------------- -# Registry Server -# --------------------------------------------------------- -# 注册中心地址,zookeeper服务的地址 
-registryServers="localhost:2181" +```sh +# JAVA_HOME, will use it to start DolphinScheduler server +export JAVA_HOME=${JAVA_HOME:-/custom/path} + +# Database related configuration, set database type, username and password +export DATABASE=${DATABASE:-postgresql} +export SPRING_PROFILES_ACTIVE=${DATABASE} +export SPRING_DATASOURCE_DRIVER_CLASS_NAME=org.postgresql.Driver +export SPRING_DATASOURCE_URL="jdbc:postgresql://127.0.0.1:5432/dolphinscheduler" +export SPRING_DATASOURCE_USERNAME="username" +export SPRING_DATASOURCE_PASSWORD="password" + +# Registry center configuration, determines the type and link of the registry center +export REGISTRY_TYPE=${REGISTRY_TYPE:-zookeeper} +export REGISTRY_ZOOKEEPER_CONNECT_STRING=${REGISTRY_ZOOKEEPER_CONNECT_STRING:-localhost:2181} ``` ## 初始化数据库 @@ -178,7 +182,7 @@ sh tools/bin/create-schema.sh 使用上面创建的**部署用户**运行以下命令完成部署,部署后的运行日志将存放在 logs 文件夹内 ```shell -sh install.sh +sh ./bin/install.sh ``` > **_注意:_** 第一次部署的话,可能出现 5 次`sh: bin/dolphinscheduler-daemon.sh: No such file or directory`相关信息,次为非重要信息直接忽略即可 @@ -213,7 +217,13 @@ sh ./bin/dolphinscheduler-daemon.sh start alert-server sh ./bin/dolphinscheduler-daemon.sh stop alert-server ``` -> **_注意:_**:服务用途请具体参见《系统架构设计》小节 +> **_注意1:_**: 每个服务在路径 `/conf/dolphinscheduler_env.sh` 中都有 `dolphinscheduler_env.sh` 文件,这是可以为微 +> 服务需求提供便利。意味着您可以基于不同的环境变量来启动各个服务,只需要在对应服务中配置 `bin/env/dolphinscheduler_env.sh` 然后通过 `/bin/start.sh` +> 命令启动即可。但是如果您使用命令 `/bin/dolphinscheduler-daemon.sh start ` 启动服务器,它将会用文件 `bin/env/dolphinscheduler_env.sh` +> 覆盖 `/conf/dolphinscheduler_env.sh` 然后启动服务,目的是为了减少用户修改配置的成本. 
+ +> **_注意2:_**:服务用途请具体参见《系统架构设计》小节。Python gateway service 默认与 api-server 一起启动,如果您不想启动 Python gateway service +> 请通过更改 api-server 配置文件 `api-server/conf/application.yaml` 中的 `python-gateway.enabled : false` 来禁用它。 [jdk]: https://www.oracle.com/technetwork/java/javase/downloads/index.html [zookeeper]: https://zookeeper.apache.org/releases.html diff --git a/docs/docs/zh/guide/task/datax.md b/docs/docs/zh/guide/task/datax.md index 5c0501a2f7..5a9e167980 100644 --- a/docs/docs/zh/guide/task/datax.md +++ b/docs/docs/zh/guide/task/datax.md @@ -40,7 +40,7 @@ DataX 任务类型,用于执行 DataX 程序。对于 DataX 节点,worker ### 在 DolphinScheduler 中配置 DataX 环境 -若生产环境中要是使用到 DataX 任务类型,则需要先配置好所需的环境。配置文件如下:`/dolphinscheduler/conf/env/dolphinscheduler_env.sh`。 +若生产环境中要是使用到 DataX 任务类型,则需要先配置好所需的环境。配置文件如下:`bin/env/dolphinscheduler_env.sh`。 ![datax_task01](/img/tasks/demo/datax_task01.png) diff --git a/docs/docs/zh/guide/task/flink.md b/docs/docs/zh/guide/task/flink.md index 4e415164e9..2aa34691c4 100644 --- a/docs/docs/zh/guide/task/flink.md +++ b/docs/docs/zh/guide/task/flink.md @@ -46,7 +46,7 @@ Flink 任务类型,用于执行 Flink 程序。对于 Flink 节点,worker #### 在 DolphinScheduler 中配置 flink 环境 -若生产环境中要是使用到 flink 任务类型,则需要先配置好所需的环境。配置文件如下:`/dolphinscheduler/conf/env/dolphinscheduler_env.sh`。 +若生产环境中要是使用到 flink 任务类型,则需要先配置好所需的环境。配置文件如下:`bin/env/dolphinscheduler_env.sh`。 ![flink-configure](/img/tasks/demo/flink_task01.png) diff --git a/docs/docs/zh/guide/task/map-reduce.md b/docs/docs/zh/guide/task/map-reduce.md index e0f647442e..f042ca1d40 100644 --- a/docs/docs/zh/guide/task/map-reduce.md +++ b/docs/docs/zh/guide/task/map-reduce.md @@ -54,7 +54,7 @@ MapReduce(MR) 任务类型,用于执行 MapReduce 程序。对于 MapReduce #### 在 DolphinScheduler 中配置 MapReduce 环境 -若生产环境中要是使用到 MapReduce 任务类型,则需要先配置好所需的环境。配置文件如下:`/dolphinscheduler/conf/env/dolphinscheduler_env.sh`。 +若生产环境中要是使用到 MapReduce 任务类型,则需要先配置好所需的环境。配置文件如下:`bin/env/dolphinscheduler_env.sh`。 ![mr_configure](/img/tasks/demo/mr_task01.png) diff --git a/docs/docs/zh/guide/task/spark.md 
b/docs/docs/zh/guide/task/spark.md index 442eb8b3ee..4443457f08 100644 --- a/docs/docs/zh/guide/task/spark.md +++ b/docs/docs/zh/guide/task/spark.md @@ -46,7 +46,7 @@ Spark 任务类型,用于执行 Spark 程序。对于 Spark 节点,worker #### 在 DolphinScheduler 中配置 Spark 环境 -若生产环境中要是使用到 Spark 任务类型,则需要先配置好所需的环境。配置文件如下:`/dolphinscheduler/conf/env/dolphinscheduler_env.sh`。 +若生产环境中要是使用到 Spark 任务类型,则需要先配置好所需的环境。配置文件如下:`bin/env/dolphinscheduler_env.sh`。 ![spark_configure](/img/tasks/demo/spark_task01.png) diff --git a/docs/docs/zh/guide/upgrade.md b/docs/docs/zh/guide/upgrade.md index 3e95874557..69a8b76597 100644 --- a/docs/docs/zh/guide/upgrade.md +++ b/docs/docs/zh/guide/upgrade.md @@ -15,7 +15,7 @@ ## 4. 数据库升级 - 将`./tools/conf/application.yaml`中的username和password改成你设定数据库用户名和密码 -- 如果选择 MySQL,请修改`./tools/bin/dolphinscheduler_env.sh`中的如下配置, 还需要手动添加 [[ mysql-connector-java 驱动 jar ](https://downloads.MySQL.com/archives/c-j/)] 包到 lib 目录(`./tools/lib`)下,这里下载的是mysql-connector-java-8.0.16.jar +- 如果选择 MySQL,请修改`./tools/bin/dolphinscheduler_env.sh`中的如下配置, 还需要手动添加 [ mysql-connector-java 驱动 jar ](https://downloads.MySQL.com/archives/c-j/) 包到 lib 目录(`./tools/lib`)下,这里下载的是mysql-connector-java-8.0.16.jar ```shell export DATABASE=${DATABASE:-mysql} diff --git a/dolphinscheduler-alert/dolphinscheduler-alert-server/src/main/assembly/dolphinscheduler-alert-server.xml b/dolphinscheduler-alert/dolphinscheduler-alert-server/src/main/assembly/dolphinscheduler-alert-server.xml index 259051fed4..6f39096221 100644 --- a/dolphinscheduler-alert/dolphinscheduler-alert-server/src/main/assembly/dolphinscheduler-alert-server.xml +++ b/dolphinscheduler-alert/dolphinscheduler-alert-server/src/main/assembly/dolphinscheduler-alert-server.xml @@ -41,7 +41,7 @@ ${basedir}/../../script/env - bin + conf dolphinscheduler_env.sh diff --git a/dolphinscheduler-alert/dolphinscheduler-alert-server/src/main/bin/start.sh b/dolphinscheduler-alert/dolphinscheduler-alert-server/src/main/bin/start.sh index 14ed6d169e..06cadf0b02 100644 --- 
a/dolphinscheduler-alert/dolphinscheduler-alert-server/src/main/bin/start.sh +++ b/dolphinscheduler-alert/dolphinscheduler-alert-server/src/main/bin/start.sh @@ -19,7 +19,7 @@ BIN_DIR=$(dirname $0) DOLPHINSCHEDULER_HOME=${DOLPHINSCHEDULER_HOME:-$(cd $BIN_DIR/..; pwd)} -source "$BIN_DIR/dolphinscheduler_env.sh" +source "$DOLPHINSCHEDULER_HOME/conf/dolphinscheduler_env.sh" JAVA_OPTS=${JAVA_OPTS:-"-server -Duser.timezone=${SPRING_JACKSON_TIME_ZONE} -Xms1g -Xmx1g -Xmn512m -XX:+PrintGCDetails -Xloggc:gc.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=dump.hprof"} diff --git a/dolphinscheduler-api/src/main/assembly/dolphinscheduler-api-server.xml b/dolphinscheduler-api/src/main/assembly/dolphinscheduler-api-server.xml index edc99e8f1c..3fe7fb705b 100644 --- a/dolphinscheduler-api/src/main/assembly/dolphinscheduler-api-server.xml +++ b/dolphinscheduler-api/src/main/assembly/dolphinscheduler-api-server.xml @@ -41,7 +41,7 @@ ${basedir}/../script/env - bin + conf dolphinscheduler_env.sh diff --git a/dolphinscheduler-api/src/main/bin/start.sh b/dolphinscheduler-api/src/main/bin/start.sh index 15dc38c537..223e07a53c 100644 --- a/dolphinscheduler-api/src/main/bin/start.sh +++ b/dolphinscheduler-api/src/main/bin/start.sh @@ -19,7 +19,7 @@ BIN_DIR=$(dirname $0) DOLPHINSCHEDULER_HOME=${DOLPHINSCHEDULER_HOME:-$(cd $BIN_DIR/..; pwd)} -source "$BIN_DIR/dolphinscheduler_env.sh" +source "$DOLPHINSCHEDULER_HOME/conf/dolphinscheduler_env.sh" JAVA_OPTS=${JAVA_OPTS:-"-server -Duser.timezone=${SPRING_JACKSON_TIME_ZONE} -Xms1g -Xmx1g -Xmn512m -XX:+PrintGCDetails -Xloggc:gc.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=dump.hprof"} diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java index e26a6f0036..fc49132ccd 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java +++ 
b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/Constants.java @@ -120,7 +120,7 @@ public final class Constants { /** * environment properties default path */ - public static final String ENV_PATH = "env/dolphinscheduler_env.sh"; + public static final String ENV_PATH = "dolphinscheduler_env.sh"; /** * resource.view.suffixs diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/CommonUtils.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/CommonUtils.java index ffed1d13f4..01c9ec974d 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/CommonUtils.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/CommonUtils.java @@ -40,7 +40,7 @@ public class CommonUtils { private static final Base64 BASE64 = new Base64(); - private CommonUtils() { + protected CommonUtils() { throw new UnsupportedOperationException("Construct CommonUtils"); } diff --git a/dolphinscheduler-common/src/main/resources/common.properties b/dolphinscheduler-common/src/main/resources/common.properties index 094bdd26c4..fd26cd2bb4 100644 --- a/dolphinscheduler-common/src/main/resources/common.properties +++ b/dolphinscheduler-common/src/main/resources/common.properties @@ -83,7 +83,7 @@ sudo.enable=true #dolphin.scheduler.network.priority.strategy=default # system env path -#dolphinscheduler.env.path=env/dolphinscheduler_env.sh +#dolphinscheduler.env.path=dolphinscheduler_env.sh # development state development.state=false diff --git a/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/CommonUtilsTest.java b/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/CommonUtilsTest.java index 713709030f..4facb999dd 100644 --- a/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/CommonUtilsTest.java +++ 
b/dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/utils/CommonUtilsTest.java @@ -41,8 +41,9 @@ public class CommonUtilsTest { private static final Logger logger = LoggerFactory.getLogger(CommonUtilsTest.class); @Test public void getSystemEnvPath() { - logger.info(CommonUtils.getSystemEnvPath()); - Assert.assertTrue(true); + String envPath; + envPath = CommonUtils.getSystemEnvPath(); + Assert.assertEquals("/etc/profile", envPath); } @Test diff --git a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties index 90fd35d275..b6f5126d9b 100644 --- a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties +++ b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/common.properties @@ -76,7 +76,7 @@ sudo.enable=true # network IP gets priority, default: inner outer #dolphin.scheduler.network.priority.strategy=default # system env path -#dolphinscheduler.env.path=env/dolphinscheduler_env.sh +#dolphinscheduler.env.path=dolphinscheduler_env.sh # development state development.state=false # rpc port diff --git a/dolphinscheduler-master/src/main/assembly/dolphinscheduler-master-server.xml b/dolphinscheduler-master/src/main/assembly/dolphinscheduler-master-server.xml index 760163328e..9fc3a3b679 100644 --- a/dolphinscheduler-master/src/main/assembly/dolphinscheduler-master-server.xml +++ b/dolphinscheduler-master/src/main/assembly/dolphinscheduler-master-server.xml @@ -41,7 +41,7 @@ ${basedir}/../script/env - bin + conf dolphinscheduler_env.sh diff --git a/dolphinscheduler-master/src/main/bin/start.sh b/dolphinscheduler-master/src/main/bin/start.sh index f4ee3c58f9..5373640cc7 100644 --- a/dolphinscheduler-master/src/main/bin/start.sh +++ b/dolphinscheduler-master/src/main/bin/start.sh @@ -19,7 +19,7 @@ BIN_DIR=$(dirname $0) 
DOLPHINSCHEDULER_HOME=${DOLPHINSCHEDULER_HOME:-$(cd $BIN_DIR/..; pwd)} -source "$BIN_DIR/dolphinscheduler_env.sh" +source "$DOLPHINSCHEDULER_HOME/conf/dolphinscheduler_env.sh" JAVA_OPTS=${JAVA_OPTS:-"-server -Duser.timezone=${SPRING_JACKSON_TIME_ZONE} -Xms4g -Xmx4g -Xmn2g -XX:+PrintGCDetails -Xloggc:gc.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=dump.hprof"} diff --git a/dolphinscheduler-standalone-server/src/main/assembly/dolphinscheduler-standalone-server.xml b/dolphinscheduler-standalone-server/src/main/assembly/dolphinscheduler-standalone-server.xml index e66fce1287..8f59f1a8d9 100644 --- a/dolphinscheduler-standalone-server/src/main/assembly/dolphinscheduler-standalone-server.xml +++ b/dolphinscheduler-standalone-server/src/main/assembly/dolphinscheduler-standalone-server.xml @@ -69,7 +69,7 @@ ${basedir}/../script/env - bin + conf dolphinscheduler_env.sh @@ -77,19 +77,16 @@ 0755 - ${basedir}/../script/env - dist-bin + ${basedir}/../dolphinscheduler-dao/src/main/resources - dolphinscheduler_env.sh + sql/**/* - 0755 - 0755 + conf - - ${basedir}/../dolphinscheduler-dao/src/main/resources + ${basedir}/../dolphinscheduler-common/src/main/resources - sql/**/* + common.properties conf diff --git a/dolphinscheduler-standalone-server/src/main/bin/start.sh b/dolphinscheduler-standalone-server/src/main/bin/start.sh index 935db38bb9..63079e948b 100755 --- a/dolphinscheduler-standalone-server/src/main/bin/start.sh +++ b/dolphinscheduler-standalone-server/src/main/bin/start.sh @@ -19,7 +19,7 @@ BIN_DIR=$(dirname $0) DOLPHINSCHEDULER_HOME=${DOLPHINSCHEDULER_HOME:-$(cd $BIN_DIR/..; pwd)} -source "$BIN_DIR/dolphinscheduler_env.sh" +source "$DOLPHINSCHEDULER_HOME/conf/dolphinscheduler_env.sh" JAVA_OPTS=${JAVA_OPTS:-"-server -Duser.timezone=${SPRING_JACKSON_TIME_ZONE} -Xms1g -Xmx1g -Xmn512m -XX:+PrintGCDetails -Xloggc:gc.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=dump.hprof"} diff --git a/dolphinscheduler-standalone-server/src/main/dist-bin/start.sh 
b/dolphinscheduler-standalone-server/src/main/dist-bin/start.sh index 5fef766e08..584e5f6f08 100755 --- a/dolphinscheduler-standalone-server/src/main/dist-bin/start.sh +++ b/dolphinscheduler-standalone-server/src/main/dist-bin/start.sh @@ -19,7 +19,7 @@ BIN_DIR=$(dirname $0) DOLPHINSCHEDULER_HOME=${DOLPHINSCHEDULER_HOME:-$(cd $BIN_DIR/..; pwd)} -source "$BIN_DIR/dolphinscheduler_env.sh" +source "$DOLPHINSCHEDULER_HOME/conf/dolphinscheduler_env.sh" JAVA_OPTS=${JAVA_OPTS:-"-server -Duser.timezone=${SPRING_JACKSON_TIME_ZONE} -Xms1g -Xmx1g -Xmn512m -XX:+PrintGCDetails -Xloggc:gc.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=dump.hprof"} diff --git a/dolphinscheduler-tools/src/main/assembly/dolphinscheduler-tools.xml b/dolphinscheduler-tools/src/main/assembly/dolphinscheduler-tools.xml index a24230c3cb..02a656420d 100644 --- a/dolphinscheduler-tools/src/main/assembly/dolphinscheduler-tools.xml +++ b/dolphinscheduler-tools/src/main/assembly/dolphinscheduler-tools.xml @@ -40,7 +40,7 @@ ${basedir}/../script/env - bin + conf dolphinscheduler_env.sh diff --git a/dolphinscheduler-tools/src/main/bin/upgrade-schema.sh b/dolphinscheduler-tools/src/main/bin/upgrade-schema.sh index feb826a230..f545e294e2 100755 --- a/dolphinscheduler-tools/src/main/bin/upgrade-schema.sh +++ b/dolphinscheduler-tools/src/main/bin/upgrade-schema.sh @@ -19,7 +19,7 @@ BIN_DIR=$(dirname $0) DOLPHINSCHEDULER_HOME=${DOLPHINSCHEDULER_HOME:-$(cd $BIN_DIR/../..; pwd)} -source "$DOLPHINSCHEDULER_HOME/tools/bin/dolphinscheduler_env.sh" +source "$DOLPHINSCHEDULER_HOME/conf/dolphinscheduler_env.sh" JAVA_OPTS=${JAVA_OPTS:-"-server -Duser.timezone=${SPRING_JACKSON_TIME_ZONE} -Xms1g -Xmx1g -Xmn512m -XX:+PrintGCDetails -Xloggc:gc.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=dump.hprof"} diff --git a/dolphinscheduler-worker/src/main/assembly/dolphinscheduler-worker-server.xml b/dolphinscheduler-worker/src/main/assembly/dolphinscheduler-worker-server.xml index e9c2a88ff6..70622942f0 100644 --- 
a/dolphinscheduler-worker/src/main/assembly/dolphinscheduler-worker-server.xml +++ b/dolphinscheduler-worker/src/main/assembly/dolphinscheduler-worker-server.xml @@ -42,7 +42,7 @@ ${basedir}/../script/env - bin + conf dolphinscheduler_env.sh diff --git a/dolphinscheduler-worker/src/main/bin/start.sh b/dolphinscheduler-worker/src/main/bin/start.sh index 514be53116..1a865ca6bc 100644 --- a/dolphinscheduler-worker/src/main/bin/start.sh +++ b/dolphinscheduler-worker/src/main/bin/start.sh @@ -19,6 +19,8 @@ BIN_DIR=$(dirname $0) DOLPHINSCHEDULER_HOME=${DOLPHINSCHEDULER_HOME:-$(cd $BIN_DIR/..; pwd)} +source "$DOLPHINSCHEDULER_HOME/conf/dolphinscheduler_env.sh" + chmod -R 700 ${DOLPHINSCHEDULER_HOME}/config export DOLPHINSCHEDULER_WORK_HOME=${DOLPHINSCHEDULER_HOME} diff --git a/script/dolphinscheduler-daemon.sh b/script/dolphinscheduler-daemon.sh index 7f83f2bb8d..ffcd87f72a 100755 --- a/script/dolphinscheduler-daemon.sh +++ b/script/dolphinscheduler-daemon.sh @@ -34,6 +34,21 @@ echo "Begin $startStop $command......" BIN_DIR=`dirname $0` BIN_DIR=`cd "$BIN_DIR"; pwd` DOLPHINSCHEDULER_HOME=`cd "$BIN_DIR/.."; pwd` +BIN_ENV_FILE="${DOLPHINSCHEDULER_HOME}/bin/env/dolphinscheduler_env.sh" + +# Overwrite server dolphinscheduler_env.sh in path `/conf/dolphinscheduler_env.sh` when exists +# `bin/env/dolphinscheduler_env.sh` file. User could only change `bin/env/dolphinscheduler_env.sh` instead +# of each server's dolphinscheduler_env.sh when they want to start the server +function overwrite_server_env() { + local server=$1 + local server_env_file="${DOLPHINSCHEDULER_HOME}/${server}/conf/dolphinscheduler_env.sh" + if [ -f "${BIN_ENV_FILE}" ]; then + echo "Overwrite ${server}/conf/dolphinscheduler_env.sh using bin/env/dolphinscheduler_env.sh." + cp "${BIN_ENV_FILE}" "${server_env_file}" + else + echo "Start server ${server} using env config path ${server_env_file}, because file ${BIN_ENV_FILE} not exists." 
+ fi +} source "${DOLPHINSCHEDULER_HOME}/bin/env/dolphinscheduler_env.sh" @@ -69,6 +84,7 @@ fi case $startStop in (start) echo starting $command, logging to $DOLPHINSCHEDULER_LOG_DIR + overwrite_server_env "${command}" nohup /bin/bash "$DOLPHINSCHEDULER_HOME/$command/bin/start.sh" > $log 2>&1 & echo $! > $pid ;; diff --git a/script/env/dolphinscheduler_env.sh b/script/env/dolphinscheduler_env.sh index 5e07aabb9e..26eda694db 100755 --- a/script/env/dolphinscheduler_env.sh +++ b/script/env/dolphinscheduler_env.sh @@ -15,28 +15,34 @@ # limitations under the License. # -export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop} -export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop} -export SPARK_HOME1=${SPARK_HOME1:-/opt/soft/spark1} -export SPARK_HOME2=${SPARK_HOME2:-/opt/soft/spark2} -export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python} +# JAVA_HOME, will use it to start DolphinScheduler server export JAVA_HOME=${JAVA_HOME:-/opt/soft/java} -export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive} -export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink} -export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax} - -export PATH=$HADOOP_HOME/bin:$SPARK_HOME1/bin:$SPARK_HOME2/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH -export SPRING_JACKSON_TIME_ZONE=${SPRING_JACKSON_TIME_ZONE:-UTC} +# Database related configuration, set database type, username and password export DATABASE=${DATABASE:-postgresql} export SPRING_PROFILES_ACTIVE=${DATABASE} export SPRING_DATASOURCE_DRIVER_CLASS_NAME export SPRING_DATASOURCE_URL export SPRING_DATASOURCE_USERNAME export SPRING_DATASOURCE_PASSWORD -export SPRING_CACHE_TYPE=${SPRING_CACHE_TYPE:-none} +# DolphinScheduler server related configuration +export SPRING_CACHE_TYPE=${SPRING_CACHE_TYPE:-none} +export SPRING_JACKSON_TIME_ZONE=${SPRING_JACKSON_TIME_ZONE:-UTC} export MASTER_FETCH_COMMAND_NUM=${MASTER_FETCH_COMMAND_NUM:-10} +# Registry center configuration, determines the type and link of the registry center 
export REGISTRY_TYPE=${REGISTRY_TYPE:-zookeeper} export REGISTRY_ZOOKEEPER_CONNECT_STRING=${REGISTRY_ZOOKEEPER_CONNECT_STRING:-localhost:2181} + +# Tasks related configurations, need to change the configuration if you use the related tasks. +export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop} +export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop} +export SPARK_HOME1=${SPARK_HOME1:-/opt/soft/spark1} +export SPARK_HOME2=${SPARK_HOME2:-/opt/soft/spark2} +export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python} +export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive} +export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink} +export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax} + +export PATH=$HADOOP_HOME/bin:$SPARK_HOME1/bin:$SPARK_HOME2/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH diff --git a/script/scp-hosts.sh b/script/scp-hosts.sh index a9499196d8..b5cd611984 100755 --- a/script/scp-hosts.sh +++ b/script/scp-hosts.sh @@ -40,7 +40,7 @@ do fi echo "scp dirs to $host/$installPath starting" - ssh -p $sshPort $host "cd $installPath/; rm -rf bin/ conf/ lib/ script/ sql/ ui/" + ssh -p $sshPort $host "cd $installPath/; rm -rf bin/ master-server/ worker-server/ alert-server/ api-server/ ui/ tools/" for i in ${!workerNames[@]}; do if [[ ${workerNames[$i]} == $host ]]; then @@ -51,7 +51,7 @@ do # set worker groups in application.yaml [[ -n ${workerIndex} ]] && sed -i "s/- default/- ${groupNames[$workerIndex]}/" worker-server/conf/application.yaml - for dsDir in bin master-server worker-server alert-server api-server ui + for dsDir in bin master-server worker-server alert-server api-server ui tools do echo "start to scp $dsDir to $host/$installPath" # Use quiet mode to reduce command line output