
impv: Correct datax execute and python execute script name (#14523)

* impv: Correct datax execute and python execute script name

we now use PYTHON_LAUNCHER for the python executable path and
DATAX_LAUNCHER for the datax script path

* Add pr number

* fix ut

* style

---------

Co-authored-by: xiangzihao <460888207@qq.com>
Branch: 3.2.1-prepare
Jay Chung committed via GitHub, commit de2d215aee
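At a glance, the whole change is an environment-variable rename plus a shift from install directories to concrete executables. A minimal before/after sketch of `bin/env/dolphinscheduler_env.sh`, using the repository's illustrative default paths:

```shell
# Before this commit: variables named after install directories.
export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python}
export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax}

# After this commit: variables point straight at the launcher executables,
# so tasks no longer have to guess the layout under each home directory.
export PYTHON_LAUNCHER=${PYTHON_LAUNCHER:-/opt/soft/python/bin/python3}
export DATAX_LAUNCHER=${DATAX_LAUNCHER:-/opt/soft/datax/bin/datax.py}
```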
1. .github/workflows/cluster-test/mysql/dolphinscheduler_env.sh (6)
2. .github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh (6)
3. README.md (2)
4. deploy/kubernetes/dolphinscheduler/values.yaml (4)
5. docs/docs/en/architecture/configuration.md (6)
6. docs/docs/en/faq.md (8)
7. docs/docs/en/guide/expansion-reduction.md (6)
8. docs/docs/en/guide/installation/kubernetes.md (6)
9. docs/docs/en/guide/installation/pseudo-cluster.md (6)
10. docs/docs/en/guide/integration/rainbond.md (2)
11. docs/docs/en/guide/task/datax.md (4)
12. docs/docs/en/guide/task/openmldb.md (2)
13. docs/docs/en/guide/task/pytorch.md (6)
14. docs/docs/en/guide/upgrade/incompatible.md (1)
15. docs/docs/zh/architecture/configuration.md (6)
16. docs/docs/zh/faq.md (8)
17. docs/docs/zh/guide/expansion-reduction.md (6)
18. docs/docs/zh/guide/installation/kubernetes.md (6)
19. docs/docs/zh/guide/installation/pseudo-cluster.md (6)
20. docs/docs/zh/guide/integration/rainbond.md (2)
21. docs/docs/zh/guide/task/datax.md (4)
22. docs/docs/zh/guide/task/openmldb.md (2)
23. docs/docs/zh/guide/task/pytorch.md (14)
24. docs/docs/zh/guide/upgrade/incompatible.md (1)
25. dolphinscheduler-api/src/test/java/org/apache/dolphinscheduler/api/service/EnvironmentServiceTest.java (6)
26. dolphinscheduler-dao/src/test/java/org/apache/dolphinscheduler/dao/mapper/ClusterMapperTest.java (6)
27. dolphinscheduler-dao/src/test/java/org/apache/dolphinscheduler/dao/mapper/EnvironmentMapperTest.java (6)
28. dolphinscheduler-dao/src/test/java/org/apache/dolphinscheduler/dao/utils/TaskCacheUtilsTest.java (2)
29. dolphinscheduler-task-plugin/dolphinscheduler-task-datax/src/main/java/org/apache/dolphinscheduler/plugin/task/datax/DataxTask.java (12)
30. dolphinscheduler-task-plugin/dolphinscheduler-task-datax/src/test/java/org/apache/dolphinscheduler/plugin/task/datax/DataxTaskTest.java (6)
31. dolphinscheduler-task-plugin/dolphinscheduler-task-openmldb/src/main/java/org/apache/dolphinscheduler/plugin/task/openmldb/OpenmldbTask.java (4)
32. dolphinscheduler-task-plugin/dolphinscheduler-task-python/src/main/java/org/apache/dolphinscheduler/plugin/task/python/PythonConstants.java (35)
33. dolphinscheduler-task-plugin/dolphinscheduler-task-python/src/main/java/org/apache/dolphinscheduler/plugin/task/python/PythonTask.java (8)
34. dolphinscheduler-task-plugin/dolphinscheduler-task-python/src/test/java/org/apache/dolphinscheduler/plugin/task/python/PythonTaskTest.java (2)
35. dolphinscheduler-task-plugin/dolphinscheduler-task-pytorch/src/main/java/org/apache/dolphinscheduler/plugin/task/pytorch/PythonEnvManager.java (2)
36. dolphinscheduler-task-plugin/dolphinscheduler-task-pytorch/src/main/java/org/apache/dolphinscheduler/plugin/task/pytorch/PytorchParameters.java (10)
37. dolphinscheduler-task-plugin/dolphinscheduler-task-pytorch/src/main/java/org/apache/dolphinscheduler/plugin/task/pytorch/PytorchTask.java (2)
38. dolphinscheduler-task-plugin/dolphinscheduler-task-pytorch/src/test/java/org/apache/dolphinscheduler/plugin/task/pytorch/PytorchTaskTest.java (14)
39. dolphinscheduler-ui/src/locales/en_US/project.ts (2)
40. dolphinscheduler-ui/src/locales/zh_CN/project.ts (2)
41. dolphinscheduler-ui/src/views/projects/task/components/node/tasks/use-pytorch.ts (2)
42. dolphinscheduler-ui/src/views/security/environment-manage/components/environment-modal.tsx (6)

.github/workflows/cluster-test/mysql/dolphinscheduler_env.sh

@@ -39,12 +39,12 @@ export REGISTRY_ZOOKEEPER_BLOCK_UNTIL_CONNECTED=30000ms
 export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
 export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
 export SPARK_HOME=${SPARK_HOME:-/opt/soft/spark}
-export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python}
+export PYTHON_LAUNCHER=${PYTHON_LAUNCHER:-/opt/soft/python/bin/python3}
 export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive}
 export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink}
-export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax}
+export DATAX_LAUNCHER=${DATAX_LAUNCHER:-/opt/soft/datax/bin/datax.py}
-export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
+export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH
 export MASTER_RESERVED_MEMORY=0.01
 export WORKER_RESERVED_MEMORY=0.01

.github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh

@@ -39,12 +39,12 @@ export REGISTRY_ZOOKEEPER_BLOCK_UNTIL_CONNECTED=30000ms
 export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
 export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
 export SPARK_HOME=${SPARK_HOME:-/opt/soft/spark}
-export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python}
+export PYTHON_LAUNCHER=${PYTHON_LAUNCHER:-/opt/soft/python/bin/python3}
 export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive}
 export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink}
-export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax}
+export DATAX_LAUNCHER=${DATAX_LAUNCHER:-/opt/soft/datax/bin/datax.py}
-export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
+export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH
 export MASTER_RESERVED_MEMORY=0.01
 export WORKER_RESERVED_MEMORY=0.01

README.md

@@ -76,3 +76,5 @@ DolphinScheduler enriches the <a href="https://landscape.cncf.io/?landscape=observability
 </p >
+/opt/soft/datax

deploy/kubernetes/dolphinscheduler/values.yaml

@@ -258,11 +258,11 @@ common:
     HADOOP_HOME: "/opt/soft/hadoop"
     HADOOP_CONF_DIR: "/opt/soft/hadoop/etc/hadoop"
     SPARK_HOME: "/opt/soft/spark"
-    PYTHON_HOME: "/usr/bin/python"
+    PYTHON_LAUNCHER: "/usr/bin/python/bin/python3"
     JAVA_HOME: "/opt/java/openjdk"
     HIVE_HOME: "/opt/soft/hive"
     FLINK_HOME: "/opt/soft/flink"
-    DATAX_HOME: "/opt/soft/datax"
+    DATAX_LAUNCHER: "/opt/soft/datax/bin/datax.py"
   ## Shared storage persistence mounted into api, master and worker, such as Hadoop, Spark, Flink and DataX binary package
   sharedStoragePersistence:
     enabled: false
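For Kubernetes installs, the renamed keys land under `common.configmap`. A hedged sketch of applying them at upgrade time; the release name `dolphinscheduler` and the concrete paths are placeholders, not values from this commit:

```shell
# Override the renamed configmap keys when upgrading the Helm release.
helm upgrade dolphinscheduler ./deploy/kubernetes/dolphinscheduler \
  --set common.configmap.PYTHON_LAUNCHER=/usr/bin/python3 \
  --set common.configmap.DATAX_LAUNCHER=/opt/soft/datax/bin/datax.py
```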

docs/docs/en/architecture/configuration.md

@@ -373,12 +373,12 @@ export JAVA_HOME=${JAVA_HOME:-/opt/soft/java}
 export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
 export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
 export SPARK_HOME=${SPARK_HOME:-/opt/soft/spark}
-export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python}
+export PYTHON_LAUNCHER=${PYTHON_LAUNCHER:-/opt/soft/python/bin/python3}
 export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive}
 export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink}
-export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax}
+export DATAX_LAUNCHER=${DATAX_LAUNCHER:-/opt/soft/datax/bin/datax.py}
-export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
+export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH

 # applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log
 export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/*

docs/docs/en/faq.md

@@ -220,16 +220,16 @@ A: 1, in **the process definition list**, click the **Start** button.
 ## Q : Python task setting Python version

-A: 1, **for the version after 1.0.3** only need to modify PYTHON_HOME in `bin/env/dolphinscheduler_env.sh`
+A: 1, **for the version after 1.0.3** only need to modify `$PYTHON_LAUNCHER` in `bin/env/dolphinscheduler_env.sh`

 ```
-export PYTHON_HOME=/bin/python
+export PYTHON_LAUNCHER=/bin/python/bin/python3
 ```

-Note: This is **PYTHON_HOME**, which is the absolute path of the python command, not the simple PYTHON_HOME. Also note that when exporting the PATH, you need to directly
+Note: This is **PYTHON_LAUNCHER**, which is the absolute path of the python command, not the simple PYTHON_LAUNCHER. Also note that when exporting the PATH, you need to directly

 ```
-export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH
+export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH
 ```

 2, For versions prior to 1.0.3, the Python task only supports the Python version of the system. It does not support specifying the Python version.
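A quick way to confirm the edit took effect, assuming you run it from the DolphinScheduler install directory (a convenience sketch, not part of the commit):

```shell
# Source the env file, then check that the launcher resolves and runs.
source bin/env/dolphinscheduler_env.sh
echo "$PYTHON_LAUNCHER"          # should print the absolute interpreter path
"$PYTHON_LAUNCHER" --version     # should print the Python version you expect
```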

docs/docs/en/guide/expansion-reduction.md

@@ -80,12 +80,12 @@ Attention:
 export HADOOP_HOME=/opt/soft/hadoop
 export HADOOP_CONF_DIR=/opt/soft/hadoop/etc/hadoop
 export SPARK_HOME=/opt/soft/spark
-export PYTHON_HOME=/opt/soft/python
+export PYTHON_LAUNCHER=/opt/soft/python/bin/python3
 export JAVA_HOME=/opt/soft/jav
 export HIVE_HOME=/opt/soft/hive
 export FLINK_HOME=/opt/soft/flink
-export DATAX_HOME=/opt/soft/datax/bin/datax.py
+export DATAX_LAUNCHER=/opt/soft/datax/bin/datax.py
-export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH:$FLINK_HOME/bin:$DATAX_HOME:$PATH
+export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH
 ```

docs/docs/en/guide/installation/kubernetes.md

@@ -372,7 +372,7 @@ docker build -t apache/dolphinscheduler-worker:python3 .
 4. Modify image `repository` and update `tag` to `python3` in `values.yaml`.
-5. Modify `PYTHON_HOME` to `/usr/bin/python3` in `values.yaml`.
+5. Modify `PYTHON_LAUNCHER` to `/usr/bin/python3` in `values.yaml`.
 6. Run a DolphinScheduler release in Kubernetes (See **Install DolphinScheduler**).

@@ -575,11 +575,11 @@ common:
 | `common.configmap.HADOOP_HOME` | Set `HADOOP_HOME` for DolphinScheduler's task environment | `/opt/soft/hadoop` |
 | `common.configmap.HADOOP_CONF_DIR` | Set `HADOOP_CONF_DIR` for DolphinScheduler's task environment | `/opt/soft/hadoop/etc/hadoop` |
 | `common.configmap.SPARK_HOME` | Set `SPARK_HOME` for DolphinScheduler's task environment | `/opt/soft/spark` |
-| `common.configmap.PYTHON_HOME` | Set `PYTHON_HOME` for DolphinScheduler's task environment | `/usr/bin/python` |
+| `common.configmap.PYTHON_LAUNCHER` | Set `PYTHON_LAUNCHER` for DolphinScheduler's task environment | `/usr/bin/python` |
 | `common.configmap.JAVA_HOME` | Set `JAVA_HOME` for DolphinScheduler's task environment | `/opt/java/openjdk` |
 | `common.configmap.HIVE_HOME` | Set `HIVE_HOME` for DolphinScheduler's task environment | `/opt/soft/hive` |
 | `common.configmap.FLINK_HOME` | Set `FLINK_HOME` for DolphinScheduler's task environment | `/opt/soft/flink` |
-| `common.configmap.DATAX_HOME` | Set `DATAX_HOME` for DolphinScheduler's task environment | `/opt/soft/datax` |
+| `common.configmap.DATAX_LAUNCHER` | Set `DATAX_LAUNCHER` for DolphinScheduler's task environment | `/opt/soft/datax` |
 | `common.sharedStoragePersistence.enabled` | Set `common.sharedStoragePersistence.enabled` to `true` to mount a shared storage volume for Hadoop, Spark binary and etc | `false` |
 | `common.sharedStoragePersistence.mountPath` | The mount path for the shared storage volume | `/opt/soft` |
 | `common.sharedStoragePersistence.accessModes` | `PersistentVolumeClaim` access modes, must be `ReadWriteMany` | `[ReadWriteMany]` |

docs/docs/en/guide/installation/pseudo-cluster.md

@@ -133,12 +133,12 @@ export REGISTRY_ZOOKEEPER_CONNECT_STRING=${REGISTRY_ZOOKEEPER_CONNECT_STRING:-lo
 export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
 export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
 export SPARK_HOME=${SPARK_HOME:-/opt/soft/spark}
-export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python}
+export PYTHON_LAUNCHER=${PYTHON_LAUNCHER:-/opt/soft/python/bin/python3}
 export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive}
 export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink}
-export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax}
+export DATAX_LAUNCHER=${DATAX_LAUNCHER:-/opt/soft/datax/bin/datax.py}
-export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
+export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH
 ```

 ## Initialize the Database

docs/docs/en/guide/integration/rainbond.md

@@ -47,7 +47,7 @@ API and Worker Services share the configuration file `/opt/dolphinscheduler/conf
 ## How to support Python 3?

-Worker service is installed with default `Python3`, you can add environment variables `PYTHON_HOME=/usr/bin/python3`
+Worker service is installed with default `Python3`, you can add environment variables `PYTHON_LAUNCHER=/usr/bin/python3`

 ## How to support Hadoop, Spark, DataX ?

docs/docs/en/guide/task/datax.md

@@ -2,9 +2,9 @@
 ## Overview

-DataX task type for executing DataX programs. For DataX nodes, the worker will execute `${DATAX_HOME}/bin/datax.py` to analyze the input json file.
+DataX task type for executing DataX programs. For DataX nodes, the worker will execute `${DATAX_LAUNCHER}` to analyze the input json file.

-By default, the datax.py will be executed by python2.7; if you want to use another python version, you can set the `DATAX_PYTHON` environment variable to specify a version.
+Please set the environment variable `PYTHON_LAUNCHER` before you run a datax task; some datax release versions only support `python2.7`.

 ## Create Task
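Concretely, once both launchers are exported the worker's DataX invocation expands to something like the following; the paths are the illustrative defaults from the env files above, and the job file path mirrors the unit-test fixture in this commit:

```shell
# ${PYTHON_LAUNCHER} ${DATAX_LAUNCHER} --jvm="-Xms1G -Xmx1G" <job.json> becomes:
/opt/soft/python/bin/python3 /opt/soft/datax/bin/datax.py \
  --jvm="-Xms1G -Xmx1G" /tmp/execution/app-id_job.json
```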

docs/docs/en/guide/task/openmldb.md

@@ -54,6 +54,6 @@ to a quick start.
 The OpenMLDB task will use OpenMLDB Python SDK to connect OpenMLDB cluster. So you should have the Python env.

-We will use `python3` by default. You can set `PYTHON_HOME` to use your custom python env.
+We will use `python3` by default. You can set `PYTHON_LAUNCHER` to use your custom python env.

 Make sure you have installed OpenMLDB Python SDK on the host where the worker server is running, using `pip install openmldb`.

docs/docs/en/guide/task/pytorch.md

@@ -41,12 +41,12 @@ The preceding two parameters are used to minimize the running of the configurati
 *no*
-- **Python Command Path** : Such as `/usr/bin/python`. The default value is ${PYTHON_HOME} in environment.
+- **Python Command Path** : Such as `/usr/bin/python`. The default value is ${PYTHON_LAUNCHER} in environment.
 *yes*
 - **Python Environment Manager Tool** : You can choose `virtualenv` or `conda`.
-  - if choose `virtualenv`, that may use `virtualenv` to create a new environment. Use command `virtualenv -p ${PYTHON_HOME} venv`.
+  - if choose `virtualenv`, that may use `virtualenv` to create a new environment. Use command `virtualenv -p ${PYTHON_LAUNCHER} venv`.
   - if choose `conda`, that may use `conda` to create a new environment, and you need to specify the Python version.
 - **Requirement File** : The default is requirements.txt.

@@ -86,7 +86,7 @@ It applies to a new environment to run the project. You need to create an enviro
 ```shell
 # Add the directory for the virtualenv command
 export PATH=/home/xxx/anaconda3/bin:$PATH
-export PYTHON_HOME=/usr/local/bin/python3.7
+export PYTHON_LAUNCHER=/usr/local/bin/python3.7
 ```

 ## Other
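For the virtualenv path, the manager effectively runs the sequence below; a sketch assembled from the `VIRTUALENV_BUILD` and install templates further down in this commit, with the interpreter path as an example only:

```shell
export PYTHON_LAUNCHER=/usr/local/bin/python3.7
# Build the environment with the configured interpreter, then install deps.
virtualenv -p ${PYTHON_LAUNCHER} ./venv
source ./venv/bin/activate
./venv/bin/python -m pip install -r requirements.txt
```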

docs/docs/en/guide/upgrade/incompatible.md

@@ -4,6 +4,7 @@ This document records the incompatible updates between each version. You need to
 ## dev

+* Change env `PYTHON_HOME` to `PYTHON_LAUNCHER` and `DATAX_HOME` to `DATAX_LAUNCHER` ([#14523](https://github.com/apache/dolphinscheduler/pull/14523))
 * Change regex matching sql params in SQL task plugin ([#13378](https://github.com/apache/dolphinscheduler/pull/13378))
 * Remove the spark version of spark task ([#11860](https://github.com/apache/dolphinscheduler/pull/11860)).
 * Change the default unix shell executor from sh to bash ([#12180](https://github.com/apache/dolphinscheduler/pull/12180)).
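Before upgrading, it may be worth scanning existing environment definitions for the old names; a convenience check, not part of the commit:

```shell
# Flag any lingering references to the pre-rename variables.
grep -nE 'PYTHON_HOME|DATAX_HOME' bin/env/dolphinscheduler_env.sh \
  && echo "update these to PYTHON_LAUNCHER / DATAX_LAUNCHER" \
  || echo "env file already clean"
```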

docs/docs/zh/architecture/configuration.md

@@ -365,12 +365,12 @@ export JAVA_HOME=${JAVA_HOME:-/opt/soft/java}
 export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
 export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
 export SPARK_HOME=${SPARK_HOME:-/opt/soft/spark}
-export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python}
+export PYTHON_LAUNCHER=${PYTHON_LAUNCHER:-/opt/soft/python}
 export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive}
 export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink}
-export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax}
+export DATAX_LAUNCHER=${DATAX_LAUNCHER:-/opt/soft/datax/bin/datax.py}
-export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
+export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH

 # applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log
 export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/*

docs/docs/zh/faq.md

@@ -199,16 +199,16 @@ A: 1, in the **process definition list**, click the **Start** button
 ## Q: Setting the Python version for a Python task

-A: Simply modify PYTHON_HOME in `bin/env/dolphinscheduler_env.sh`
+A: Simply modify PYTHON_LAUNCHER in `bin/env/dolphinscheduler_env.sh`

 ```
-export PYTHON_HOME=/bin/python
+export PYTHON_LAUNCHER=/bin/python
 ```

-Note: this **PYTHON_HOME** is the absolute path of the python command, not a plain PYTHON_HOME; also note that when exporting PATH you need to write it directly as
+Note: this **PYTHON_LAUNCHER** is the absolute path of the python command, not a plain PYTHON_LAUNCHER; also note that when exporting PATH you need to write it directly as

 ```
-export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH
+export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH
 ```

 ## Q: Worker Tasks spawn child processes via `sudo -u <tenant> sh xxx.command`; will they be killed when the task is killed?

docs/docs/zh/guide/expansion-reduction.md

@@ -80,12 +80,12 @@ sed -i 's/Defaults requirett/#Defaults requirett/g' /etc/sudoers
 export HADOOP_HOME=/opt/soft/hadoop
 export HADOOP_CONF_DIR=/opt/soft/hadoop/etc/hadoop
 export SPARK_HOME=/opt/soft/spark
-export PYTHON_HOME=/opt/soft/python
+export PYTHON_LAUNCHER=/opt/soft/python/bin/python3
 export JAVA_HOME=/opt/soft/java
 export HIVE_HOME=/opt/soft/hive
 export FLINK_HOME=/opt/soft/flink
-export DATAX_HOME=/opt/soft/datax/bin/datax.py
+export DATAX_LAUNCHER=/opt/soft/datax/bin/datax.py
-export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH:$FLINK_HOME/bin:$DATAX_HOME:$PATH
+export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH
 ```

docs/docs/zh/guide/installation/kubernetes.md

@@ -371,7 +371,7 @@ docker build -t apache/dolphinscheduler-worker:python3 .
 4. Modify the image `repository` field in `values.yaml` and update `tag` to `python3`
-5. Modify `PYTHON_HOME` in `values.yaml` to `/usr/bin/python3`
+5. Modify `PYTHON_LAUNCHER` in `values.yaml` to `/usr/bin/python3`
 6. Deploy dolphinscheduler (see **Install dolphinscheduler**)

@@ -574,11 +574,11 @@ common:
 | `common.configmap.HADOOP_HOME` | Set `HADOOP_HOME` for DolphinScheduler's task environment | `/opt/soft/hadoop` |
 | `common.configmap.HADOOP_CONF_DIR` | Set `HADOOP_CONF_DIR` for DolphinScheduler's task environment | `/opt/soft/hadoop/etc/hadoop` |
 | `common.configmap.SPARK_HOME` | Set `SPARK_HOME` for DolphinScheduler's task environment | `/opt/soft/spark` |
-| `common.configmap.PYTHON_HOME` | Set `PYTHON_HOME` for DolphinScheduler's task environment | `/usr/bin/python` |
+| `common.configmap.PYTHON_LAUNCHER` | Set `PYTHON_LAUNCHER` for DolphinScheduler's task environment | `/usr/bin/python` |
 | `common.configmap.JAVA_HOME` | Set `JAVA_HOME` for DolphinScheduler's task environment | `/opt/java/openjdk` |
 | `common.configmap.HIVE_HOME` | Set `HIVE_HOME` for DolphinScheduler's task environment | `/opt/soft/hive` |
 | `common.configmap.FLINK_HOME` | Set `FLINK_HOME` for DolphinScheduler's task environment | `/opt/soft/flink` |
-| `common.configmap.DATAX_HOME` | Set `DATAX_HOME` for DolphinScheduler's task environment | `/opt/soft/datax` |
+| `common.configmap.DATAX_LAUNCHER` | Set `DATAX_LAUNCHER` for DolphinScheduler's task environment | `/opt/soft/datax` |
 | `common.sharedStoragePersistence.enabled` | Set `common.sharedStoragePersistence.enabled` to `true` to mount a shared storage volume for Hadoop, Spark binary and etc | `false` |
 | `common.sharedStoragePersistence.mountPath` | The mount path for the shared storage volume | `/opt/soft` |
 | `common.sharedStoragePersistence.accessModes` | `PersistentVolumeClaim` access modes, must be `ReadWriteMany` | `[ReadWriteMany]` |

docs/docs/zh/guide/installation/pseudo-cluster.md

@@ -128,12 +128,12 @@ export REGISTRY_ZOOKEEPER_CONNECT_STRING=${REGISTRY_ZOOKEEPER_CONNECT_STRING:-lo
 export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
 export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
 export SPARK_HOME=${SPARK_HOME:-/opt/soft/spark}
-export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python}
+export PYTHON_LAUNCHER=${PYTHON_LAUNCHER:-/opt/soft/python}
 export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive}
 export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink}
-export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax}
+export DATAX_LAUNCHER=${DATAX_LAUNCHER:-/opt/soft/datax/bin/python3}
-export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
+export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH
 ```

 ## Initialize the Database

docs/docs/zh/guide/integration/rainbond.md

@@ -47,7 +47,7 @@ The API and Worker services share the configuration file `/opt/dolphinscheduler/conf/common.properties`
 ## How to support Python 3?

-The Worker service ships with `Python3` installed by default; you can add the environment variable `PYTHON_HOME=/usr/bin/python3`
+The Worker service ships with `Python3` installed by default; you can add the environment variable `PYTHON_LAUNCHER=/usr/bin/python3`

 ## How to support Hadoop, Spark, DataX, etc.?

docs/docs/zh/guide/task/datax.md

@@ -2,9 +2,9 @@
 ## Overview

-DataX task type, used to execute DataX programs. For DataX nodes, the worker executes `${DATAX_HOME}/bin/datax.py` to parse the input json file.
+DataX task type, used to execute DataX programs. For DataX nodes, the worker executes `${DATAX_LAUNCHER}` to parse the input json file.

-By default python2.7 is used to execute datax.py; to use another python version for datax.py, configure `DATAX_PYTHON` in the environment variables
+Configure `PYTHON_LAUNCHER` in the environment variables before running a datax task; some datax release versions only support python2.7

 ## Create Task

docs/docs/zh/guide/task/openmldb.md

@@ -51,6 +51,6 @@ The OpenMLDB task component can connect to an OpenMLDB cluster to execute tasks.
 The OpenMLDB task component uses the OpenMLDB Python SDK to connect to OpenMLDB, so a Python environment is required.

-We use `python3` by default; you can configure `PYTHON_HOME` to use your own Python environment.
+We use `python3` by default; you can configure `PYTHON_LAUNCHER` to use your own Python environment.

 Make sure the OpenMLDB Python SDK is installed on the worker server host via `pip install openmldb`.

docs/docs/zh/guide/task/pytorch.md

@@ -41,12 +41,12 @@
 #### Python environment parameters

 | **Task Parameter** | **Description** |
 |--------------|-----------------------------|
 | Create New Environment | Whether to create a new python environment to run the task |
-| Python Command Path | e.g. `/usr/bin/python`, defaults to `${PYTHON_HOME}` from the DS environment configuration |
+| Python Command Path | e.g. `/usr/bin/python`, defaults to `${PYTHON_LAUNCHER}` from the DS environment configuration |
-| Python Environment Manager Tool | Choose `virtualenv` or `conda`; with `virtualenv` a new environment is created via `virtualenv -p ${PYTHON_HOME} venv`; with `conda` a new environment is created and the python version must be specified |
+| Python Environment Manager Tool | Choose `virtualenv` or `conda`; with `virtualenv` a new environment is created via `virtualenv -p ${PYTHON_LAUNCHER} venv`; with `conda` a new environment is created and the python version must be specified |
 | Requirement File | Defaults to requirements.txt |

 With the `Python Project Path` parameter set, the `Python Script` and `Requirement File` parameters accept relative paths

@@ -84,7 +84,7 @@ export PATH=$HOME/anaconda3/bin:$PATH
 ```shell
 # Add the directory of the virtualenv command to PATH
 export PATH=/home/lucky/anaconda3/bin:$PATH
-export PYTHON_HOME=/usr/local/bin/python3.7
+export PYTHON_LAUNCHER=/usr/local/bin/python3.7
 ```

 ## Other

docs/docs/zh/guide/upgrade/incompatible.md

@@ -4,6 +4,7 @@
 ## dev

+* Renamed environment variables: `PYTHON_HOME` to `PYTHON_LAUNCHER` and `DATAX_HOME` to `DATAX_LAUNCHER` ([#14523](https://github.com/apache/dolphinscheduler/pull/14523))
 * Updated the regex used to match variables in SQL tasks ([#13378](https://github.com/apache/dolphinscheduler/pull/13378))
 * Remove the spark version of spark task ([#11860](https://github.com/apache/dolphinscheduler/pull/11860)).
 * Change the default unix shell executor from sh to bash ([#12180](https://github.com/apache/dolphinscheduler/pull/12180)).

dolphinscheduler-api/src/test/java/org/apache/dolphinscheduler/api/service/EnvironmentServiceTest.java

@@ -296,14 +296,14 @@ public class EnvironmentServiceTest {
         return "export HADOOP_HOME=/opt/hadoop-2.6.5\n"
                 + "export HADOOP_CONF_DIR=/etc/hadoop/conf\n"
                 + "export SPARK_HOME=/opt/soft/spark\n"
-                + "export PYTHON_HOME=/opt/soft/python\n"
+                + "export PYTHON_LAUNCHER=/opt/soft/python/bin/python3\n"
                 + "export JAVA_HOME=/opt/java/jdk1.8.0_181-amd64\n"
                 + "export HIVE_HOME=/opt/soft/hive\n"
                 + "export FLINK_HOME=/opt/soft/flink\n"
-                + "export DATAX_HOME=/opt/soft/datax\n"
+                + "export DATAX_LAUNCHER=/opt/soft/datax/bin/python3\n"
                 + "export YARN_CONF_DIR=\"/etc/hadoop/conf\"\n"
                 + "\n"
-                + "export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH\n"
+                + "export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH\n"
                 + "\n"
                 + "export HADOOP_CLASSPATH=`hadoop classpath`\n"
                 + "\n"

dolphinscheduler-dao/src/test/java/org/apache/dolphinscheduler/dao/mapper/ClusterMapperTest.java

@@ -174,14 +174,14 @@ public class ClusterMapperTest extends BaseDaoTest {
         return "export HADOOP_HOME=/opt/hadoop-2.6.5\n"
                 + "export HADOOP_CONF_DIR=/etc/hadoop/conf\n"
                 + "export SPARK_HOME=/opt/soft/spark\n"
-                + "export PYTHON_HOME=/opt/soft/python\n"
+                + "export PYTHON_LAUNCHER=/opt/soft/python/bin/python3\n"
                 + "export JAVA_HOME=/opt/java/jdk1.8.0_181-amd64\n"
                 + "export HIVE_HOME=/opt/soft/hive\n"
                 + "export FLINK_HOME=/opt/soft/flink\n"
-                + "export DATAX_HOME=/opt/soft/datax\n"
+                + "export DATAX_LAUNCHER=/opt/soft/datax/bin/python3\n"
                 + "export YARN_CONF_DIR=\"/etc/hadoop/conf\"\n"
                 + "\n"
-                + "export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH\n"
+                + "export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH\n"
                 + "\n"
                 + "export HADOOP_CLASSPATH=`hadoop classpath`\n"
                 + "\n"

dolphinscheduler-dao/src/test/java/org/apache/dolphinscheduler/dao/mapper/EnvironmentMapperTest.java

@@ -174,14 +174,14 @@ public class EnvironmentMapperTest extends BaseDaoTest {
         return "export HADOOP_HOME=/opt/hadoop-2.6.5\n"
                 + "export HADOOP_CONF_DIR=/etc/hadoop/conf\n"
                 + "export SPARK_HOME=/opt/soft/spark\n"
-                + "export PYTHON_HOME=/opt/soft/python\n"
+                + "export PYTHON_LAUNCHER=/opt/soft/python/bin/python3\n"
                 + "export JAVA_HOME=/opt/java/jdk1.8.0_181-amd64\n"
                 + "export HIVE_HOME=/opt/soft/hive\n"
                 + "export FLINK_HOME=/opt/soft/flink\n"
-                + "export DATAX_HOME=/opt/soft/datax\n"
+                + "export DATAX_LAUNCHER=/opt/soft/datax/bin/python3\n"
                 + "export YARN_CONF_DIR=\"/etc/hadoop/conf\"\n"
                 + "\n"
-                + "export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH\n"
+                + "export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH\n"
                 + "\n"
                 + "export HADOOP_CLASSPATH=`hadoop classpath`\n"
                 + "\n"

dolphinscheduler-dao/src/test/java/org/apache/dolphinscheduler/dao/utils/TaskCacheUtilsTest.java

@@ -167,7 +167,7 @@ class TaskCacheUtilsTest {
         // task definition version is changed, so cache key changed
         Assertions.assertNotEquals(cacheKeyD, cacheKeyE);

-        taskInstance.setEnvironmentConfig("export PYTHON_HOME=/bin/python3");
+        taskInstance.setEnvironmentConfig("export PYTHON_LAUNCHER=/bin/python3");
         String cacheKeyF = TaskCacheUtils.generateCacheKey(taskInstance, taskExecutionContext, storageOperate);
         // EnvironmentConfig is changed, so cache key changed
         Assertions.assertNotEquals(cacheKeyE, cacheKeyF);

dolphinscheduler-task-plugin/dolphinscheduler-task-datax/src/main/java/org/apache/dolphinscheduler/plugin/task/datax/DataxTask.java

@@ -54,7 +54,6 @@ import java.sql.SQLException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
-import java.util.Optional;
 import java.util.concurrent.ExecutionException;

 import com.alibaba.druid.sql.ast.SQLStatement;

@@ -74,14 +73,13 @@ public class DataxTask extends AbstractTask {
     /**
      * jvm parameters
      */
-    public static final String JVM_PARAM = " --jvm=\"-Xms%sG -Xmx%sG\" ";
+    public static final String JVM_PARAM = "--jvm=\"-Xms%sG -Xmx%sG\" ";
     public static final String CUSTOM_PARAM = " -D%s='%s'";

     /**
-     * python process(datax only supports version 2.7 by default)
      * todo: Create a shell script to execute the datax task, and read the python version from the env, so we can support multiple versions of datax python
     */
-    private static final String DATAX_PYTHON = Optional.ofNullable(System.getenv("DATAX_PYTHON")).orElse("python2.7");
+    private static final String PYTHON_LAUNCHER = "${PYTHON_LAUNCHER}";

     /**
      * select all

@@ -95,7 +93,7 @@ public class DataxTask extends AbstractTask {
     /**
      * datax path
      */
-    private static final String DATAX_PATH = "${DATAX_HOME}/bin/datax.py";
+    private static final String DATAX_LAUNCHER = "${DATAX_LAUNCHER}";

     /**
      * datax channel count
      */

@@ -380,9 +378,9 @@
      */
     protected String buildCommand(String jobConfigFilePath, Map<String, Property> paramsMap) {
         // datax python command
-        return DATAX_PYTHON +
+        return PYTHON_LAUNCHER +
                 " " +
-                DATAX_PATH +
+                DATAX_LAUNCHER +
                 " " +
                 loadJvmEnv(dataXParameters) +
                 addCustomParameters(paramsMap) +

dolphinscheduler-task-plugin/dolphinscheduler-task-datax/src/test/java/org/apache/dolphinscheduler/plugin/task/datax/DataxTaskTest.java

@@ -122,7 +122,7 @@ public class DataxTaskTest {
         Assertions.assertTrue(delete);
         Assertions.assertEquals(dataxTask.buildCommand("/tmp/execution/app-id_job.json", null),
-                "python2.7 ${DATAX_HOME}/bin/datax.py --jvm=\"-Xms1G -Xmx1G\" /tmp/execution/app-id_job.json");
+                "${PYTHON_LAUNCHER} ${DATAX_LAUNCHER} --jvm=\"-Xms1G -Xmx1G\" /tmp/execution/app-id_job.json");
     }

     @Test

@@ -157,7 +157,7 @@ public class DataxTaskTest {
         Assertions.assertTrue(delete);
         Assertions.assertEquals(dataxTask.buildCommand("/tmp/execution/app-id_job.json", createPrepareParamsMap()),
-                "python2.7 ${DATAX_HOME}/bin/datax.py --jvm=\"-Xms1G -Xmx1G\" -p \"-DDT='DT' -DDS='DS'\" /tmp/execution/app-id_job.json");
+                "${PYTHON_LAUNCHER} ${DATAX_LAUNCHER} --jvm=\"-Xms1G -Xmx1G\" -p \"-DDT='DT' -DDS='DS'\" /tmp/execution/app-id_job.json");
     }

     @Test

@@ -237,7 +237,7 @@ public class DataxTaskTest {
         DataxParameters dataXParameters = createDataxParameters();
         dataXParameters.setXms(3);
         dataXParameters.setXmx(4);
-        Assertions.assertEquals(dataxTask.loadJvmEnv(dataXParameters), " --jvm=\"-Xms3G -Xmx4G\" ");
+        Assertions.assertEquals(dataxTask.loadJvmEnv(dataXParameters), "--jvm=\"-Xms3G -Xmx4G\" ");
     }

     private DataxParameters createDataxParameters() {

dolphinscheduler-task-plugin/dolphinscheduler-task-openmldb/src/main/java/org/apache/dolphinscheduler/plugin/task/openmldb/OpenmldbTask.java

@@ -132,7 +132,7 @@ public class OpenmldbTask extends PythonTask {
     /**
      * Build the python task command.
-     * If user have set the 'PYTHON_HOME' environment, we will use the 'PYTHON_HOME',
+     * If user have set the 'PYTHON_LAUNCHER' environment, we will use the 'PYTHON_LAUNCHER',
      * if not, we will default use python.
      *
      * @param pythonFile Python file, cannot be empty.

@@ -145,7 +145,7 @@ public class OpenmldbTask extends PythonTask {
     }

     private String getPythonCommand() {
-        String pythonHome = System.getenv(PYTHON_HOME);
+        String pythonHome = System.getenv(PYTHON_LAUNCHER);
         return getPythonCommand(pythonHome);
     }
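Since `getPythonCommand()` now reads the `PYTHON_LAUNCHER` environment variable, the OpenMLDB task picks up whatever interpreter the worker host exports. A sketch of preparing that host, per the OpenMLDB task docs above; the interpreter path is an example:

```shell
# Point OpenMLDB tasks at a specific interpreter and install the SDK into it.
export PYTHON_LAUNCHER=/usr/bin/python3
"$PYTHON_LAUNCHER" -m pip install openmldb
```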

dolphinscheduler-task-plugin/dolphinscheduler-task-python/src/main/java/org/apache/dolphinscheduler/plugin/task/python/PythonConstants.java

@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.dolphinscheduler.plugin.task.python;
-
-public class PythonConstants {
-
-    private PythonConstants() {
-        throw new IllegalStateException("Utility class");
-    }
-
-    /**
-     * python home
-     */
-    public static final String PYTHON_HOME = "PYTHON_HOME";
-
-    /**
-     * EQUAL SIGN
-     */
-    public static final String EQUAL_SIGN = "=";
-}

dolphinscheduler-task-plugin/dolphinscheduler-task-python/src/main/java/org/apache/dolphinscheduler/plugin/task/python/PythonTask.java

@@ -59,9 +59,7 @@ public class PythonTask extends AbstractTask {
     protected TaskExecutionContext taskRequest;

-    protected static final String PYTHON_HOME = "PYTHON_HOME";
-
-    private static final String DEFAULT_PYTHON_VERSION = "python";
+    protected static final String PYTHON_LAUNCHER = "PYTHON_LAUNCHER";

     /**
      * constructor

@@ -185,7 +183,7 @@ public class PythonTask extends AbstractTask {
     /**
      * Build the python task command.
-     * If user have set the 'PYTHON_HOME' environment, we will use the 'PYTHON_HOME',
+     * If user have set the 'PYTHON_LAUNCHER' environment, we will use the 'PYTHON_LAUNCHER',
      * if not, we will default use python.
      *
      * @param pythonFile Python file, cannot be empty.

@@ -194,7 +192,7 @@ public class PythonTask extends AbstractTask {
     protected String buildPythonExecuteCommand(String pythonFile) {
         Preconditions.checkNotNull(pythonFile, "Python file cannot be null");

-        String pythonHome = String.format("${%s}", PYTHON_HOME);
+        String pythonHome = String.format("${%s}", PYTHON_LAUNCHER);
         return pythonHome + " " + pythonFile;
     }
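The command `buildPythonExecuteCommand` emits keeps the placeholder literal, so the shell resolves it at run time against the sourced env file. A sketch of the equivalent expansion, with the script name taken from the unit test below:

```shell
# The task plugin emits: ${PYTHON_LAUNCHER} test.py
# With the env file sourced, the worker shell effectively runs:
source bin/env/dolphinscheduler_env.sh
"$PYTHON_LAUNCHER" test.py
```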

dolphinscheduler-task-plugin/dolphinscheduler-task-python/src/test/java/org/apache/dolphinscheduler/plugin/task/python/PythonTaskTest.java

@@ -25,7 +25,7 @@ public class PythonTaskTest {
     @Test
     public void buildPythonExecuteCommand() throws Exception {
         PythonTask pythonTask = createPythonTask();
-        Assertions.assertEquals("${PYTHON_HOME} test.py", pythonTask.buildPythonExecuteCommand("test.py"));
+        Assertions.assertEquals("${PYTHON_LAUNCHER} test.py", pythonTask.buildPythonExecuteCommand("test.py"));
     }

     private PythonTask createPythonTask() {

dolphinscheduler-task-plugin/dolphinscheduler-task-pytorch/src/main/java/org/apache/dolphinscheduler/plugin/task/pytorch/PythonEnvManager.java

@@ -38,7 +38,7 @@ public class PythonEnvManager {
     private static final String VIRTUALENV_SOURCE = "source %s/bin/activate";

-    private static final String VIRTUALENV_BUILD = "virtualenv -p ${PYTHON_HOME} %s";
+    private static final String VIRTUALENV_BUILD = "virtualenv -p ${PYTHON_LAUNCHER} %s";

     private static final String INSTALL_COMMAND = "%s -m pip install -r %s";

dolphinscheduler-task-plugin/dolphinscheduler-task-pytorch/src/main/java/org/apache/dolphinscheduler/plugin/task/pytorch/PytorchParameters.java

@@ -35,7 +35,7 @@ public class PytorchParameters extends AbstractParameters {
     private String pythonPath = ".";
     private String script;
     private String scriptParams;
-    private String pythonCommand = "${PYTHON_HOME}";
+    private String pythonLauncher = "${PYTHON_LAUNCHER}";
     private String pythonEnvTool = PythonEnvManager.ENV_TOOL_VENV;
     private String requirements = "requirements.txt";
     private String condaPythonVersion = "3.9";

@@ -58,12 +58,12 @@ public class PytorchParameters extends AbstractParameters {
         return getPossiblePath(requirements);
     }

-    public String getPythonCommand() {
+    public String getPythonLauncher() {
         String command;
-        if (pythonCommand.isEmpty()) {
-            command = "${PYTHON_HOME}";
+        if (pythonLauncher.isEmpty()) {
+            command = "${PYTHON_LAUNCHER}";
         } else {
-            command = pythonCommand;
+            command = pythonLauncher;
         }
         return command;
     }

dolphinscheduler-task-plugin/dolphinscheduler-task-pytorch/src/main/java/org/apache/dolphinscheduler/plugin/task/pytorch/PytorchTask.java

@@ -129,7 +129,7 @@ public class PytorchTask extends AbstractTask {
         if (pytorchParameters.getIsCreateEnvironment()) {
             pythonCommand = pythonEnvManager.getPythonCommand();
         } else {
-            pythonCommand = pytorchParameters.getPythonCommand();
+            pythonCommand = pytorchParameters.getPythonLauncher();
         }
         return pythonCommand;

dolphinscheduler-task-plugin/dolphinscheduler-task-pytorch/src/test/java/org/apache/dolphinscheduler/plugin/task/pytorch/PytorchTaskTest.java

@@ -68,7 +68,7 @@ public class PytorchTaskTest {
         envManager.setPythonEnvTool(PythonEnvManager.ENV_TOOL_VENV);
         String venvEnvCommand = envManager.getBuildEnvCommand(requirementPath);
         Assertions.assertEquals(venvEnvCommand,
-                "virtualenv -p ${PYTHON_HOME} ./venv && source ./venv/bin/activate && ./venv/bin/python -m pip install -r "
+                "virtualenv -p ${PYTHON_LAUNCHER} ./venv && source ./venv/bin/activate && ./venv/bin/python -m pip install -r "
                         + requirementPath);
     }

@@ -103,15 +103,15 @@ public class PytorchTaskTest {
         PytorchTask task1 = initTask(parameters);
         Assertions.assertEquals(task1.buildPythonExecuteCommand(),
                 "export PYTHONPATH=.\n" +
-                        "${PYTHON_HOME} main.py --epochs=1 --dry-run");
+                        "${PYTHON_LAUNCHER} main.py --epochs=1 --dry-run");

-        parameters.setPythonCommand("");
+        parameters.setPythonLauncher("");
         PytorchTask task2 = initTask(parameters);
         Assertions.assertEquals(task2.buildPythonExecuteCommand(),
                 "export PYTHONPATH=.\n" +
-                        "${PYTHON_HOME} main.py --epochs=1 --dry-run");
+                        "${PYTHON_LAUNCHER} main.py --epochs=1 --dry-run");

-        parameters.setPythonCommand("/usr/bin/python");
+        parameters.setPythonLauncher("/usr/bin/python");
         PytorchTask task3 = initTask(parameters);
         Assertions.assertEquals(task3.buildPythonExecuteCommand(),
                 "export PYTHONPATH=.\n" +

@@ -151,7 +151,7 @@ public class PytorchTaskTest {
         PytorchTask task = initTask(parameters);
         Assertions.assertEquals(task.buildPythonExecuteCommand(),
                 "export PYTHONPATH=.\n" +
-                        "virtualenv -p ${PYTHON_HOME} ./venv && source ./venv/bin/activate && ./venv/bin/python -m pip install -r requirements.txt\n"
+                        "virtualenv -p ${PYTHON_LAUNCHER} ./venv && source ./venv/bin/activate && ./venv/bin/python -m pip install -r requirements.txt\n"
                         +
                         "./venv/bin/python main.py --epochs=1 --dry-run");

@@ -178,7 +178,7 @@ public class PytorchTaskTest {
         createFile(scriptFile);

         String expected = "export PYTHONPATH=%s\n" +
-                "virtualenv -p ${PYTHON_HOME} ./venv && source ./venv/bin/activate && ./venv/bin/python -m pip install -r %s\n"
+                "virtualenv -p ${PYTHON_LAUNCHER} ./venv && source ./venv/bin/activate && ./venv/bin/python -m pip install -r %s\n"
                         +
                         "./venv/bin/python %s";
         System.out.println(task.buildPythonExecuteCommand());

dolphinscheduler-ui/src/locales/en_US/project.ts

@@ -847,7 +847,7 @@ export default {
   pytorch_python_path: 'Project Path',
   pytorch_is_create_environment: 'Create An Environment Or Not',
   pytorch_python_command: 'Python Command Path',
-  pytorch_python_command_tips: 'If empty,will be set $PYTHON_HOME',
+  pytorch_python_command_tips: 'If empty,will be set $PYTHON_LAUNCHER',
   pytorch_python_env_tool: 'Python Environment Manager Tool',
   pytorch_requirements: 'Requirement File',
   pytorch_conda_python_version: 'Python Version',

dolphinscheduler-ui/src/locales/zh_CN/project.ts

@@ -823,7 +823,7 @@ export default {
   pytorch_python_path: 'python项目地址',
   pytorch_is_create_environment: '是否创建新环境',
   pytorch_python_command: 'python命令路径',
-  pytorch_python_command_tips: '若为空,则使用$PYTHON_HOME',
+  pytorch_python_command_tips: '若为空,则使用$PYTHON_LAUNCHER',
   pytorch_python_env_tool: 'python环境管理工具',
   pytorch_requirements: '依赖文件',
   pytorch_conda_python_version: 'python版本',

dolphinscheduler-ui/src/views/projects/task/components/node/tasks/use-pytorch.ts

@@ -45,7 +45,7 @@ export function usePytorch({
   timeout: 30,
   timeoutNotifyStrategy: ['WARN'],
   pythonEnvTool: 'conda',
-  pythonCommand: '${PYTHON_HOME}',
+  pythonCommand: '${PYTHON_LAUNCHER}',
   condaPythonVersion: '3.7',
   requirements: 'requirements.txt',
   pythonPath: '.'

dolphinscheduler-ui/src/views/security/environment-manage/components/environment-modal.tsx

@@ -31,13 +31,13 @@ const envConfigPlaceholder =
   'export HADOOP_HOME=/opt/hadoop-2.6.5\n' +
   'export HADOOP_CONF_DIR=/etc/hadoop/conf\n' +
   'export SPARK_HOME=/opt/soft/spark\n' +
-  'export PYTHON_HOME=/opt/soft/python\n' +
+  'export PYTHON_LAUNCHER=/opt/soft/python/bin/python3\n' +
   'export JAVA_HOME=/opt/java/jdk1.8.0_181-amd64\n' +
   'export HIVE_HOME=/opt/soft/hive\n' +
   'export FLINK_HOME=/opt/soft/flink\n' +
-  'export DATAX_HOME=/opt/soft/datax\n' +
+  'export DATAX_LAUNCHER=/opt/soft/datax/bin/datax.py\n' +
   'export YARN_CONF_DIR=/etc/hadoop/conf\n' +
-  'export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH\n' +
+  'export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH\n' +
   'export HADOOP_CLASSPATH=`hadoop classpath`\n'

 const EnvironmentModal = defineComponent({
