
impv: Correct datax execute and python execute script name (#14523)

* impv: Correct datax execute and python execute script name

We now use PYTHON_LAUNCHER as the path of the Python interpreter that executes Python tasks,
and DATAX_LAUNCHER as the path of the datax.py launcher script (see the example below).

* Add pr number

* fix ut

* style

---------

Co-authored-by: xiangzihao <460888207@qq.com>
3.2.1-prepare
Jay Chung, 1 year ago, committed by GitHub
commit de2d215aee
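For reference, a minimal excerpt of the renamed entries in `dolphinscheduler_env.sh` after this change. The paths are the defaults that appear in the diffs below; adjust them to the actual install locations in your environment:

```shell
# dolphinscheduler_env.sh (excerpt)
# PYTHON_LAUNCHER is the Python interpreter used to execute Python tasks
export PYTHON_LAUNCHER=${PYTHON_LAUNCHER:-/opt/soft/python/bin/python3}
# DATAX_LAUNCHER is the datax.py entry script used to execute DataX tasks
export DATAX_LAUNCHER=${DATAX_LAUNCHER:-/opt/soft/datax/bin/datax.py}
# both launchers replace the old *_HOME/bin entries on PATH
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH
```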
42 changed files (changed lines per file in parentheses):

1. .github/workflows/cluster-test/mysql/dolphinscheduler_env.sh (6)
2. .github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh (6)
3. README.md (2)
4. deploy/kubernetes/dolphinscheduler/values.yaml (4)
5. docs/docs/en/architecture/configuration.md (6)
6. docs/docs/en/faq.md (8)
7. docs/docs/en/guide/expansion-reduction.md (6)
8. docs/docs/en/guide/installation/kubernetes.md (6)
9. docs/docs/en/guide/installation/pseudo-cluster.md (6)
10. docs/docs/en/guide/integration/rainbond.md (2)
11. docs/docs/en/guide/task/datax.md (4)
12. docs/docs/en/guide/task/openmldb.md (2)
13. docs/docs/en/guide/task/pytorch.md (6)
14. docs/docs/en/guide/upgrade/incompatible.md (1)
15. docs/docs/zh/architecture/configuration.md (6)
16. docs/docs/zh/faq.md (8)
17. docs/docs/zh/guide/expansion-reduction.md (6)
18. docs/docs/zh/guide/installation/kubernetes.md (6)
19. docs/docs/zh/guide/installation/pseudo-cluster.md (6)
20. docs/docs/zh/guide/integration/rainbond.md (2)
21. docs/docs/zh/guide/task/datax.md (4)
22. docs/docs/zh/guide/task/openmldb.md (2)
23. docs/docs/zh/guide/task/pytorch.md (14)
24. docs/docs/zh/guide/upgrade/incompatible.md (1)
25. dolphinscheduler-api/src/test/java/org/apache/dolphinscheduler/api/service/EnvironmentServiceTest.java (6)
26. dolphinscheduler-dao/src/test/java/org/apache/dolphinscheduler/dao/mapper/ClusterMapperTest.java (6)
27. dolphinscheduler-dao/src/test/java/org/apache/dolphinscheduler/dao/mapper/EnvironmentMapperTest.java (6)
28. dolphinscheduler-dao/src/test/java/org/apache/dolphinscheduler/dao/utils/TaskCacheUtilsTest.java (2)
29. dolphinscheduler-task-plugin/dolphinscheduler-task-datax/src/main/java/org/apache/dolphinscheduler/plugin/task/datax/DataxTask.java (12)
30. dolphinscheduler-task-plugin/dolphinscheduler-task-datax/src/test/java/org/apache/dolphinscheduler/plugin/task/datax/DataxTaskTest.java (6)
31. dolphinscheduler-task-plugin/dolphinscheduler-task-openmldb/src/main/java/org/apache/dolphinscheduler/plugin/task/openmldb/OpenmldbTask.java (4)
32. dolphinscheduler-task-plugin/dolphinscheduler-task-python/src/main/java/org/apache/dolphinscheduler/plugin/task/python/PythonConstants.java (35)
33. dolphinscheduler-task-plugin/dolphinscheduler-task-python/src/main/java/org/apache/dolphinscheduler/plugin/task/python/PythonTask.java (8)
34. dolphinscheduler-task-plugin/dolphinscheduler-task-python/src/test/java/org/apache/dolphinscheduler/plugin/task/python/PythonTaskTest.java (2)
35. dolphinscheduler-task-plugin/dolphinscheduler-task-pytorch/src/main/java/org/apache/dolphinscheduler/plugin/task/pytorch/PythonEnvManager.java (2)
36. dolphinscheduler-task-plugin/dolphinscheduler-task-pytorch/src/main/java/org/apache/dolphinscheduler/plugin/task/pytorch/PytorchParameters.java (10)
37. dolphinscheduler-task-plugin/dolphinscheduler-task-pytorch/src/main/java/org/apache/dolphinscheduler/plugin/task/pytorch/PytorchTask.java (2)
38. dolphinscheduler-task-plugin/dolphinscheduler-task-pytorch/src/test/java/org/apache/dolphinscheduler/plugin/task/pytorch/PytorchTaskTest.java (14)
39. dolphinscheduler-ui/src/locales/en_US/project.ts (2)
40. dolphinscheduler-ui/src/locales/zh_CN/project.ts (2)
41. dolphinscheduler-ui/src/views/projects/task/components/node/tasks/use-pytorch.ts (2)
42. dolphinscheduler-ui/src/views/security/environment-manage/components/environment-modal.tsx (6)

.github/workflows/cluster-test/mysql/dolphinscheduler_env.sh (6 changed lines)

@@ -39,12 +39,12 @@ export REGISTRY_ZOOKEEPER_BLOCK_UNTIL_CONNECTED=30000ms
export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
export SPARK_HOME=${SPARK_HOME:-/opt/soft/spark}
export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python}
export PYTHON_LAUNCHER=${PYTHON_LAUNCHER:-/opt/soft/python/bin/python3}
export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive}
export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink}
export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax}
export DATAX_LAUNCHER=${DATAX_LAUNCHER:-/opt/soft/datax/bin/datax.py}
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH
export MASTER_RESERVED_MEMORY=0.01
export WORKER_RESERVED_MEMORY=0.01

.github/workflows/cluster-test/postgresql/dolphinscheduler_env.sh (6 changed lines)

@@ -39,12 +39,12 @@ export REGISTRY_ZOOKEEPER_BLOCK_UNTIL_CONNECTED=30000ms
export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
export SPARK_HOME=${SPARK_HOME:-/opt/soft/spark}
export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python}
export PYTHON_LAUNCHER=${PYTHON_LAUNCHER:-/opt/soft/python/bin/python3}
export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive}
export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink}
export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax}
export DATAX_LAUNCHER=${DATAX_LAUNCHER:-/opt/soft/datax/bin/datax.py}
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH
export MASTER_RESERVED_MEMORY=0.01
export WORKER_RESERVED_MEMORY=0.01

README.md (2 changed lines)

@@ -76,3 +76,5 @@ DolphinScheduler enriches the <a href="https://landscape.cncf.io/?landscape=obse
</p >
/opt/soft/datax

deploy/kubernetes/dolphinscheduler/values.yaml (4 changed lines)

@@ -258,11 +258,11 @@ common:
HADOOP_HOME: "/opt/soft/hadoop"
HADOOP_CONF_DIR: "/opt/soft/hadoop/etc/hadoop"
SPARK_HOME: "/opt/soft/spark"
PYTHON_HOME: "/usr/bin/python"
PYTHON_LAUNCHER: "/usr/bin/python/bin/python3"
JAVA_HOME: "/opt/java/openjdk"
HIVE_HOME: "/opt/soft/hive"
FLINK_HOME: "/opt/soft/flink"
DATAX_HOME: "/opt/soft/datax"
DATAX_LAUNCHER: "/opt/soft/datax/bin/datax.py"
## Shared storage persistence mounted into api, master and worker, such as Hadoop, Spark, Flink and DataX binary package
sharedStoragePersistence:
enabled: false

docs/docs/en/architecture/configuration.md (6 changed lines)

@@ -373,12 +373,12 @@ export JAVA_HOME=${JAVA_HOME:-/opt/soft/java}
export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
export SPARK_HOME=${SPARK_HOME:-/opt/soft/spark}
export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python}
export PYTHON_LAUNCHER=${PYTHON_LAUNCHER:-/opt/soft/python/bin/python3}
export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive}
export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink}
export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax}
export DATAX_LAUNCHER=${DATAX_LAUNCHER:-/opt/soft/datax/bin/datax.py}
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH
# applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log
export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/*

docs/docs/en/faq.md (8 changed lines)

@@ -220,16 +220,16 @@ A: 1, in **the process definition list**, click the **Start** button.
## Q : Python task setting Python version
A: 1,**for the version after 1.0.3** only need to modify PYTHON_HOME in `bin/env/dolphinscheduler_env.sh`
A: 1,**for the version after 1.0.3** only need to modify `$PYTHON_LAUNCHER` in `bin/env/dolphinscheduler_env.sh`
```
export PYTHON_HOME=/bin/python
export PYTHON_LAUNCHER=/bin/python/bin/python3
```
Note: This is **PYTHON_HOME** , which is the absolute path of the python command, not the simple PYTHON_HOME. Also note that when exporting the PATH, you need to directly
Note: This is **PYTHON_LAUNCHER** , which is the absolute path of the python command, not the simple PYTHON_LAUNCHER. Also note that when exporting the PATH, you need to directly
```
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH
```
2,For versions prior to 1.0.3, the Python task only supports the Python version of the system. It does not support specifying the Python version.

docs/docs/en/guide/expansion-reduction.md (6 changed lines)

@@ -80,12 +80,12 @@ Attention:
export HADOOP_HOME=/opt/soft/hadoop
export HADOOP_CONF_DIR=/opt/soft/hadoop/etc/hadoop
export SPARK_HOME=/opt/soft/spark
export PYTHON_HOME=/opt/soft/python
export PYTHON_LAUNCHER=/opt/soft/python/bin/python3
export JAVA_HOME=/opt/soft/jav
export HIVE_HOME=/opt/soft/hive
export FLINK_HOME=/opt/soft/flink
export DATAX_HOME=/opt/soft/datax/bin/datax.py
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH:$FLINK_HOME/bin:$DATAX_HOME:$PATH
export DATAX_LAUNCHER=/opt/soft/datax/bin/datax.py
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH
```

docs/docs/en/guide/installation/kubernetes.md (6 changed lines)

@@ -372,7 +372,7 @@ docker build -t apache/dolphinscheduler-worker:python3 .
4. Modify image `repository` and update `tag` to `python3` in `values.yaml`.
5. Modify `PYTHON_HOME` to `/usr/bin/python3` in `values.yaml`.
5. Modify `PYTHON_LAUNCHER` to `/usr/bin/python3` in `values.yaml`.
6. Run a DolphinScheduler release in Kubernetes (See **Install DolphinScheduler**).
@@ -575,11 +575,11 @@ common:
| `common.configmap.HADOOP_HOME` | Set `HADOOP_HOME` for DolphinScheduler's task environment | `/opt/soft/hadoop` |
| `common.configmap.HADOOP_CONF_DIR` | Set `HADOOP_CONF_DIR` for DolphinScheduler's task environment | `/opt/soft/hadoop/etc/hadoop` |
| `common.configmap.SPARK_HOME` | Set `SPARK_HOME` for DolphinScheduler's task environment | `/opt/soft/spark` |
| `common.configmap.PYTHON_HOME` | Set `PYTHON_HOME` for DolphinScheduler's task environment | `/usr/bin/python` |
| `common.configmap.PYTHON_LAUNCHER` | Set `PYTHON_LAUNCHER` for DolphinScheduler's task environment | `/usr/bin/python` |
| `common.configmap.JAVA_HOME` | Set `JAVA_HOME` for DolphinScheduler's task environment | `/opt/java/openjdk` |
| `common.configmap.HIVE_HOME` | Set `HIVE_HOME` for DolphinScheduler's task environment | `/opt/soft/hive` |
| `common.configmap.FLINK_HOME` | Set `FLINK_HOME` for DolphinScheduler's task environment | `/opt/soft/flink` |
| `common.configmap.DATAX_HOME` | Set `DATAX_HOME` for DolphinScheduler's task environment | `/opt/soft/datax` |
| `common.configmap.DATAX_LAUNCHER` | Set `DATAX_LAUNCHER` for DolphinScheduler's task environment | `/opt/soft/datax` |
| `common.sharedStoragePersistence.enabled` | Set `common.sharedStoragePersistence.enabled` to `true` to mount a shared storage volume for Hadoop, Spark binary and etc | `false` |
| `common.sharedStoragePersistence.mountPath` | The mount path for the shared storage volume | `/opt/soft` |
| `common.sharedStoragePersistence.accessModes` | `PersistentVolumeClaim` access modes, must be `ReadWriteMany` | `[ReadWriteMany]` |

docs/docs/en/guide/installation/pseudo-cluster.md (6 changed lines)

@@ -133,12 +133,12 @@ export REGISTRY_ZOOKEEPER_CONNECT_STRING=${REGISTRY_ZOOKEEPER_CONNECT_STRING:-lo
export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
export SPARK_HOME=${SPARK_HOME:-/opt/soft/spark}
export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python}
export PYTHON_LAUNCHER=${PYTHON_LAUNCHER:-/opt/soft/python/bin/python3}
export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive}
export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink}
export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax}
export DATAX_LAUNCHER=${DATAX_LAUNCHER:-/opt/soft/datax/bin/datax.py}
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH
```
## Initialize the Database

docs/docs/en/guide/integration/rainbond.md (2 changed lines)

@@ -47,7 +47,7 @@ API and Worker Services share the configuration file `/opt/dolphinscheduler/conf
## How to support Python 3?
Worker service is installed with default `Python3`,you can add environment variables `PYTHON_HOME=/usr/bin/python3`
Worker service is installed with default `Python3`,you can add environment variables `PYTHON_LAUNCHER=/usr/bin/python3`
## How to support Hadoop, Spark, DataX ?

docs/docs/en/guide/task/datax.md (4 changed lines)

@@ -2,9 +2,9 @@
## Overview
DataX task type for executing DataX programs. For DataX nodes, the worker will execute `${DATAX_HOME}/bin/datax.py` to analyze the input json file.
DataX task type for executing DataX programs. For DataX nodes, the worker will execute `${DATAX_LAUNCHER}` to analyze the input json file.
By default, the datax.py will be executed by python2.7, if you want to use other python version, you can set the `DATAX_PYTHON` environment variable to specify a version.
Please set environment name `PYTHON_LAUNCHER` before you run datax task, some of datax release version only support `python2.7`
## Create Task

docs/docs/en/guide/task/openmldb.md (2 changed lines)

@@ -54,6 +54,6 @@ to a quick start.
The OpenMLDB task will use OpenMLDB Python SDK to connect OpenMLDB cluster. So you should have the Python env.
We will use `python3` by default. You can set `PYTHON_HOME` to use your custom python env.
We will use `python3` by default. You can set `PYTHON_LAUNCHER` to use your custom python env.
Make sure you have installed OpenMLDB Python SDK in the host where the worker server running, using `pip install openmldb`.

docs/docs/en/guide/task/pytorch.md (6 changed lines)

@@ -41,12 +41,12 @@ The preceding two parameters are used to minimize the running of the configurati
*no*
- **Python Command Path** :Such as `/usr/bin/python`,The default value is ${PYTHON_HOME} in environment.
- **Python Command Path** :Such as `/usr/bin/python`,The default value is ${PYTHON_LAUNCHER} in environment.
*yes*
- **Python Environment Manager Tool** :You can choose `virtualenv` or `conda`.
- if choose `virtualenv`,that may use `virtualenv` to create a new environment. Use command `virtualenv -p ${PYTHON_HOME} venv`.
- if choose `virtualenv`,that may use `virtualenv` to create a new environment. Use command `virtualenv -p ${PYTHON_LAUNCHER} venv`.
- if choose `conda`, ,that may use `conda` to create a new environment,And you need to specify the Python version.
- **Requirement File** :The default is requirements.txt。
@@ -86,7 +86,7 @@ It applies to a new environment to run the project. You need to create an enviro
```shell
# Add the directory for the virtualenv command
export PATH=/home/xxx/anaconda3/bin:$PATH
export PYTHON_HOME=/usr/local/bin/python3.7
export PYTHON_LAUNCHER=/usr/local/bin/python3.7
```
## Other

docs/docs/en/guide/upgrade/incompatible.md (1 changed line)

@@ -4,6 +4,7 @@ This document records the incompatible updates between each version. You need to
## dev
* Change env `PYTHON_HOME` to `PYTHON_LAUNCHER` and `DATAX_HOME` to `DATAX_LAUNCHER` ([#14523](https://github.com/apache/dolphinscheduler/pull/14523))
* Change regex matching sql params in SQL task plugin ([#13378](https://github.com/apache/dolphinscheduler/pull/13378))
* Remove the spark version of spark task ([#11860](https://github.com/apache/dolphinscheduler/pull/11860)).
* Change the default unix shell executor from sh to bash ([#12180](https://github.com/apache/dolphinscheduler/pull/12180)).

docs/docs/zh/architecture/configuration.md (6 changed lines)

@@ -365,12 +365,12 @@ export JAVA_HOME=${JAVA_HOME:-/opt/soft/java}
export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
export SPARK_HOME=${SPARK_HOME:-/opt/soft/spark}
export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python}
export PYTHON_LAUNCHER=${PYTHON_LAUNCHER:-/opt/soft/python}
export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive}
export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink}
export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax}
export DATAX_LAUNCHER=${DATAX_LAUNCHER:-/opt/soft/datax/bin/datax.py}
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH
# applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log
export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/*

docs/docs/zh/faq.md (8 changed lines)

@@ -199,16 +199,16 @@ A: 1,在 **流程定义列表**,点击 **启动** 按钮
## Q:Python 任务设置 Python 版本
A: 只需要修改 `bin/env/dolphinscheduler_env.sh` 中的 PYTHON_HOME
A: 只需要修改 `bin/env/dolphinscheduler_env.sh` 中的 PYTHON_LAUNCHER
```
export PYTHON_HOME=/bin/python
export PYTHON_LAUNCHER=/bin/python
```
注意:这了 **PYTHON_HOME** ,是 python 命令的绝对路径,而不是单纯的 PYTHON_HOME,还需要注意的是 export PATH 的时候,需要直接
注意:这了 **PYTHON_LAUNCHER** ,是 python 命令的绝对路径,而不是单纯的 PYTHON_LAUNCHER,还需要注意的是 export PATH 的时候,需要直接
```
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH
```
## Q:Worker Task 通过 sudo -u 租户 sh xxx.command 会产生子进程,在 kill 的时候,是否会杀掉

docs/docs/zh/guide/expansion-reduction.md (6 changed lines)

@@ -80,12 +80,12 @@ sed -i 's/Defaults requirett/#Defaults requirett/g' /etc/sudoers
export HADOOP_HOME=/opt/soft/hadoop
export HADOOP_CONF_DIR=/opt/soft/hadoop/etc/hadoop
export SPARK_HOME=/opt/soft/spark
export PYTHON_HOME=/opt/soft/python
export PYTHON_LAUNCHER=/opt/soft/python/bin/python3
export JAVA_HOME=/opt/soft/java
export HIVE_HOME=/opt/soft/hive
export FLINK_HOME=/opt/soft/flink
export DATAX_HOME=/opt/soft/datax/bin/datax.py
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH:$FLINK_HOME/bin:$DATAX_HOME:$PATH
export DATAX_LAUNCHER=/opt/soft/datax/bin/datax.py
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$PATH:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH
```

docs/docs/zh/guide/installation/kubernetes.md (6 changed lines)

@@ -371,7 +371,7 @@ docker build -t apache/dolphinscheduler-worker:python3 .
4. 修改 `values.yaml` 文件中 image 的 `repository` 字段,并更新 `tag``python3`
5. 修改 `values.yaml` 文件中的 `PYTHON_HOME``/usr/bin/python3`
5. 修改 `values.yaml` 文件中的 `PYTHON_LAUNCHER``/usr/bin/python3`
6. 部署 dolphinscheduler (详见**安装 dolphinscheduler**)
@@ -574,11 +574,11 @@ common:
| `common.configmap.HADOOP_HOME` | Set `HADOOP_HOME` for DolphinScheduler's task environment | `/opt/soft/hadoop` |
| `common.configmap.HADOOP_CONF_DIR` | Set `HADOOP_CONF_DIR` for DolphinScheduler's task environment | `/opt/soft/hadoop/etc/hadoop` |
| `common.configmap.SPARK_HOME` | Set `SPARK_HOME` for DolphinScheduler's task environment | `/opt/soft/spark` |
| `common.configmap.PYTHON_HOME` | Set `PYTHON_HOME` for DolphinScheduler's task environment | `/usr/bin/python` |
| `common.configmap.PYTHON_LAUNCHER` | Set `PYTHON_LAUNCHER` for DolphinScheduler's task environment | `/usr/bin/python` |
| `common.configmap.JAVA_HOME` | Set `JAVA_HOME` for DolphinScheduler's task environment | `/opt/java/openjdk` |
| `common.configmap.HIVE_HOME` | Set `HIVE_HOME` for DolphinScheduler's task environment | `/opt/soft/hive` |
| `common.configmap.FLINK_HOME` | Set `FLINK_HOME` for DolphinScheduler's task environment | `/opt/soft/flink` |
| `common.configmap.DATAX_HOME` | Set `DATAX_HOME` for DolphinScheduler's task environment | `/opt/soft/datax` |
| `common.configmap.DATAX_LAUNCHER` | Set `DATAX_LAUNCHER` for DolphinScheduler's task environment | `/opt/soft/datax` |
| `common.sharedStoragePersistence.enabled` | Set `common.sharedStoragePersistence.enabled` to `true` to mount a shared storage volume for Hadoop, Spark binary and etc | `false` |
| `common.sharedStoragePersistence.mountPath` | The mount path for the shared storage volume | `/opt/soft` |
| `common.sharedStoragePersistence.accessModes` | `PersistentVolumeClaim` access modes, must be `ReadWriteMany` | `[ReadWriteMany]` |

docs/docs/zh/guide/installation/pseudo-cluster.md (6 changed lines)

@@ -128,12 +128,12 @@ export REGISTRY_ZOOKEEPER_CONNECT_STRING=${REGISTRY_ZOOKEEPER_CONNECT_STRING:-lo
export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
export SPARK_HOME=${SPARK_HOME:-/opt/soft/spark}
export PYTHON_HOME=${PYTHON_HOME:-/opt/soft/python}
export PYTHON_LAUNCHER=${PYTHON_LAUNCHER:-/opt/soft/python}
export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive}
export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink}
export DATAX_HOME=${DATAX_HOME:-/opt/soft/datax}
export DATAX_LAUNCHER=${DATAX_LAUNCHER:-/opt/soft/datax/bin/python3}
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH
```
## 初始化数据库

docs/docs/zh/guide/integration/rainbond.md (2 changed lines)

@@ -47,7 +47,7 @@ API 和 Worker 服务共用 `/opt/dolphinscheduler/conf/common.properties` ,
## 如何支持 Python 3?
Worker 服务默认安装了 Python3,使用时可以添加环境变量 `PYTHON_HOME=/usr/bin/python3`
Worker 服务默认安装了 Python3,使用时可以添加环境变量 `PYTHON_LAUNCHER=/usr/bin/python3`
## 如何支持 Hadoop, Spark, DataX 等?

docs/docs/zh/guide/task/datax.md (4 changed lines)

@@ -2,9 +2,9 @@
## 综述
DataX 任务类型,用于执行 DataX 程序。对于 DataX 节点,worker 会通过执行 `${DATAX_HOME}/bin/datax.py` 来解析传入的 json 文件。
DataX 任务类型,用于执行 DataX 程序。对于 DataX 节点,worker 会通过执行 `${DATAX_LAUNCHER}` 来解析传入的 json 文件。
默认会使用python2.7去执行datax.py,如果需要使用其他版本的python去执行datax.py,需要在环境变量中配置`DATAX_PYTHON`
执行 datax 任务前需要在环境变量中配置`PYTHON_LAUNCHER`,部分 datax 发行版只支持 python2.7
## 创建任务

docs/docs/zh/guide/task/openmldb.md (2 changed lines)

@@ -51,6 +51,6 @@ OpenMLDB任务组件可以连接OpenMLDB集群执行任务。
OpenMLDB任务组件将使用OpenMLDB Python SDK来连接OpenMLDB。所以你需要Python环境。
我们默认使用`python3`,你可以通过配置`PYTHON_HOME`来设置自己的Python环境。
我们默认使用`python3`,你可以通过配置`PYTHON_LAUNCHER`来设置自己的Python环境。
请确保已通过`pip install openmldb`,在worker server的主机中安装了OpenMLDB Python SDK。

docs/docs/zh/guide/task/pytorch.md (14 changed lines)

@@ -41,12 +41,12 @@
#### python环境参数
| **任务参数** | **描述** |
|--------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------|
| 是否创建新环境 | 是否创建新的python环境来运行该任务 |
| python命令路径 | 如`/usr/bin/python`,默认为DS环境配置中的`${PYTHON_HOME}` |
| python环境管理工具 | 可以选择virtualenv或者conda,若选择`virtualenv`,则会用`virtualenv`创建一个新环境,使用命令 `virtualenv -p ${PYTHON_HOME} venv` 创建;若选择`conda`, 则会使用`conda` 创建一个新环境,并需要指定创建的python版本 |
| 依赖文件 | 默认为 requirements.txt |
| **任务参数** | **描述** |
|--------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 是否创建新环境 | 是否创建新的python环境来运行该任务 |
| python命令路径 | 如`/usr/bin/python`,默认为DS环境配置中的`${PYTHON_LAUNCHER}` |
| python环境管理工具 | 可以选择virtualenv或者conda,若选择`virtualenv`,则会用`virtualenv`创建一个新环境,使用命令 `virtualenv -p ${PYTHON_LAUNCHER} venv` 创建;若选择`conda`, 则会使用`conda` 创建一个新环境,并需要指定创建的python版本 |
| 依赖文件 | 默认为 requirements.txt |
配置了`python项目地址`参数,那么`python脚本`和`依赖文件`参数允许输入相对路径
@@ -84,7 +84,7 @@ export PATH=$HOME/anaconda3/bin:$PATH
```shell
# virtualenv命令对应的目录加入PATH中
export PATH=/home/lucky/anaconda3/bin:$PATH
export PYTHON_HOME=/usr/local/bin/python3.7
export PYTHON_LAUNCHER=/usr/local/bin/python3.7
```
## 其他

docs/docs/zh/guide/upgrade/incompatible.md (1 changed line)

@@ -4,6 +4,7 @@
## dev
* 更改了环境变量名称,将 `PYTHON_HOME` 改为 `PYTHON_LAUNCHER`, 将 `DATAX_HOME` 改为 `DATAX_LAUNCHER` ([#14523](https://github.com/apache/dolphinscheduler/pull/14523))
* 更新了SQL任务中用于匹配变量的正则表达式 ([#13378](https://github.com/apache/dolphinscheduler/pull/13378))
* Remove the spark version of spark task ([#11860](https://github.com/apache/dolphinscheduler/pull/11860)).
* Change the default unix shell executor from sh to bash ([#12180](https://github.com/apache/dolphinscheduler/pull/12180)).

dolphinscheduler-api/src/test/java/org/apache/dolphinscheduler/api/service/EnvironmentServiceTest.java (6 changed lines)

@@ -296,14 +296,14 @@ public class EnvironmentServiceTest {
return "export HADOOP_HOME=/opt/hadoop-2.6.5\n"
+ "export HADOOP_CONF_DIR=/etc/hadoop/conf\n"
+ "export SPARK_HOME=/opt/soft/spark\n"
+ "export PYTHON_HOME=/opt/soft/python\n"
+ "export PYTHON_LAUNCHER=/opt/soft/python/bin/python3\n"
+ "export JAVA_HOME=/opt/java/jdk1.8.0_181-amd64\n"
+ "export HIVE_HOME=/opt/soft/hive\n"
+ "export FLINK_HOME=/opt/soft/flink\n"
+ "export DATAX_HOME=/opt/soft/datax\n"
+ "export DATAX_LAUNCHER=/opt/soft/datax/bin/python3\n"
+ "export YARN_CONF_DIR=\"/etc/hadoop/conf\"\n"
+ "\n"
+ "export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH\n"
+ "export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH\n"
+ "\n"
+ "export HADOOP_CLASSPATH=`hadoop classpath`\n"
+ "\n"

dolphinscheduler-dao/src/test/java/org/apache/dolphinscheduler/dao/mapper/ClusterMapperTest.java (6 changed lines)

@@ -174,14 +174,14 @@ public class ClusterMapperTest extends BaseDaoTest {
return "export HADOOP_HOME=/opt/hadoop-2.6.5\n"
+ "export HADOOP_CONF_DIR=/etc/hadoop/conf\n"
+ "export SPARK_HOME=/opt/soft/spark\n"
+ "export PYTHON_HOME=/opt/soft/python\n"
+ "export PYTHON_LAUNCHER=/opt/soft/python/bin/python3\n"
+ "export JAVA_HOME=/opt/java/jdk1.8.0_181-amd64\n"
+ "export HIVE_HOME=/opt/soft/hive\n"
+ "export FLINK_HOME=/opt/soft/flink\n"
+ "export DATAX_HOME=/opt/soft/datax\n"
+ "export DATAX_LAUNCHER=/opt/soft/datax/bin/python3\n"
+ "export YARN_CONF_DIR=\"/etc/hadoop/conf\"\n"
+ "\n"
+ "export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH\n"
+ "export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH\n"
+ "\n"
+ "export HADOOP_CLASSPATH=`hadoop classpath`\n"
+ "\n"

dolphinscheduler-dao/src/test/java/org/apache/dolphinscheduler/dao/mapper/EnvironmentMapperTest.java (6 changed lines)

@@ -174,14 +174,14 @@ public class EnvironmentMapperTest extends BaseDaoTest {
return "export HADOOP_HOME=/opt/hadoop-2.6.5\n"
+ "export HADOOP_CONF_DIR=/etc/hadoop/conf\n"
+ "export SPARK_HOME=/opt/soft/spark\n"
+ "export PYTHON_HOME=/opt/soft/python\n"
+ "export PYTHON_LAUNCHER=/opt/soft/python/bin/python3\n"
+ "export JAVA_HOME=/opt/java/jdk1.8.0_181-amd64\n"
+ "export HIVE_HOME=/opt/soft/hive\n"
+ "export FLINK_HOME=/opt/soft/flink\n"
+ "export DATAX_HOME=/opt/soft/datax\n"
+ "export DATAX_LAUNCHER=/opt/soft/datax/bin/python3\n"
+ "export YARN_CONF_DIR=\"/etc/hadoop/conf\"\n"
+ "\n"
+ "export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH\n"
+ "export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH\n"
+ "\n"
+ "export HADOOP_CLASSPATH=`hadoop classpath`\n"
+ "\n"

dolphinscheduler-dao/src/test/java/org/apache/dolphinscheduler/dao/utils/TaskCacheUtilsTest.java (2 changed lines)

@@ -167,7 +167,7 @@ class TaskCacheUtilsTest {
// task definition version is changed, so cache key changed
Assertions.assertNotEquals(cacheKeyD, cacheKeyE);
taskInstance.setEnvironmentConfig("export PYTHON_HOME=/bin/python3");
taskInstance.setEnvironmentConfig("export PYTHON_LAUNCHER=/bin/python3");
String cacheKeyF = TaskCacheUtils.generateCacheKey(taskInstance, taskExecutionContext, storageOperate);
// EnvironmentConfig is changed, so cache key changed
Assertions.assertNotEquals(cacheKeyE, cacheKeyF);

dolphinscheduler-task-plugin/dolphinscheduler-task-datax/src/main/java/org/apache/dolphinscheduler/plugin/task/datax/DataxTask.java (12 changed lines)

@@ -54,7 +54,6 @@ import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ExecutionException;
import com.alibaba.druid.sql.ast.SQLStatement;
@@ -74,14 +73,13 @@ public class DataxTask extends AbstractTask {
/**
* jvm parameters
*/
public static final String JVM_PARAM = " --jvm=\"-Xms%sG -Xmx%sG\" ";
public static final String JVM_PARAM = "--jvm=\"-Xms%sG -Xmx%sG\" ";
public static final String CUSTOM_PARAM = " -D%s='%s'";
/**
* python process(datax only supports version 2.7 by default)
* todo: Create a shell script to execute the datax task, and read the python version from the env, so we can support multiple versions of datax python
*/
private static final String DATAX_PYTHON = Optional.ofNullable(System.getenv("DATAX_PYTHON")).orElse("python2.7");
private static final String PYTHON_LAUNCHER = "${PYTHON_LAUNCHER}";
/**
* select all
@@ -95,7 +93,7 @@ public class DataxTask extends AbstractTask {
/**
* datax path
*/
private static final String DATAX_PATH = "${DATAX_HOME}/bin/datax.py";
private static final String DATAX_LAUNCHER = "${DATAX_LAUNCHER}";
/**
* datax channel count
*/
@@ -380,9 +378,9 @@
*/
protected String buildCommand(String jobConfigFilePath, Map<String, Property> paramsMap) {
// datax python command
return DATAX_PYTHON +
return PYTHON_LAUNCHER +
" " +
DATAX_PATH +
DATAX_LAUNCHER +
" " +
loadJvmEnv(dataXParameters) +
addCustomParameters(paramsMap) +

dolphinscheduler-task-plugin/dolphinscheduler-task-datax/src/test/java/org/apache/dolphinscheduler/plugin/task/datax/DataxTaskTest.java (6 changed lines)

@@ -122,7 +122,7 @@ public class DataxTaskTest {
Assertions.assertTrue(delete);
Assertions.assertEquals(dataxTask.buildCommand("/tmp/execution/app-id_job.json", null),
"python2.7 ${DATAX_HOME}/bin/datax.py --jvm=\"-Xms1G -Xmx1G\" /tmp/execution/app-id_job.json");
"${PYTHON_LAUNCHER} ${DATAX_LAUNCHER} --jvm=\"-Xms1G -Xmx1G\" /tmp/execution/app-id_job.json");
}
@Test
@@ -157,7 +157,7 @@
Assertions.assertTrue(delete);
Assertions.assertEquals(dataxTask.buildCommand("/tmp/execution/app-id_job.json", createPrepareParamsMap()),
"python2.7 ${DATAX_HOME}/bin/datax.py --jvm=\"-Xms1G -Xmx1G\" -p \"-DDT='DT' -DDS='DS'\" /tmp/execution/app-id_job.json");
"${PYTHON_LAUNCHER} ${DATAX_LAUNCHER} --jvm=\"-Xms1G -Xmx1G\" -p \"-DDT='DT' -DDS='DS'\" /tmp/execution/app-id_job.json");
}
@Test
@@ -237,7 +237,7 @@
DataxParameters dataXParameters = createDataxParameters();
dataXParameters.setXms(3);
dataXParameters.setXmx(4);
Assertions.assertEquals(dataxTask.loadJvmEnv(dataXParameters), " --jvm=\"-Xms3G -Xmx4G\" ");
Assertions.assertEquals(dataxTask.loadJvmEnv(dataXParameters), "--jvm=\"-Xms3G -Xmx4G\" ");
}
private DataxParameters createDataxParameters() {

dolphinscheduler-task-plugin/dolphinscheduler-task-openmldb/src/main/java/org/apache/dolphinscheduler/plugin/task/openmldb/OpenmldbTask.java (4 changed lines)

@@ -132,7 +132,7 @@ public class OpenmldbTask extends PythonTask {
/**
* Build the python task command.
* If user have set the 'PYTHON_HOME' environment, we will use the 'PYTHON_HOME',
* If user have set the 'PYTHON_LAUNCHER' environment, we will use the 'PYTHON_LAUNCHER',
* if not, we will default use python.
*
* @param pythonFile Python file, cannot be empty.
@@ -145,7 +145,7 @@
}
private String getPythonCommand() {
String pythonHome = System.getenv(PYTHON_HOME);
String pythonHome = System.getenv(PYTHON_LAUNCHER);
return getPythonCommand(pythonHome);
}

dolphinscheduler-task-plugin/dolphinscheduler-task-python/src/main/java/org/apache/dolphinscheduler/plugin/task/python/PythonConstants.java (35 changed lines, file deleted)

@@ -1,35 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.dolphinscheduler.plugin.task.python;
public class PythonConstants {
private PythonConstants() {
throw new IllegalStateException("Utility class");
}
/**
* python home
*/
public static final String PYTHON_HOME = "PYTHON_HOME";
/**
* EQUAL SIGN
*/
public static final String EQUAL_SIGN = "=";
}

dolphinscheduler-task-plugin/dolphinscheduler-task-python/src/main/java/org/apache/dolphinscheduler/plugin/task/python/PythonTask.java (8 changed lines)

@@ -59,9 +59,7 @@ public class PythonTask extends AbstractTask {
protected TaskExecutionContext taskRequest;
protected static final String PYTHON_HOME = "PYTHON_HOME";
private static final String DEFAULT_PYTHON_VERSION = "python";
protected static final String PYTHON_LAUNCHER = "PYTHON_LAUNCHER";
/**
* constructor
@@ -185,7 +183,7 @@
/**
* Build the python task command.
* If user have set the 'PYTHON_HOME' environment, we will use the 'PYTHON_HOME',
* If user have set the 'PYTHON_LAUNCHER' environment, we will use the 'PYTHON_LAUNCHER',
* if not, we will default use python.
*
* @param pythonFile Python file, cannot be empty.
@@ -194,7 +192,7 @@
protected String buildPythonExecuteCommand(String pythonFile) {
Preconditions.checkNotNull(pythonFile, "Python file cannot be null");
String pythonHome = String.format("${%s}", PYTHON_HOME);
String pythonHome = String.format("${%s}", PYTHON_LAUNCHER);
return pythonHome + " " + pythonFile;
}

dolphinscheduler-task-plugin/dolphinscheduler-task-python/src/test/java/org/apache/dolphinscheduler/plugin/task/python/PythonTaskTest.java (2 changed lines)

@@ -25,7 +25,7 @@ public class PythonTaskTest {
@Test
public void buildPythonExecuteCommand() throws Exception {
PythonTask pythonTask = createPythonTask();
Assertions.assertEquals("${PYTHON_HOME} test.py", pythonTask.buildPythonExecuteCommand("test.py"));
Assertions.assertEquals("${PYTHON_LAUNCHER} test.py", pythonTask.buildPythonExecuteCommand("test.py"));
}
private PythonTask createPythonTask() {

dolphinscheduler-task-plugin/dolphinscheduler-task-pytorch/src/main/java/org/apache/dolphinscheduler/plugin/task/pytorch/PythonEnvManager.java (2 changed lines)

@@ -38,7 +38,7 @@ public class PythonEnvManager {
private static final String VIRTUALENV_SOURCE = "source %s/bin/activate";
private static final String VIRTUALENV_BUILD = "virtualenv -p ${PYTHON_HOME} %s";
private static final String VIRTUALENV_BUILD = "virtualenv -p ${PYTHON_LAUNCHER} %s";
private static final String INSTALL_COMMAND = "%s -m pip install -r %s";

dolphinscheduler-task-plugin/dolphinscheduler-task-pytorch/src/main/java/org/apache/dolphinscheduler/plugin/task/pytorch/PytorchParameters.java (10 changed lines)

@@ -35,7 +35,7 @@ public class PytorchParameters extends AbstractParameters {
private String pythonPath = ".";
private String script;
private String scriptParams;
private String pythonCommand = "${PYTHON_HOME}";
private String pythonLauncher = "${PYTHON_LAUNCHER}";
private String pythonEnvTool = PythonEnvManager.ENV_TOOL_VENV;
private String requirements = "requirements.txt";
private String condaPythonVersion = "3.9";
@@ -58,12 +58,12 @@ public class PytorchParameters extends AbstractParameters {
return getPossiblePath(requirements);
}
public String getPythonCommand() {
public String getPythonLauncher() {
String command;
if (pythonCommand.isEmpty()) {
command = "${PYTHON_HOME}";
if (pythonLauncher.isEmpty()) {
command = "${PYTHON_LAUNCHER}";
} else {
command = pythonCommand;
command = pythonLauncher;
}
return command;
}

dolphinscheduler-task-plugin/dolphinscheduler-task-pytorch/src/main/java/org/apache/dolphinscheduler/plugin/task/pytorch/PytorchTask.java (2 changed lines)

@@ -129,7 +129,7 @@ public class PytorchTask extends AbstractTask {
if (pytorchParameters.getIsCreateEnvironment()) {
pythonCommand = pythonEnvManager.getPythonCommand();
} else {
pythonCommand = pytorchParameters.getPythonCommand();
pythonCommand = pytorchParameters.getPythonLauncher();
}
return pythonCommand;

dolphinscheduler-task-plugin/dolphinscheduler-task-pytorch/src/test/java/org/apache/dolphinscheduler/plugin/task/pytorch/PytorchTaskTest.java (14 changed lines)

@@ -68,7 +68,7 @@ public class PytorchTaskTest {
envManager.setPythonEnvTool(PythonEnvManager.ENV_TOOL_VENV);
String venvEnvCommand = envManager.getBuildEnvCommand(requirementPath);
Assertions.assertEquals(venvEnvCommand,
"virtualenv -p ${PYTHON_HOME} ./venv && source ./venv/bin/activate && ./venv/bin/python -m pip install -r "
"virtualenv -p ${PYTHON_LAUNCHER} ./venv && source ./venv/bin/activate && ./venv/bin/python -m pip install -r "
+ requirementPath);
}
@@ -103,15 +103,15 @@
PytorchTask task1 = initTask(parameters);
Assertions.assertEquals(task1.buildPythonExecuteCommand(),
"export PYTHONPATH=.\n" +
"${PYTHON_HOME} main.py --epochs=1 --dry-run");
"${PYTHON_LAUNCHER} main.py --epochs=1 --dry-run");
parameters.setPythonCommand("");
parameters.setPythonLauncher("");
PytorchTask task2 = initTask(parameters);
Assertions.assertEquals(task2.buildPythonExecuteCommand(),
"export PYTHONPATH=.\n" +
"${PYTHON_HOME} main.py --epochs=1 --dry-run");
"${PYTHON_LAUNCHER} main.py --epochs=1 --dry-run");
parameters.setPythonCommand("/usr/bin/python");
parameters.setPythonLauncher("/usr/bin/python");
PytorchTask task3 = initTask(parameters);
Assertions.assertEquals(task3.buildPythonExecuteCommand(),
"export PYTHONPATH=.\n" +
@@ -151,7 +151,7 @@
PytorchTask task = initTask(parameters);
Assertions.assertEquals(task.buildPythonExecuteCommand(),
"export PYTHONPATH=.\n" +
"virtualenv -p ${PYTHON_HOME} ./venv && source ./venv/bin/activate && ./venv/bin/python -m pip install -r requirements.txt\n"
"virtualenv -p ${PYTHON_LAUNCHER} ./venv && source ./venv/bin/activate && ./venv/bin/python -m pip install -r requirements.txt\n"
+
"./venv/bin/python main.py --epochs=1 --dry-run");
@@ -178,7 +178,7 @@
createFile(scriptFile);
String expected = "export PYTHONPATH=%s\n" +
"virtualenv -p ${PYTHON_HOME} ./venv && source ./venv/bin/activate && ./venv/bin/python -m pip install -r %s\n"
"virtualenv -p ${PYTHON_LAUNCHER} ./venv && source ./venv/bin/activate && ./venv/bin/python -m pip install -r %s\n"
+
"./venv/bin/python %s";
System.out.println(task.buildPythonExecuteCommand());

dolphinscheduler-ui/src/locales/en_US/project.ts (2 changed lines)

@@ -847,7 +847,7 @@ export default {
pytorch_python_path: 'Project Path',
pytorch_is_create_environment: 'Create An Environment Or Not',
pytorch_python_command: 'Python Command Path',
pytorch_python_command_tips: 'If empty,will be set $PYTHON_HOME',
pytorch_python_command_tips: 'If empty,will be set $PYTHON_LAUNCHER',
pytorch_python_env_tool: 'Python Environment Manager Tool',
pytorch_requirements: 'Requirement File',
pytorch_conda_python_version: 'Python Version',

dolphinscheduler-ui/src/locales/zh_CN/project.ts (2 changed lines)

@@ -823,7 +823,7 @@ export default {
pytorch_python_path: 'python项目地址',
pytorch_is_create_environment: '是否创建新环境',
pytorch_python_command: 'python命令路径',
pytorch_python_command_tips: '若为空,则使用$PYTHON_HOME',
pytorch_python_command_tips: '若为空,则使用$PYTHON_LAUNCHER',
pytorch_python_env_tool: 'python环境管理工具',
pytorch_requirements: '依赖文件',
pytorch_conda_python_version: 'python版本',

dolphinscheduler-ui/src/views/projects/task/components/node/tasks/use-pytorch.ts (2 changed lines)

@@ -45,7 +45,7 @@ export function usePytorch({
timeout: 30,
timeoutNotifyStrategy: ['WARN'],
pythonEnvTool: 'conda',
pythonCommand: '${PYTHON_HOME}',
pythonCommand: '${PYTHON_LAUNCHER}',
condaPythonVersion: '3.7',
requirements: 'requirements.txt',
pythonPath: '.'

dolphinscheduler-ui/src/views/security/environment-manage/components/environment-modal.tsx (6 changed lines)

@@ -31,13 +31,13 @@ const envConfigPlaceholder =
'export HADOOP_HOME=/opt/hadoop-2.6.5\n' +
'export HADOOP_CONF_DIR=/etc/hadoop/conf\n' +
'export SPARK_HOME=/opt/soft/spark\n' +
'export PYTHON_HOME=/opt/soft/python\n' +
'export PYTHON_LAUNCHER=/opt/soft/python/bin/python3\n' +
'export JAVA_HOME=/opt/java/jdk1.8.0_181-amd64\n' +
'export HIVE_HOME=/opt/soft/hive\n' +
'export FLINK_HOME=/opt/soft/flink\n' +
'export DATAX_HOME=/opt/soft/datax\n' +
'export DATAX_LAUNCHER=/opt/soft/datax/bin/datax.py\n' +
'export YARN_CONF_DIR=/etc/hadoop/conf\n' +
'export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$PATH\n' +
'export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH\n' +
'export HADOOP_CLASSPATH=`hadoop classpath`\n'
const EnvironmentModal = defineComponent({
