From 64cee03fed1002770e552abbce451b699c252cdc Mon Sep 17 00:00:00 2001 From: Eric Gao Date: Mon, 20 Jun 2022 14:47:44 +0800 Subject: [PATCH] [Feature] Enable users to switch and install conda env in jupyter task (#10337) --- docs/docs/en/guide/task/jupyter.md | 37 +++++++++++++- docs/docs/zh/guide/task/jupyter.md | 47 +++++++++++++++--- .../plugin/task/jupyter/JupyterConstants.java | 12 +++++ .../task/jupyter/JupyterParameters.java | 20 ++++++++ .../plugin/task/jupyter/JupyterTask.java | 10 +++- .../plugin/task/jupyter/JupyterTaskTest.java | 48 +++++++++++++++++-- .../components/node/fields/use-jupyter.ts | 3 +- 7 files changed, 163 insertions(+), 14 deletions(-) diff --git a/docs/docs/en/guide/task/jupyter.md b/docs/docs/en/guide/task/jupyter.md index 94208bbe10..51b7652eb9 100644 --- a/docs/docs/en/guide/task/jupyter.md +++ b/docs/docs/en/guide/task/jupyter.md @@ -11,6 +11,41 @@ it will use `papermill` to evaluate jupyter notes. Click [here](https://papermil Click [here](https://docs.conda.io/en/latest/) for more information about `conda`. - `conda.path` is set to `/opt/anaconda3/etc/profile.d/conda.sh` by default. If you have no idea where your `conda` is, simply run `conda info | grep -i 'base environment'`. +> NOTICE: `Jupyter Task Plugin` uses the `source` command to activate the conda environment. +> If your tenant does not have permission to use `source`, `Jupyter Task Plugin` will not function. + + +## Python Dependency Management + +### Use Pre-Installed Conda Environment + +1. Create a conda environment on your target worker, either manually or with a `shell task`. +2. In your `jupyter task`, set `condaEnvName` to the name of the conda environment you just created. + +### Use Packed Conda Environment + +1. Use [Conda-Pack](https://conda.github.io/conda-pack/) to pack your conda environment into a tarball. +2. Upload the packed conda environment to the `resource center`. +3. Select your packed conda environment as a `resource` in your `jupyter task`, and set `condaEnvName` to its file name (it must end with `.tar.gz`), e.g. `jupyter_env.tar.gz`. 
+ +> **_Note:_** Make sure you follow the official [Conda-Pack](https://conda.github.io/conda-pack/) instructions. +> After you unpack your packed conda environment, the directory structure should look like this: + +``` +. +├── bin +├── conda-meta +├── etc +├── include +├── lib +├── share +└── ssl +``` + +> NOTICE: Please strictly follow the `conda pack` instructions above, and DO NOT modify `bin/activate`. +> `Jupyter Task Plugin` uses the `source` command to activate your packed conda environment. +> If you are concerned about using `source`, choose another way to manage your Python dependencies. + ## Create Task - Click Project Management-Project Name-Workflow Definition, and click the "Create Workflow" button to enter the DAG editing page. @@ -28,7 +63,7 @@ Click [here](https://docs.conda.io/en/latest/) for more information about `conda - Cpu quota: Assign the specified CPU time quota to the task executed. Takes a percentage value. Default -1 means unlimited. For example, the full CPU load of one core is 100%,and that of 16 cores is 1600%. This function is controlled by [task.resource.limit.state](../../architecture/configuration.md) - Max memory:Assign the specified max memory to the task executed. Exceeding this limit will trigger oom to be killed and will not automatically retry. Takes an MB value. Default -1 means unlimited. This function is controlled by [task.resource.limit.state](../../architecture/configuration.md) - Timeout alarm: Check the timeout alarm and timeout failure. When the task exceeds the "timeout period", an alarm email will send and the task execution will fail. -- Conda Env Name: Name of conda environment. +- Conda Env Name: Name of the conda environment, or file name of the packed conda environment tarball (e.g. `jupyter_env.tar.gz`). - Input Note Path: Path of input jupyter note template. - Out Note Path: Path of output note. - Jupyter Parameters: Parameters in json format used for jupyter note parameterization. 
diff --git a/docs/docs/zh/guide/task/jupyter.md b/docs/docs/zh/guide/task/jupyter.md index 1372c843b3..1698630aed 100644 --- a/docs/docs/zh/guide/task/jupyter.md +++ b/docs/docs/zh/guide/task/jupyter.md @@ -1,23 +1,56 @@ # Jupyter -## Overview +## 综述 `Jupyter`任务类型,用于创建并执行`Jupyter`类型任务。worker 执行该任务的时候,会通过`papermill`执行`jupyter note`。 点击[这里](https://papermill.readthedocs.io/en/latest/) 获取更多关于`papermill`的信息。 -## Conda Configuration +## Conda虚拟环境配置 - 在`common.properties`配置`conda.path`,将其指向您的`conda.sh`。这里的`conda`应该是您用来管理您的 `papermill`和`jupyter`所在python环境的相同`conda`。 点击 [这里](https://docs.conda.io/en/latest/) 获取更多关于`conda`的信息. - `conda.path`默认设置为`/opt/anaconda3/etc/profile.d/conda.sh`。 如果您不清楚您的`conda`环境在哪里,只需要在命令行执行`conda info | grep -i 'base environment'`即可获得。 +> 注意:`Jupyter任务插件`使用`source`命令激活conda环境, +> 如果您的租户没有`source`命令使用权限,`Jupyter任务插件`将无法使用。 -## Create Task +## Python依赖管理 + +### 使用预装好的Conda环境 + +1. 手动或使用`shell任务`在您的目标机器上创建conda环境。 +2. 在您的`jupyter任务`中,将`condaEnvName`设置为您在上一步创建的conda环境名。 + +### 使用打包的Conda环境 + +1. 使用 [Conda-Pack](https://conda.github.io/conda-pack/) 将您的conda环境打包成`tarball`. +2. 将您打包好的conda环境上传到`资源中心`. +3. 在您的`jupyter任务`资源设置中,添加您在上一步中上传的conda环境包,如`jupyter_env.tar.gz`. + +> **_提示:_** 请您按照 [Conda-Pack](https://conda.github.io/conda-pack/) 官方指导打包conda环境, +> 正确打包出的conda环境包解压后文件目录结构应和下图完全一致: + +``` +. 
+├── bin +├── conda-meta +├── etc +├── include +├── lib +├── share +└── ssl +``` + +> 注意: 请严格按照上述`conda pack`指示操作,并且不要随意修改`bin/activate`。 +> `Jupyter任务插件`使用`source`命令激活您打包的conda环境。 +> 若您对使用`source`命令有安全性上的担忧,请使用其他方法管理您的python依赖。 + +## 创建任务 - 点击项目管理-项目名称-工作流定义,点击"创建工作流"按钮,进入DAG编辑页面。 - 工具栏中拖动 到画板中,即可完成创建。 -## Task Parameter +## 任务参数 - 任务名称:设置任务的名称。一个工作流定义中的节点名称是唯一的。 - 运行标志:标识这个节点是否能正常调度,如果不需要执行,可以打开禁止执行开关。 @@ -30,7 +63,7 @@ - 最大内存:为执行的任务分配指定的内存大小,超过会触发OOM被Kill同时不会进行自动重试,单位MB,默认-1代表不限制。这个功能由 [task.resource.limit.state](../../architecture/configuration.md) 控制 - 超时告警:勾选超时告警、超时失败,当任务超过"超时时长"后,会发送告警邮件并且任务执行失败.这个功能由 [task.resource.limit.state](../../architecture/configuration.md) 控制 - 前置任务:选择当前任务的前置任务,会将被选择的前置任务设置为当前任务的上游。 -- Conda Env Name: Conda环境名称。 +- Conda Env Name: Conda环境或打包的Conda环境包名称 - Input Note Path: 输入的jupyter note模板路径。 - Out Note Path: 输出的jupyter note路径。 - Jupyter Parameters: 用于对接jupyter note参数化的JSON格式参数。 @@ -40,9 +73,9 @@ - Jupyter Start Timeout: 对于jupyter notebook kernel设定的启动超时时间。 - Others: 传入papermill命令的其他参数。 -## Task Example +## 任务样例 -### Jupyter Task Example +### 简单的Jupyter任务样例 这个示例展示了如何创建Jupyter任务节点: diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-jupyter/src/main/java/org/apache/dolphinscheduler/plugin/task/jupyter/JupyterConstants.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-jupyter/src/main/java/org/apache/dolphinscheduler/plugin/task/jupyter/JupyterConstants.java index 84fa750a95..8b4069c048 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-jupyter/src/main/java/org/apache/dolphinscheduler/plugin/task/jupyter/JupyterConstants.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-jupyter/src/main/java/org/apache/dolphinscheduler/plugin/task/jupyter/JupyterConstants.java @@ -33,6 +33,18 @@ public class JupyterConstants { */ public static final String CONDA_ACTIVATE = "conda activate"; + /** + * create and activate conda env from tar + */ + public static final String CREATE_ENV_FROM_TAR = "mkdir 
jupyter_env && " + + "tar -xzf %s -C jupyter_env && " + + "source jupyter_env/bin/activate"; + + /** + * file suffix tar.gz + */ + public static final String TAR_SUFFIX = ".tar.gz"; + /** * jointer to combine two command */ diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-jupyter/src/main/java/org/apache/dolphinscheduler/plugin/task/jupyter/JupyterParameters.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-jupyter/src/main/java/org/apache/dolphinscheduler/plugin/task/jupyter/JupyterParameters.java index 5408a6e977..80f9e6ca72 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-jupyter/src/main/java/org/apache/dolphinscheduler/plugin/task/jupyter/JupyterParameters.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-jupyter/src/main/java/org/apache/dolphinscheduler/plugin/task/jupyter/JupyterParameters.java @@ -17,8 +17,11 @@ package org.apache.dolphinscheduler.plugin.task.jupyter; +import org.apache.dolphinscheduler.plugin.task.api.model.ResourceInfo; import org.apache.dolphinscheduler.plugin.task.api.parameters.AbstractParameters; +import java.util.List; + /** * jupyter parameters */ @@ -69,6 +72,10 @@ public class JupyterParameters extends AbstractParameters { */ private String others; + /** + * resource list + */ + private List resourceList; public String getCondaEnvName() { return condaEnvName; @@ -142,6 +149,19 @@ public class JupyterParameters extends AbstractParameters { this.others = others; } + public List getResourceList() { + return resourceList; + } + + public void setResourceList(List resourceList) { + this.resourceList = resourceList; + } + + @Override + public List getResourceFilesList() { + return resourceList; + } + @Override public boolean checkParameters() { return condaEnvName != null && inputNotePath != null && outputNotePath != null; diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-jupyter/src/main/java/org/apache/dolphinscheduler/plugin/task/jupyter/JupyterTask.java 
b/dolphinscheduler-task-plugin/dolphinscheduler-task-jupyter/src/main/java/org/apache/dolphinscheduler/plugin/task/jupyter/JupyterTask.java index 8d071aed35..96f7882797 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-jupyter/src/main/java/org/apache/dolphinscheduler/plugin/task/jupyter/JupyterTask.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-jupyter/src/main/java/org/apache/dolphinscheduler/plugin/task/jupyter/JupyterTask.java @@ -107,8 +107,14 @@ public class JupyterTask extends AbstractTaskExecutor { args.add(JupyterConstants.CONDA_INIT); args.add(condaPath); args.add(JupyterConstants.JOINTER); - args.add(JupyterConstants.CONDA_ACTIVATE); - args.add(jupyterParameters.getCondaEnvName()); + String condaEnvName = jupyterParameters.getCondaEnvName(); + if (condaEnvName.endsWith(JupyterConstants.TAR_SUFFIX)) { + args.add(String.format(JupyterConstants.CREATE_ENV_FROM_TAR, condaEnvName)); + } else { + args.add(JupyterConstants.CONDA_ACTIVATE); + args.add(jupyterParameters.getCondaEnvName()); + } + args.add(JupyterConstants.JOINTER); args.add(JupyterConstants.PAPERMILL); args.add(jupyterParameters.getInputNotePath()); diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-jupyter/src/test/java/org/apache/dolphinscheduler/plugin/task/jupyter/JupyterTaskTest.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-jupyter/src/test/java/org/apache/dolphinscheduler/plugin/task/jupyter/JupyterTaskTest.java index eef96cfb7e..c55aa4935a 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-jupyter/src/test/java/org/apache/dolphinscheduler/plugin/task/jupyter/JupyterTaskTest.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-jupyter/src/test/java/org/apache/dolphinscheduler/plugin/task/jupyter/JupyterTaskTest.java @@ -46,8 +46,8 @@ import static org.powermock.api.mockito.PowerMockito.when; public class JupyterTaskTest { @Test - public void testBuildJupyterCommand() throws Exception { - String parameters = 
buildJupyterCommand(); + public void testBuildJupyterCommandWithLocalEnv() throws Exception { + String parameters = buildJupyterCommandWithLocalEnv(); TaskExecutionContext taskExecutionContext = PowerMockito.mock(TaskExecutionContext.class); when(taskExecutionContext.getTaskParams()).thenReturn(parameters); PowerMockito.mockStatic(PropertyUtils.class); @@ -71,7 +71,35 @@ public class JupyterTaskTest { "--progress-bar"); } - private String buildJupyterCommand() { + @Test + public void testBuildJupyterCommandWithPackedEnv() throws Exception { + String parameters = buildJupyterCommandWithPackedEnv(); + TaskExecutionContext taskExecutionContext = PowerMockito.mock(TaskExecutionContext.class); + when(taskExecutionContext.getTaskParams()).thenReturn(parameters); + PowerMockito.mockStatic(PropertyUtils.class); + when(PropertyUtils.getString(any())).thenReturn("/opt/anaconda3/etc/profile.d/conda.sh"); + JupyterTask jupyterTask = spy(new JupyterTask(taskExecutionContext)); + jupyterTask.init(); + Assert.assertEquals(jupyterTask.buildCommand(), + "source /opt/anaconda3/etc/profile.d/conda.sh && " + + "mkdir jupyter_env && " + + "tar -xzf jupyter.tar.gz -C jupyter_env && " + + "source jupyter_env/bin/activate && " + + "papermill " + + "/test/input_note.ipynb " + + "/test/output_note.ipynb " + + "--parameters city Shanghai " + + "--parameters factor 0.01 " + + "--kernel python3 " + + "--engine default_engine " + + "--execution-timeout 10 " + + "--start-timeout 3 " + + "--version " + + "--inject-paths " + + "--progress-bar"); + } + + private String buildJupyterCommandWithLocalEnv() { JupyterParameters jupyterParameters = new JupyterParameters(); jupyterParameters.setCondaEnvName("jupyter-lab"); jupyterParameters.setInputNotePath("/test/input_note.ipynb"); @@ -85,4 +113,18 @@ public class JupyterTaskTest { return JSONUtils.toJsonString(jupyterParameters); } + private String buildJupyterCommandWithPackedEnv() { + JupyterParameters jupyterParameters = new JupyterParameters(); + 
jupyterParameters.setCondaEnvName("jupyter.tar.gz"); + jupyterParameters.setInputNotePath("/test/input_note.ipynb"); + jupyterParameters.setOutputNotePath("/test/output_note.ipynb"); + jupyterParameters.setParameters("{\"city\": \"Shanghai\", \"factor\": \"0.01\"}"); + jupyterParameters.setKernel("python3"); + jupyterParameters.setEngine("default_engine"); + jupyterParameters.setExecutionTimeout("10"); + jupyterParameters.setStartTimeout("3"); + jupyterParameters.setOthers("--version"); + return JSONUtils.toJsonString(jupyterParameters); + } + } diff --git a/dolphinscheduler-ui/src/views/projects/task/components/node/fields/use-jupyter.ts b/dolphinscheduler-ui/src/views/projects/task/components/node/fields/use-jupyter.ts index c9512bb673..582b308d6c 100644 --- a/dolphinscheduler-ui/src/views/projects/task/components/node/fields/use-jupyter.ts +++ b/dolphinscheduler-ui/src/views/projects/task/components/node/fields/use-jupyter.ts @@ -15,7 +15,7 @@ * limitations under the License. */ import { useI18n } from 'vue-i18n' -import { useCustomParams } from '.' +import { useCustomParams, useResources } from '.' import type { IJsonItem } from '../types' export function useJupyter(model: { [field: string]: any }): IJsonItem[] { @@ -121,6 +121,7 @@ export function useJupyter(model: { [field: string]: any }): IJsonItem[] { placeholder: t('project.node.jupyter_others_tips') } }, + useResources(), ...useCustomParams({ model, field: 'localParams', isSimple: false }) ] }