diff --git a/docs/docs/en/guide/task/mlflow.md b/docs/docs/en/guide/task/mlflow.md index 75911c03ae..0eec455238 100644 --- a/docs/docs/en/guide/task/mlflow.md +++ b/docs/docs/en/guide/task/mlflow.md @@ -20,7 +20,6 @@ The MLflow plugin currently supports and will support the following: - MLflow Models - MLFLOW: Use `MLflow models serve` to deploy a model service - Docker: Run the container after packaging the docker image - - Docker Compose: Use docker compose to run the container, it will replace the docker run above ## Create Task @@ -98,22 +97,26 @@ You can now use this feature to run all MLFlow projects on Github (For example [ ![mlflow-models-docker](../../../../img/tasks/demo/mlflow-models-docker.png) -#### DOCKER COMPOSE +## Environment to Prepare -![mlflow-models-docker-compose](../../../../img/tasks/demo/mlflow-models-docker-compose.png) +### Conda Environment +Please install [anaconda](https://docs.continuum.io/anaconda/install/) or [miniconda](https://docs.conda.io/en/latest/miniconda.html#installing) in advance. -| **Parameter** | **Description** | -|------------------|----------------------------------------------------------| -| Max Cpu Limit | For example, `1.0` or `0.5`, the same as docker compose. | -| Max Memory Limit | For example `1G` or `500M`, the same as docker compose. | +**Method A:** -## Environment to Prepare +Config anaconda environment in `/dolphinscheduler/conf/env/dolphinscheduler_env.sh`. -### Conda Environment +Add the following content to the file: + +```bash +# config anaconda environment +export PATH=/opt/anaconda3/bin:$PATH +``` -You need to enter the admin account to configure a conda environment variable(Please -install [anaconda](https://docs.continuum.io/anaconda/install/) -or [miniconda](https://docs.conda.io/en/latest/miniconda.html#installing) in advance). + +**Method B:** + +You need to enter the admin account to configure a conda environment variable. ![mlflow-conda-env](../../../../img/tasks/demo/mlflow-conda-env.png) @@ -139,3 +142,14 @@ After running, an MLflow service is started. After this, you can visit the MLflow service (`http://localhost:5000`) page to view the experiments and models. ![mlflow-server](../../../../img/tasks/demo/mlflow-server.png) + +### Preset Algorithm Repository Configuration + +If you can't access github, you can modify the following fields in the `commom.properties` configuration file to replace the github address with an accessible address. + +```yaml +# mlflow task plugin preset repository +ml.mlflow.preset_repository=https://github.com/apache/dolphinscheduler-mlflow +# mlflow task plugin preset repository version +ml.mlflow.preset_repository_version="main" +``` diff --git a/docs/docs/zh/guide/task/mlflow.md b/docs/docs/zh/guide/task/mlflow.md index 97754ebd55..3446e02e77 100644 --- a/docs/docs/zh/guide/task/mlflow.md +++ b/docs/docs/zh/guide/task/mlflow.md @@ -4,7 +4,7 @@ [MLflow](https://mlflow.org) 是一个MLops领域一个优秀的开源项目, 用于管理机器学习的生命周期,包括实验、可再现性、部署和中心模型注册。 -MLflow 组件用于执行 MLflow 任务,目前包含Mlflow Projects, 和MLflow Models。(Model Registry将在不就的将来支持)。 +MLflow 组件用于执行 MLflow 任务,目前包含Mlflow Projects,和MLflow Models。(Model Registry将在不就的将来支持)。 - MLflow Projects: 将代码打包,并可以运行到任务的平台上。 - MLflow Models: 在不同的服务环境中部署机器学习模型。 @@ -12,19 +12,13 @@ MLflow 组件用于执行 MLflow 任务,目前包含Mlflow Projects, 和MLflow 目前 Mlflow 组件支持的和即将支持的内容如下中: -- [x] MLflow Projects - - [x] BasicAlgorithm: 基础算法,包含LogisticRegression, svm, lightgbm, xgboost - - [x] AutoML: AutoML工具,包含autosklean, flaml - - [x] Custom projects: 支持运行自己的MLflow Projects项目 -- [ ] MLflow Models - - [x] MLFLOW: 直接使用 `mlflow models serve` 部署模型。 - - [x] Docker: 打包 DOCKER 镜像后部署模型。 - - [x] Docker Compose: 使用Docker Compose 部署模型,将会取代上面的Docker部署。 - - [ ] Seldon core: 构建完镜像后,使用Seldon Core 部署到k8s集群上, 可以使用Seldon Core的生成模型管理能力。 - - [ ] k8s: 构建完镜像后, 部署到k8s集群上。 - - [ ] MLflow deployments: 内置的允许MLflow 部署模块, 如内置的部署到Sagemaker等。 -- [ ] Model Registry - - [ ] Register Model: 注册相关工件(模型以及相关的参数,指标)到模型中心 +- MLflow Projects + - BasicAlgorithm: 基础算法,包含LogisticRegression, svm, lightgbm, xgboost + - AutoML: AutoML工具,包含autosklean, flaml + - Custom projects: 支持运行自己的MLflow Projects项目 +- MLflow Models + - MLFLOW: 直接使用 `mlflow models serve` 部署模型。 + - Docker: 打包 DOCKER 镜像后部署模型。 ## 创建任务 @@ -95,21 +89,25 @@ MLflow 组件用于执行 MLflow 任务,目前包含Mlflow Projects, 和MLflow ![mlflow-models-docker](../../../../img/tasks/demo/mlflow-models-docker.png) -#### DOCKER COMPOSE +## 环境准备 -![mlflow-models-docker-compose](../../../../img/tasks/demo/mlflow-models-docker-compose.png) +### conda 环境配置 -| **任务参数** | **描述** | -|----------|--------------------------------------| -| 最大CPU限制 | 如 `1.0` 或者 `0.5`,与 docker compose 一致 | -| 最大内存限制 | 如 `1G` 或者 `500M`,与 docker compose 一致 | +请提前[安装anaconda](https://docs.continuum.io/anaconda/install/) 或者[安装miniconda](https://docs.conda.io/en/latest/miniconda.html#installing) -## 环境准备 +**方法A:** -### conda 环境配置 +配置文件:/dolphinscheduler/conf/env/dolphinscheduler_env.sh。 + +在文件最后添加内容 +``` +# 配置你的conda环境路径 +export PATH=/opt/anaconda3/bin:$PATH +``` -你需要进入admin账户配置一个conda环境变量(请提前[安装anaconda](https://docs.continuum.io/anaconda/install/) -或者[安装miniconda](https://docs.conda.io/en/latest/miniconda.html#installing) )。 +**方法B:** + +你需要进入admin账户配置一个conda环境变量。 ![mlflow-conda-env](../../../../img/tasks/demo/mlflow-conda-env.png) @@ -117,6 +115,7 @@ MLflow 组件用于执行 MLflow 任务,目前包含Mlflow Projects, 和MLflow ![mlflow-set-conda-env](../../../../img/tasks/demo/mlflow-set-conda-env.png) + ### MLflow service 启动 确保你已经安装MLflow,可以使用`pip install mlflow`进行安装。 @@ -135,3 +134,15 @@ mlflow server -h 0.0.0.0 -p 5000 --serve-artifacts --backend-store-uri sqlite:// ![mlflow-server](../../../../img/tasks/demo/mlflow-server.png) + +### 内置算法仓库配置 + +如果遇到github无法访问的情况,可以修改`commom.properties`配置文件的以下字段,将github地址替换能访问的地址。 + +```yaml +# mlflow task plugin preset repository +ml.mlflow.preset_repository=https://github.com/apache/dolphinscheduler-mlflow +# mlflow task plugin preset repository version +ml.mlflow.preset_repository_version="main" +``` + diff --git a/docs/img/tasks/demo/mlflow-models-docker-compose.png b/docs/img/tasks/demo/mlflow-models-docker-compose.png deleted file mode 100644 index 7aad9641a2..0000000000 Binary files a/docs/img/tasks/demo/mlflow-models-docker-compose.png and /dev/null differ diff --git a/dolphinscheduler-common/src/main/resources/common.properties b/dolphinscheduler-common/src/main/resources/common.properties index 3b12d7c606..fcede11b50 100644 --- a/dolphinscheduler-common/src/main/resources/common.properties +++ b/dolphinscheduler-common/src/main/resources/common.properties @@ -105,4 +105,9 @@ alert.rpc.port=50052 conda.path=/opt/anaconda3/etc/profile.d/conda.sh # Task resource limit state -task.resource.limit.state=false \ No newline at end of file +task.resource.limit.state=false + +# mlflow task plugin preset repository +ml.mlflow.preset_repository=https://github.com/apache/dolphinscheduler-mlflow +# mlflow task plugin preset repository version +ml.mlflow.preset_repository_version="main" \ No newline at end of file diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/mlflow.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/mlflow.yaml index 232442a186..45e56726e1 100644 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/mlflow.yaml +++ b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/mlflow.yaml @@ -32,15 +32,6 @@ tasks: parameters: -P learning_rate=0.2 -P colsample_bytree=0.8 -P subsample=0.9 experiment_name: xgboost - - - name: deploy_mlflow - deps: [train_xgboost_native] - task_type: MLflowModels - model_uri: models:/xgboost_native/Production - mlflow_tracking_uri: *mlflow_tracking_uri - deploy_mode: MLFLOW - port: 7001 - - name: train_automl task_type: MLFlowProjectsAutoML mlflow_tracking_uri: *mlflow_tracking_uri @@ -68,11 +59,11 @@ tasks: data_path: /data/examples/iris search_params: max_depth=[5, 10];n_estimators=[100, 200] - - - name: deploy_docker_compose - task_type: MLflowModels + - name: deploy_mlflow deps: [train_basic_algorithm] + task_type: MLflowModels model_uri: models:/iris_B/Production mlflow_tracking_uri: *mlflow_tracking_uri - deploy_mode: DOCKER COMPOSE - port: 7003 + deploy_mode: MLFLOW + port: 7001 + diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_mlflow_example.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_mlflow_example.py index 328688e646..c2734bcf81 100644 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_mlflow_example.py +++ b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_mlflow_example.py @@ -43,17 +43,6 @@ with ProcessDefinition( experiment_name="xgboost", ) - # Using MLFLOW to deploy model from custom mlflow project - deploy_mlflow = MLflowModels( - name="deploy_mlflow", - model_uri="models:/xgboost_native/Production", - mlflow_tracking_uri=mlflow_tracking_uri, - deploy_mode=MLflowDeployType.MLFLOW, - port=7001, - ) - - train_custom >> deploy_mlflow - # run automl to train model train_automl = MLFlowProjectsAutoML( name="train_automl", @@ -88,16 +77,16 @@ with ProcessDefinition( search_params="max_depth=[5, 10];n_estimators=[100, 200]", ) - # Using DOCKER COMPOSE to deploy model from train_basic_algorithm - deploy_docker_compose = MLflowModels( - name="deploy_docker_compose", + # Using MLFLOW to deploy model from training lightgbm project + deploy_mlflow = MLflowModels( + name="deploy_mlflow", model_uri="models:/iris_B/Production", mlflow_tracking_uri=mlflow_tracking_uri, - deploy_mode=MLflowDeployType.DOCKER_COMPOSE, - port=7003, + deploy_mode=MLflowDeployType.MLFLOW, + port=7001, ) - train_basic_algorithm >> deploy_docker_compose + train_basic_algorithm >> deploy_mlflow pd.submit() diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/mlflow.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/mlflow.py index 44e6634822..e86797aadf 100644 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/mlflow.py +++ b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/mlflow.py @@ -43,7 +43,6 @@ class MLflowDeployType(str): MLFLOW = "MLFLOW" DOCKER = "DOCKER" - DOCKER_COMPOSE = "DOCKER COMPOSE" DEFAULT_MLFLOW_TRACKING_URI = "http://127.0.0.1:5000" @@ -83,10 +82,8 @@ class MLflowModels(BaseMLflow): :param model_uri: Model-URI of MLflow , support models://suffix format and runs:/ format. See https://mlflow.org/docs/latest/tracking.html#artifact-stores :param mlflow_tracking_uri: MLflow tracking server uri, default is http://127.0.0.1:5000 - :param deploy_mode: MLflow deploy mode, support MLFLOW, DOCKER, DOCKER COMPOSE, default is DOCKER + :param deploy_mode: MLflow deploy mode, support MLFLOW, DOCKER, default is DOCKER :param port: deploy port, default is 7000 - :param cpu_limit: cpu limit, default is 1.0 - :param memory_limit: memory limit, default is 500M """ mlflow_task_type = MLflowTaskType.MLFLOW_MODELS @@ -95,8 +92,6 @@ class MLflowModels(BaseMLflow): "deploy_type", "deploy_model_key", "deploy_port", - "cpu_limit", - "memory_limit", } def __init__( @@ -106,8 +101,6 @@ class MLflowModels(BaseMLflow): mlflow_tracking_uri: Optional[str] = DEFAULT_MLFLOW_TRACKING_URI, deploy_mode: Optional[str] = MLflowDeployType.DOCKER, port: Optional[int] = 7000, - cpu_limit: Optional[float] = 1.0, - memory_limit: Optional[str] = "500M", *args, **kwargs ): @@ -116,8 +109,6 @@ class MLflowModels(BaseMLflow): self.deploy_type = deploy_mode.upper() self.deploy_model_key = model_uri self.deploy_port = port - self.cpu_limit = cpu_limit - self.memory_limit = memory_limit class MLFlowProjectsCustom(BaseMLflow): diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_mlflow.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_mlflow.py index 2159b6c77e..af0a324b53 100644 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_mlflow.py +++ b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_mlflow.py @@ -63,19 +63,15 @@ def test_mlflow_models_get_define(): name = "mlflow_models" model_uri = "models:/xgboost_native/Production" port = 7001 - cpu_limit = 2.0 - memory_limit = "600M" expect = deepcopy(EXPECT) expect["name"] = name task_params = expect["taskParams"] task_params["mlflowTrackingUri"] = MLFLOW_TRACKING_URI task_params["mlflowTaskType"] = MLflowTaskType.MLFLOW_MODELS - task_params["deployType"] = MLflowDeployType.DOCKER_COMPOSE + task_params["deployType"] = MLflowDeployType.DOCKER task_params["deployModelKey"] = model_uri task_params["deployPort"] = port - task_params["cpuLimit"] = cpu_limit - task_params["memoryLimit"] = memory_limit with patch( "pydolphinscheduler.core.task.Task.gen_code_and_version", @@ -85,10 +81,8 @@ def test_mlflow_models_get_define(): name=name, model_uri=model_uri, mlflow_tracking_uri=MLFLOW_TRACKING_URI, - deploy_mode=MLflowDeployType.DOCKER_COMPOSE, + deploy_mode=MLflowDeployType.DOCKER, port=port, - cpu_limit=cpu_limit, - memory_limit=memory_limit, ) assert task.get_define() == expect diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-mlflow/src/main/java/org/apache/dolphinscheduler/plugin/task/mlflow/MlflowConstants.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-mlflow/src/main/java/org/apache/dolphinscheduler/plugin/task/mlflow/MlflowConstants.java index c2701fb543..a13741230f 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-mlflow/src/main/java/org/apache/dolphinscheduler/plugin/task/mlflow/MlflowConstants.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-mlflow/src/main/java/org/apache/dolphinscheduler/plugin/task/mlflow/MlflowConstants.java @@ -28,15 +28,17 @@ public class MlflowConstants { public static final String JOB_TYPE_CUSTOM_PROJECT = "CustomProject"; - public static final String PRESET_REPOSITORY = "https://github.com/apache/dolphinscheduler-mlflow"; + public static final String PRESET_REPOSITORY_KEY = "ml.mlflow.preset_repository"; + + public static final String PRESET_REPOSITORY_VERSION_KEY = "ml.mlflow.preset_repository_version"; - public static final String PRESET_PATH = "dolphinscheduler-mlflow"; + public static final String PRESET_REPOSITORY = "https://github.com/apache/dolphinscheduler-mlflow"; public static final String PRESET_REPOSITORY_VERSION = "main"; - public static final String PRESET_AUTOML_PROJECT = PRESET_PATH + "#Project-AutoML"; + public static final String PRESET_AUTOML_PROJECT = "#Project-AutoML"; - public static final String PRESET_BASIC_ALGORITHM_PROJECT = PRESET_PATH + "#Project-BasicAlgorithm"; + public static final String PRESET_BASIC_ALGORITHM_PROJECT = "#Project-BasicAlgorithm"; public static final String MLFLOW_TASK_TYPE_PROJECTS = "MLflow Projects"; @@ -46,14 +48,6 @@ public class MlflowConstants { public static final String MLFLOW_MODELS_DEPLOY_TYPE_DOCKER = "DOCKER"; - public static final String MLFLOW_MODELS_DEPLOY_TYPE_DOCKER_COMPOSE = "DOCKER COMPOSE"; - - /** - * template file - */ - public static final String TEMPLATE_DOCKER_COMPOSE = "docker-compose.yml"; - - /** * mlflow command */ @@ -81,8 +75,7 @@ public class MlflowConstants { public static final String MLFLOW_RUN_CUSTOM_PROJECT = "mlflow run $repo " + "%s " - + "--experiment-name=\"%s\" " - + "--version=\"%s\" "; + + "--experiment-name=\"%s\""; public static final String MLFLOW_MODELS_SERVE = "mlflow models serve -m %s --port %s -h 0.0.0.0"; @@ -94,20 +87,10 @@ public class MlflowConstants { + "--health-cmd \"curl --fail http://127.0.0.1:8080/ping || exit 1\" --health-interval 5s --health-retries 20" + " %s"; - public static final String DOCKER_COMPOSE_RUN = "docker-compose up -d"; - - public static final String SET_DOCKER_COMPOSE_ENV = "export DS_TASK_MLFLOW_IMAGE_NAME=%s\n" - + "export DS_TASK_MLFLOW_CONTAINER_NAME=%s\n" - + "export DS_TASK_MLFLOW_DEPLOY_PORT=%s\n" - + "export DS_TASK_MLFLOW_CPU_LIMIT=%s\n" - + "export DS_TASK_MLFLOW_MEMORY_LIMIT=%s"; - - public static final String DOCKER_HEALTH_CHECK = "docker inspect --format \"{{json .State.Health.Status }}\" %s"; public static final int DOCKER_HEALTH_CHECK_TIMEOUT = 20; public static final int DOCKER_HEALTH_CHECK_INTERVAL = 5000; - public static final String GIT_CLONE_REPO = "git clone %s %s"; } diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-mlflow/src/main/java/org/apache/dolphinscheduler/plugin/task/mlflow/MlflowParameters.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-mlflow/src/main/java/org/apache/dolphinscheduler/plugin/task/mlflow/MlflowParameters.java index 4e47c8ae64..819cb27bd8 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-mlflow/src/main/java/org/apache/dolphinscheduler/plugin/task/mlflow/MlflowParameters.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-mlflow/src/main/java/org/apache/dolphinscheduler/plugin/task/mlflow/MlflowParameters.java @@ -17,10 +17,14 @@ package org.apache.dolphinscheduler.plugin.task.mlflow; +import lombok.Data; + import org.apache.dolphinscheduler.plugin.task.api.parameters.AbstractParameters; +import org.apache.dolphinscheduler.spi.utils.StringUtils; import java.util.HashMap; +@Data public class MlflowParameters extends AbstractParameters { /** @@ -36,7 +40,7 @@ public class MlflowParameters extends AbstractParameters { */ private String mlflowProjectRepository; - private String mlflowProjectVersion = "master"; + private String mlflowProjectVersion = ""; /** * AutoML parameters @@ -76,160 +80,9 @@ public class MlflowParameters extends AbstractParameters { private String deployPort; - private String cpuLimit; - - private String memoryLimit; - - public void setAlgorithm(String algorithm) { - this.algorithm = algorithm; - } - - public String getAlgorithm() { - return algorithm; - } - - public void setParams(String params) { - this.params = params; - } - - public String getParams() { - return params; - } - - public void setSearchParams(String searchParams) { - this.searchParams = searchParams; - } - - public String getSearchParams() { - return searchParams; - } - - public void setDataPaths(String dataPath) { - this.dataPath = dataPath; - } - - public String getDataPath() { - return dataPath; - } - - public void setMlflowTaskType(String mlflowTaskType) { - this.mlflowTaskType = mlflowTaskType; - } - - public String getMlflowTaskType() { - return mlflowTaskType; - } - - public void setExperimentNames(String experimentName) { - this.experimentName = experimentName; - } - - public String getExperimentName() { - return experimentName; - } - - public void setModelNames(String modelName) { - this.modelName = modelName; - } - - public String getModelName() { - return modelName; - } - - public void setMlflowTrackingUris(String mlflowTrackingUri) { - this.mlflowTrackingUri = mlflowTrackingUri; - } - - public String getMlflowTrackingUri() { - return mlflowTrackingUri; - } - - public void setMlflowJobType(String mlflowJobType) { - this.mlflowJobType = mlflowJobType; - } - - public String getMlflowJobType() { - return mlflowJobType; - } - - public void setAutomlTool(String automlTool) { - this.automlTool = automlTool; - } - - public String getMlflowProjectRepository() { - return mlflowProjectRepository; - } - - public void setMlflowProjectRepository(String mlflowProjectRepository) { - this.mlflowProjectRepository = mlflowProjectRepository; - } - - public String getMlflowProjectVersion() { - return mlflowProjectVersion; - } - - public void setMlflowProjectVersion(String mlflowProjectVersion) { - this.mlflowProjectVersion = mlflowProjectVersion; - } - - public String getAutomlTool() { - return automlTool; - } - - public void setDeployType(String deployType) { - this.deployType = deployType; - } - - public String getDeployType() { - return deployType; - } - - public void setDeployModelKey(String deployModelKey) { - this.deployModelKey = deployModelKey; - } - - public String getDeployModelKey() { - return deployModelKey; - } - - public void setDeployPort(String deployPort) { - this.deployPort = deployPort; - } - - public String getDeployPort() { - return deployPort; - } - - public void setCpuLimit(String cpuLimit) { - this.cpuLimit = cpuLimit; - } - - public String getCpuLimit() { - return cpuLimit; - } - - public void setMemoryLimit(String memoryLimit) { - this.memoryLimit = memoryLimit; - } - - public String getMemoryLimit() { - return memoryLimit; - } - @Override public boolean checkParameters() { - Boolean checkResult = true; -// Boolean checkResult = mlflowTrackingUri != null; -// if (mlflowJobType.equals(MlflowConstants.JOB_TYPE_BASIC_ALGORITHM)) { -// checkResult &= dataPath != null; -// checkResult &= experimentName != null; -// } else if (mlflowJobType.equals(MlflowConstants.JOB_TYPE_AUTOML)) { -// checkResult &= dataPath != null; -// checkResult &= automlTool != null; -// checkResult &= experimentName != null; -// } else { -// } - return checkResult; + return StringUtils.isNotEmpty(mlflowTrackingUri); } public HashMap getParamsMap() { @@ -240,11 +93,13 @@ public class MlflowParameters extends AbstractParameters { paramsMap.put("experiment_name", experimentName); paramsMap.put("model_name", modelName); paramsMap.put("MLFLOW_TRACKING_URI", mlflowTrackingUri); - if (mlflowJobType.equals(MlflowConstants.JOB_TYPE_BASIC_ALGORITHM)) { - addParamsMapForBasicAlgorithm(paramsMap); - } else if (mlflowJobType.equals(MlflowConstants.JOB_TYPE_AUTOML)) { - getParamsMapForAutoML(paramsMap); - } else { + switch (mlflowJobType){ + case MlflowConstants.JOB_TYPE_BASIC_ALGORITHM: + addParamsMapForBasicAlgorithm(paramsMap); + break; + case MlflowConstants.JOB_TYPE_AUTOML: + getParamsMapForAutoML(paramsMap); + break; } return paramsMap; } @@ -262,6 +117,10 @@ public class MlflowParameters extends AbstractParameters { paramsMap.put("repo_version", MlflowConstants.PRESET_REPOSITORY_VERSION); } + public Boolean isCustomProject() { + return mlflowJobType.equals(MlflowConstants.JOB_TYPE_CUSTOM_PROJECT); + } + public String getModelKeyName(String tag) throws IllegalArgumentException { String imageName; if (deployModelKey.startsWith("runs:")) { @@ -271,23 +130,18 @@ public class MlflowParameters extends AbstractParameters { } else { throw new IllegalArgumentException("model key must start with runs:/ or models:/ "); } - imageName = imageName.replace("/", tag); + imageName = imageName.replace("/", tag).toLowerCase(); return imageName; } - public String getDockerComposeEnvCommand() { - String imageName = "mlflow/" + getModelKeyName(":"); - String env = String.format(MlflowConstants.SET_DOCKER_COMPOSE_ENV, imageName, getContainerName(), deployPort, cpuLimit, memoryLimit); - return env; - } - public String getContainerName(){ - String containerName = "ds-mlflow-" + getModelKeyName("-"); - return containerName; + return "ds-mlflow-" + getModelKeyName("-"); } public boolean getIsDeployDocker(){ - return deployType.equals(MlflowConstants.MLFLOW_MODELS_DEPLOY_TYPE_DOCKER) || deployType.equals(MlflowConstants.MLFLOW_MODELS_DEPLOY_TYPE_DOCKER_COMPOSE); + if (StringUtils.isEmpty(deployType)) { + return false; + } + return deployType.equals(MlflowConstants.MLFLOW_MODELS_DEPLOY_TYPE_DOCKER); } - -}; +} diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-mlflow/src/main/java/org/apache/dolphinscheduler/plugin/task/mlflow/MlflowTask.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-mlflow/src/main/java/org/apache/dolphinscheduler/plugin/task/mlflow/MlflowTask.java index 0c42a5c09b..26e7e260a4 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-mlflow/src/main/java/org/apache/dolphinscheduler/plugin/task/mlflow/MlflowTask.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-mlflow/src/main/java/org/apache/dolphinscheduler/plugin/task/mlflow/MlflowTask.java @@ -17,6 +17,8 @@ package org.apache.dolphinscheduler.plugin.task.mlflow; +import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.EXIT_CODE_FAILURE; + import org.apache.dolphinscheduler.common.thread.ThreadUtils; import org.apache.dolphinscheduler.plugin.task.api.AbstractTaskExecutor; import org.apache.dolphinscheduler.plugin.task.api.ShellCommandExecutor; @@ -25,28 +27,24 @@ import org.apache.dolphinscheduler.plugin.task.api.TaskException; import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContext; import org.apache.dolphinscheduler.plugin.task.api.model.Property; import org.apache.dolphinscheduler.plugin.task.api.model.TaskResponse; -import org.apache.dolphinscheduler.plugin.task.api.parameters.AbstractParameters; import org.apache.dolphinscheduler.plugin.task.api.parser.ParamUtils; import org.apache.dolphinscheduler.plugin.task.api.parser.ParameterUtils; import org.apache.dolphinscheduler.plugin.task.api.utils.OSUtils; import org.apache.dolphinscheduler.spi.utils.JSONUtils; +import org.apache.dolphinscheduler.spi.utils.PropertyUtils; +import org.apache.dolphinscheduler.spi.utils.StringUtils; import java.util.ArrayList; import java.util.List; import java.util.Map; - -import static org.apache.dolphinscheduler.plugin.task.api.TaskConstants.EXIT_CODE_FAILURE; +import java.util.regex.Pattern; /** * shell task */ public class MlflowTask extends AbstractTaskExecutor { - /** - * shell parameters - */ - private MlflowParameters mlflowParameters; - + private static final Pattern GIT_CHECK_PATTERN = Pattern.compile("^(git@|https?://)"); /** * shell command executor */ @@ -56,6 +54,10 @@ public class MlflowTask extends AbstractTaskExecutor { * taskExecutionContext */ private final TaskExecutionContext taskExecutionContext; + /** + * shell parameters + */ + private MlflowParameters mlflowParameters; /** * constructor @@ -69,6 +71,34 @@ public class MlflowTask extends AbstractTaskExecutor { this.shellCommandExecutor = new ShellCommandExecutor(this::logHandle, taskExecutionContext, logger); } + static public String getPresetRepository() { + String presetRepository = PropertyUtils.getString(MlflowConstants.PRESET_REPOSITORY_KEY); + if (StringUtils.isEmpty(presetRepository)) { + presetRepository = MlflowConstants.PRESET_REPOSITORY; + } + return presetRepository; + } + + static public String getPresetRepositoryVersion() { + String version = PropertyUtils.getString(MlflowConstants.PRESET_REPOSITORY_VERSION_KEY); + if (StringUtils.isEmpty(version)) { + version = MlflowConstants.PRESET_REPOSITORY_VERSION; + } + return version; + } + + static public String getVersionString(String version, String repository) { + String versionString; + if (StringUtils.isEmpty(version)) { + versionString = ""; + } else if (GIT_CHECK_PATTERN.matcher(repository).find()) { + versionString = String.format("--version=%s", version); + } else { + versionString = ""; + } + return versionString; + } + @Override public void init() { logger.info("shell task params {}", taskExecutionContext.getTaskParams()); @@ -137,43 +167,59 @@ public class MlflowTask extends AbstractTaskExecutor { args.add(String.format(MlflowConstants.EXPORT_MLFLOW_TRACKING_URI_ENV, mlflowParameters.getMlflowTrackingUri())); String runCommand; + String versionString; - if (mlflowParameters.getMlflowJobType().equals(MlflowConstants.JOB_TYPE_BASIC_ALGORITHM)) { - args.add(String.format(MlflowConstants.SET_DATA_PATH, mlflowParameters.getDataPath())); - args.add(String.format(MlflowConstants.SET_REPOSITORY, MlflowConstants.PRESET_BASIC_ALGORITHM_PROJECT)); - args.add(String.format(MlflowConstants.GIT_CLONE_REPO, MlflowConstants.PRESET_REPOSITORY, MlflowConstants.PRESET_PATH)); + if (mlflowParameters.isCustomProject()) { + versionString = getVersionString(mlflowParameters.getMlflowProjectVersion(), mlflowParameters.getMlflowProjectRepository()); + } else { + versionString = getVersionString(getPresetRepositoryVersion(), getPresetRepository()); + } - runCommand = MlflowConstants.MLFLOW_RUN_BASIC_ALGORITHM; - runCommand = String.format(runCommand, mlflowParameters.getAlgorithm(), mlflowParameters.getParams(), mlflowParameters.getSearchParams(), mlflowParameters.getModelName(), - mlflowParameters.getExperimentName()); - } else if (mlflowParameters.getMlflowJobType().equals(MlflowConstants.JOB_TYPE_AUTOML)) { - args.add(String.format(MlflowConstants.SET_DATA_PATH, mlflowParameters.getDataPath())); - args.add(String.format(MlflowConstants.SET_REPOSITORY, MlflowConstants.PRESET_AUTOML_PROJECT)); - args.add(String.format(MlflowConstants.GIT_CLONE_REPO, MlflowConstants.PRESET_REPOSITORY, MlflowConstants.PRESET_PATH)); + switch (mlflowParameters.getMlflowJobType()) { + case MlflowConstants.JOB_TYPE_BASIC_ALGORITHM: + args.add(String.format(MlflowConstants.SET_DATA_PATH, mlflowParameters.getDataPath())); - runCommand = MlflowConstants.MLFLOW_RUN_AUTOML_PROJECT; - runCommand = String.format(runCommand, mlflowParameters.getAutomlTool(), mlflowParameters.getParams(), mlflowParameters.getModelName(), mlflowParameters.getExperimentName()); + String repoBasicAlgorithm = getPresetRepository() + MlflowConstants.PRESET_BASIC_ALGORITHM_PROJECT; + args.add(String.format(MlflowConstants.SET_REPOSITORY, repoBasicAlgorithm)); - } else if (mlflowParameters.getMlflowJobType().equals(MlflowConstants.JOB_TYPE_CUSTOM_PROJECT)) { - args.add(String.format(MlflowConstants.SET_REPOSITORY, mlflowParameters.getMlflowProjectRepository())); + runCommand = MlflowConstants.MLFLOW_RUN_BASIC_ALGORITHM; + runCommand = String.format(runCommand, mlflowParameters.getAlgorithm(), mlflowParameters.getParams(), mlflowParameters.getSearchParams(), mlflowParameters.getModelName(), + mlflowParameters.getExperimentName()); + break; - runCommand = MlflowConstants.MLFLOW_RUN_CUSTOM_PROJECT; - runCommand = String.format(runCommand, mlflowParameters.getParams(), mlflowParameters.getExperimentName(), mlflowParameters.getMlflowProjectVersion()); - } else { - runCommand = String.format("Cant not Support %s", mlflowParameters.getMlflowJobType()); + case MlflowConstants.JOB_TYPE_AUTOML: + args.add(String.format(MlflowConstants.SET_DATA_PATH, mlflowParameters.getDataPath())); + String repoAutoML = getPresetRepository() + MlflowConstants.PRESET_AUTOML_PROJECT; + args.add(String.format(MlflowConstants.SET_REPOSITORY, repoAutoML)); + runCommand = MlflowConstants.MLFLOW_RUN_AUTOML_PROJECT; + runCommand = String.format(runCommand, mlflowParameters.getAutomlTool(), mlflowParameters.getParams(), mlflowParameters.getModelName(), mlflowParameters.getExperimentName()); + break; + + case MlflowConstants.JOB_TYPE_CUSTOM_PROJECT: + args.add(String.format(MlflowConstants.SET_REPOSITORY, mlflowParameters.getMlflowProjectRepository())); + runCommand = MlflowConstants.MLFLOW_RUN_CUSTOM_PROJECT; + runCommand = String.format(runCommand, mlflowParameters.getParams(), mlflowParameters.getExperimentName()); + break; + + default: + throw new TaskException("Unsupported mlflow job type: " + mlflowParameters.getMlflowJobType()); + } + + // add version string to command if repository is local path + if (StringUtils.isNotEmpty(versionString)) { + runCommand = runCommand + " " + versionString; } args.add(runCommand); - String command = ParameterUtils.convertParameterPlaceholders(String.join("\n", args), ParamUtils.convert(paramsMap)); - return command; + return ParameterUtils.convertParameterPlaceholders(String.join("\n", args), ParamUtils.convert(paramsMap)); } + /** + * build mlflow models command + */ protected String buildCommandForMlflowModels() { - /** - * build mlflow models command - */ Map paramsMap = getParamsMap(); List args = new ArrayList<>(); @@ -190,20 +236,9 @@ public class MlflowTask extends AbstractTaskExecutor { args.add(String.format(MlflowConstants.MLFLOW_BUILD_DOCKER, deployModelKey, imageName)); args.add(String.format(MlflowConstants.DOCKER_RREMOVE_CONTAINER, containerName)); args.add(String.format(MlflowConstants.DOCKER_RUN, containerName, mlflowParameters.getDeployPort(), imageName)); - } else if (mlflowParameters.getDeployType().equals(MlflowConstants.MLFLOW_MODELS_DEPLOY_TYPE_DOCKER_COMPOSE)) { - String templatePath = getTemplatePath(MlflowConstants.TEMPLATE_DOCKER_COMPOSE); - args.add(String.format("cp %s %s", templatePath, taskExecutionContext.getExecutePath())); - String imageName = "mlflow/" + mlflowParameters.getModelKeyName(":"); - String containerName = mlflowParameters.getContainerName(); - - args.add(String.format(MlflowConstants.MLFLOW_BUILD_DOCKER, deployModelKey, imageName)); - args.add(String.format(MlflowConstants.DOCKER_RREMOVE_CONTAINER, containerName)); - args.add(mlflowParameters.getDockerComposeEnvCommand()); - args.add(MlflowConstants.DOCKER_COMPOSE_RUN); } - String command = ParameterUtils.convertParameterPlaceholders(String.join("\n", args), ParamUtils.convert(paramsMap)); - return command; + return ParameterUtils.convertParameterPlaceholders(String.join("\n", args), ParamUtils.convert(paramsMap)); } private Map getParamsMap() { @@ -212,7 +247,7 @@ public class MlflowTask extends AbstractTaskExecutor { } - public int checkDockerHealth() throws Exception { + public int checkDockerHealth() { logger.info("checking container healthy ... "); int exitCode = -1; String[] command = {"sh", "-c", String.format(MlflowConstants.DOCKER_HEALTH_CHECK, mlflowParameters.getContainerName())}; @@ -240,13 +275,8 @@ public class MlflowTask extends AbstractTaskExecutor { } @Override - public AbstractParameters getParameters() { + public MlflowParameters getParameters() { return mlflowParameters; } - public String getTemplatePath(String template) { - String templatePath = MlflowTask.class.getClassLoader().getResource(template).getPath(); - return templatePath; - } - } diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-mlflow/src/main/resources/docker-compose.yml b/dolphinscheduler-task-plugin/dolphinscheduler-task-mlflow/src/main/resources/docker-compose.yml deleted file mode 100644 index f9211b9bae..0000000000 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-mlflow/src/main/resources/docker-compose.yml +++ /dev/null @@ -1,39 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: "3" - -services: - mlflow-model: - image: "${DS_TASK_MLFLOW_IMAGE_NAME}" - container_name: "${DS_TASK_MLFLOW_CONTAINER_NAME}" - ports: - - "${DS_TASK_MLFLOW_DEPLOY_PORT}:8080" - deploy: - resources: - limits: - cpus: "${DS_TASK_MLFLOW_CPU_LIMIT}" - memory: "${DS_TASK_MLFLOW_MEMORY_LIMIT}" - - environment: - PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION: python - - - healthcheck: - test: ["CMD", "curl", "http://127.0.0.1:8080/ping"] - interval: 5s - timeout: 5s - retries: 5 \ No newline at end of file diff --git a/dolphinscheduler-task-plugin/dolphinscheduler-task-mlflow/src/test/java/org/apache/dolphinler/plugin/task/mlflow/MlflowTaskTest.java b/dolphinscheduler-task-plugin/dolphinscheduler-task-mlflow/src/test/java/org/apache/dolphinler/plugin/task/mlflow/MlflowTaskTest.java index ea29d3b4bd..5f0c1aa581 100644 --- a/dolphinscheduler-task-plugin/dolphinscheduler-task-mlflow/src/test/java/org/apache/dolphinler/plugin/task/mlflow/MlflowTaskTest.java +++ b/dolphinscheduler-task-plugin/dolphinscheduler-task-mlflow/src/test/java/org/apache/dolphinler/plugin/task/mlflow/MlflowTaskTest.java @@ -17,6 +17,8 @@ package org.apache.dolphinler.plugin.task.mlflow; +import static org.powermock.api.mockito.PowerMockito.when; + import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContext; import org.apache.dolphinscheduler.plugin.task.api.TaskExecutionContextCacheManager; import org.apache.dolphinscheduler.plugin.task.mlflow.MlflowConstants; @@ -76,21 +78,46 @@ public class MlflowTaskTest { return taskExecutionContext; } + @Test + public void testGetPresetRepositoryData() { + Assert.assertEquals("https://github.com/apache/dolphinscheduler-mlflow", MlflowTask.getPresetRepository()); + + Assert.assertEquals("main", MlflowTask.getPresetRepositoryVersion()); + + String definedRepository = "https://github.com//dolphinscheduler-mlflow"; + when(PropertyUtils.getString(MlflowConstants.PRESET_REPOSITORY_KEY)).thenAnswer(invocation -> definedRepository); + Assert.assertEquals(definedRepository, MlflowTask.getPresetRepository()); + + String definedRepositoryVersion = "dev"; + when(PropertyUtils.getString(MlflowConstants.PRESET_REPOSITORY_VERSION_KEY)).thenAnswer(invocation -> definedRepositoryVersion); + Assert.assertEquals(definedRepositoryVersion, MlflowTask.getPresetRepositoryVersion()); + } + + @Test + public void testGetVersionString() { + Assert.assertEquals("--version=main", MlflowTask.getVersionString("main", "https://github.com/apache/dolphinscheduler-mlflow")); + Assert.assertEquals("--version=master", MlflowTask.getVersionString("master", "https://github.com/apache/dolphinscheduler-mlflow")); + Assert.assertEquals("--version=main", MlflowTask.getVersionString("main", "git@github.com:apache/dolphinscheduler-mlflow.git")); + Assert.assertEquals("--version=master", MlflowTask.getVersionString("master", "git@github.com:apache/dolphinscheduler-mlflow.git")); + Assert.assertEquals("", MlflowTask.getVersionString("main", "/tmp/dolphinscheduler-mlflow")); + Assert.assertEquals("", MlflowTask.getVersionString("master", "/tmp/dolphinscheduler-mlflow")); + } + @Test public void testInitBasicAlgorithmTask() { MlflowTask mlflowTask = initTask(createBasicAlgorithmParameters()); Assert.assertEquals(mlflowTask.buildCommand(), "export MLFLOW_TRACKING_URI=http://127.0.0.1:5000\n" + "data_path=/data/iris.csv\n" - + "repo=dolphinscheduler-mlflow#Project-BasicAlgorithm\n" - + "git clone https://github.com/apache/dolphinscheduler-mlflow dolphinscheduler-mlflow\n" + + "repo=https://github.com/apache/dolphinscheduler-mlflow#Project-BasicAlgorithm\n" + "mlflow run $repo " + "-P algorithm=xgboost " + "-P data_path=$data_path " + "-P params=\"n_estimators=100\" " + "-P search_params=\"\" " + "-P model_name=\"BasicAlgorithm\" " - + "--experiment-name=\"BasicAlgorithm\""); + + "--experiment-name=\"BasicAlgorithm\" " + + "--version=main"); } @Test @@ -99,19 +126,32 @@ public class MlflowTaskTest { Assert.assertEquals(mlflowTask.buildCommand(), "export MLFLOW_TRACKING_URI=http://127.0.0.1:5000\n" + "data_path=/data/iris.csv\n" - + "repo=dolphinscheduler-mlflow#Project-AutoML\n" - + "git clone https://github.com/apache/dolphinscheduler-mlflow dolphinscheduler-mlflow\n" + + "repo=https://github.com/apache/dolphinscheduler-mlflow#Project-AutoML\n" + "mlflow run $repo " + "-P tool=autosklearn " + "-P data_path=$data_path " + "-P params=\"time_left_for_this_task=30\" " + "-P model_name=\"AutoML\" " - + "--experiment-name=\"AutoML\""); + + "--experiment-name=\"AutoML\" " + + "--version=main"); } @Test public void testInitCustomProjectTask() { MlflowTask mlflowTask = initTask(createCustomProjectParameters()); + + // Version will be set if parameter.mlflowProjectVersion is empty + Assert.assertEquals(mlflowTask.buildCommand(), + "export MLFLOW_TRACKING_URI=http://127.0.0.1:5000\n" + + "repo=https://github.com/mlflow/mlflow#examples/xgboost/xgboost_native\n" + + "mlflow run $repo " + + "-P learning_rate=0.2 " + + "-P colsample_bytree=0.8 " + + "-P subsample=0.9 " + + "--experiment-name=\"custom_project\""); + + // Version will be set if repository is remote path + mlflowTask.getParameters().setMlflowProjectVersion("dev"); Assert.assertEquals(mlflowTask.buildCommand(), "export MLFLOW_TRACKING_URI=http://127.0.0.1:5000\n" + "repo=https://github.com/mlflow/mlflow#examples/xgboost/xgboost_native\n" @@ -120,7 +160,19 @@ public class MlflowTaskTest { + "-P colsample_bytree=0.8 " + "-P subsample=0.9 " + "--experiment-name=\"custom_project\" " - + "--version=\"master\" "); + + "--version=dev"); + + // Version will not be set if repository is local path + mlflowTask.getParameters().setMlflowProjectRepository("/tmp/dolphinscheduler-mlflow"); + Assert.assertEquals(mlflowTask.buildCommand(), + "export MLFLOW_TRACKING_URI=http://127.0.0.1:5000\n" + + "repo=/tmp/dolphinscheduler-mlflow\n" + + "mlflow run $repo " + + "-P learning_rate=0.2 " + + "-P colsample_bytree=0.8 " + + "-P subsample=0.9 " + + "--experiment-name=\"custom_project\""); + } @Test @@ -143,24 +195,6 @@ public class MlflowTaskTest { + "mlflow/model:1"); } - @Test - public void testModelsDeployDockerCompose() throws Exception { - MlflowTask mlflowTask = initTask(createModelDeplyDockerComposeParameters()); - Assert.assertEquals(mlflowTask.buildCommand(), - "export MLFLOW_TRACKING_URI=http://127.0.0.1:5000\n" - + "cp " - + mlflowTask.getTemplatePath(MlflowConstants.TEMPLATE_DOCKER_COMPOSE) - + " /tmp/dolphinscheduler_test\n" - + "mlflow models build-docker -m models:/model/1 -n mlflow/model:1 --enable-mlserver\n" - + "docker rm -f ds-mlflow-model-1\n" - + "export DS_TASK_MLFLOW_IMAGE_NAME=mlflow/model:1\n" - + "export DS_TASK_MLFLOW_CONTAINER_NAME=ds-mlflow-model-1\n" - + "export DS_TASK_MLFLOW_DEPLOY_PORT=7000\n" - + "export DS_TASK_MLFLOW_CPU_LIMIT=0.5\n" - + "export DS_TASK_MLFLOW_MEMORY_LIMIT=200m\n" - + "docker-compose up -d"); - } - private MlflowTask initTask(MlflowParameters mlflowParameters) { TaskExecutionContext taskExecutionContext = createContext(mlflowParameters); MlflowTask mlflowTask = new MlflowTask(taskExecutionContext); @@ -174,11 +208,11 @@ public class MlflowTaskTest { mlflowParameters.setMlflowTaskType(MlflowConstants.MLFLOW_TASK_TYPE_PROJECTS); mlflowParameters.setMlflowJobType(MlflowConstants.JOB_TYPE_BASIC_ALGORITHM); mlflowParameters.setAlgorithm("xgboost"); - mlflowParameters.setDataPaths("/data/iris.csv"); + mlflowParameters.setDataPath("/data/iris.csv"); mlflowParameters.setParams("n_estimators=100"); - mlflowParameters.setExperimentNames("BasicAlgorithm"); - mlflowParameters.setModelNames("BasicAlgorithm"); - mlflowParameters.setMlflowTrackingUris("http://127.0.0.1:5000"); + mlflowParameters.setExperimentName("BasicAlgorithm"); + mlflowParameters.setModelName("BasicAlgorithm"); + mlflowParameters.setMlflowTrackingUri("http://127.0.0.1:5000"); return mlflowParameters; } @@ -188,10 +222,10 @@ public class MlflowTaskTest { mlflowParameters.setMlflowJobType(MlflowConstants.JOB_TYPE_AUTOML); mlflowParameters.setAutomlTool("autosklearn"); mlflowParameters.setParams("time_left_for_this_task=30"); - mlflowParameters.setDataPaths("/data/iris.csv"); - mlflowParameters.setExperimentNames("AutoML"); - mlflowParameters.setModelNames("AutoML"); - mlflowParameters.setMlflowTrackingUris("http://127.0.0.1:5000"); + mlflowParameters.setDataPath("/data/iris.csv"); + mlflowParameters.setExperimentName("AutoML"); + mlflowParameters.setModelName("AutoML"); + mlflowParameters.setMlflowTrackingUri("http://127.0.0.1:5000"); return mlflowParameters; } @@ -199,8 +233,8 @@ public class MlflowTaskTest { MlflowParameters mlflowParameters = new MlflowParameters(); mlflowParameters.setMlflowTaskType(MlflowConstants.MLFLOW_TASK_TYPE_PROJECTS); mlflowParameters.setMlflowJobType(MlflowConstants.JOB_TYPE_CUSTOM_PROJECT); - mlflowParameters.setMlflowTrackingUris("http://127.0.0.1:5000"); - mlflowParameters.setExperimentNames("custom_project"); + mlflowParameters.setMlflowTrackingUri("http://127.0.0.1:5000"); + mlflowParameters.setExperimentName("custom_project"); mlflowParameters.setParams("-P learning_rate=0.2 -P colsample_bytree=0.8 -P subsample=0.9"); mlflowParameters.setMlflowProjectRepository("https://github.com/mlflow/mlflow#examples/xgboost/xgboost_native"); @@ -211,7 +245,7 @@ public class MlflowTaskTest { MlflowParameters mlflowParameters = new MlflowParameters(); mlflowParameters.setMlflowTaskType(MlflowConstants.MLFLOW_TASK_TYPE_MODELS); mlflowParameters.setDeployType(MlflowConstants.MLFLOW_MODELS_DEPLOY_TYPE_MLFLOW); - mlflowParameters.setMlflowTrackingUris("http://127.0.0.1:5000"); + mlflowParameters.setMlflowTrackingUri("http://127.0.0.1:5000"); mlflowParameters.setDeployModelKey("models:/model/1"); mlflowParameters.setDeployPort("7000"); return mlflowParameters; @@ -221,21 +255,9 @@ public class MlflowTaskTest { MlflowParameters mlflowParameters = new MlflowParameters(); mlflowParameters.setMlflowTaskType(MlflowConstants.MLFLOW_TASK_TYPE_MODELS); mlflowParameters.setDeployType(MlflowConstants.MLFLOW_MODELS_DEPLOY_TYPE_DOCKER); - mlflowParameters.setMlflowTrackingUris("http://127.0.0.1:5000"); - mlflowParameters.setDeployModelKey("models:/model/1"); - mlflowParameters.setDeployPort("7000"); - return mlflowParameters; - } - - private MlflowParameters createModelDeplyDockerComposeParameters() { - MlflowParameters mlflowParameters = new MlflowParameters(); - mlflowParameters.setMlflowTaskType(MlflowConstants.MLFLOW_TASK_TYPE_MODELS); - mlflowParameters.setDeployType(MlflowConstants.MLFLOW_MODELS_DEPLOY_TYPE_DOCKER_COMPOSE); - mlflowParameters.setMlflowTrackingUris("http://127.0.0.1:5000"); + mlflowParameters.setMlflowTrackingUri("http://127.0.0.1:5000"); mlflowParameters.setDeployModelKey("models:/model/1"); mlflowParameters.setDeployPort("7000"); - mlflowParameters.setCpuLimit("0.5"); - mlflowParameters.setMemoryLimit("200m"); return mlflowParameters; } } diff --git a/dolphinscheduler-ui/src/locales/en_US/project.ts b/dolphinscheduler-ui/src/locales/en_US/project.ts index fa599a466c..1242d17af4 100644 --- a/dolphinscheduler-ui/src/locales/en_US/project.ts +++ b/dolphinscheduler-ui/src/locales/en_US/project.ts @@ -710,7 +710,7 @@ export default { mlflow_deployModelKey: 'Model-URI', mlflow_deployPort: 'Port', mlflowProjectRepository: 'Repository', - mlflowProjectRepository_tips: 'github respository or path on worker', + mlflowProjectRepository_tips: 'git respository or path on worker', mlflowProjectVersion: 'Project Version', mlflowProjectVersion_tips: 'git version', mlflow_cpuLimit: 'Max Cpu Limit', diff --git a/dolphinscheduler-ui/src/locales/zh_CN/project.ts b/dolphinscheduler-ui/src/locales/zh_CN/project.ts index a08d0dc678..50873d8771 100644 --- a/dolphinscheduler-ui/src/locales/zh_CN/project.ts +++ b/dolphinscheduler-ui/src/locales/zh_CN/project.ts @@ -693,7 +693,7 @@ export default { mlflow_deployModelKey: '部署的模型URI', mlflow_deployPort: '监听端口', mlflowProjectRepository: '运行仓库', - mlflowProjectRepository_tips: '可以为github仓库或worker上的路径', + mlflowProjectRepository_tips: '可以为git仓库或worker上的路径', mlflowProjectVersion: '项目版本', mlflowProjectVersion_tips: '项目git版本', mlflow_cpuLimit: '最大cpu限制', diff --git a/dolphinscheduler-ui/src/views/projects/task/components/node/fields/use-mlflow-models.ts b/dolphinscheduler-ui/src/views/projects/task/components/node/fields/use-mlflow-models.ts index 9939bbb5aa..eef3837fde 100644 --- a/dolphinscheduler-ui/src/views/projects/task/components/node/fields/use-mlflow-models.ts +++ b/dolphinscheduler-ui/src/views/projects/task/components/node/fields/use-mlflow-models.ts @@ -23,8 +23,6 @@ export function useMlflowModels(model: { [field: string]: any }): IJsonItem[] { const deployTypeSpan = ref(0) const deployModelKeySpan = ref(0) const deployPortSpan = ref(0) - const cpuLimitSpan = ref(0) - const memoryLimitSpan = ref(0) const setFlag = () => { model.isModels = model.mlflowTaskType === 'MLflow Models' ? true : false @@ -44,14 +42,6 @@ export function useMlflowModels(model: { [field: string]: any }): IJsonItem[] { } ) - watch( - () => [model.deployType], - () => { - cpuLimitSpan.value = model.deployType === 'DOCKER COMPOSE' ? 12 : 0 - memoryLimitSpan.value = model.deployType === 'DOCKER COMPOSE' ? 12 : 0 - } - ) - setFlag() resetSpan() @@ -74,18 +64,6 @@ export function useMlflowModels(model: { [field: string]: any }): IJsonItem[] { field: 'deployPort', name: t('project.node.mlflow_deployPort'), span: deployPortSpan - }, - { - type: 'input', - field: 'cpuLimit', - name: t('project.node.mlflow_cpuLimit'), - span: cpuLimitSpan - }, - { - type: 'input', - field: 'memoryLimit', - name: t('project.node.mlflow_memoryLimit'), - span: memoryLimitSpan } ] } @@ -98,9 +76,5 @@ const DEPLOY_TYPE = [ { label: 'DOCKER', value: 'DOCKER' - }, - { - label: 'DOCKER COMPOSE', - value: 'DOCKER COMPOSE' } ] diff --git a/dolphinscheduler-ui/src/views/projects/task/components/node/fields/use-mlflow-projects.ts b/dolphinscheduler-ui/src/views/projects/task/components/node/fields/use-mlflow-projects.ts index ea033cbb83..c312ee435e 100644 --- a/dolphinscheduler-ui/src/views/projects/task/components/node/fields/use-mlflow-projects.ts +++ b/dolphinscheduler-ui/src/views/projects/task/components/node/fields/use-mlflow-projects.ts @@ -280,16 +280,16 @@ export function useCustomProject(model: { [field: string]: any }): IJsonItem[] { export const MLFLOW_JOB_TYPE = [ { - label: 'BasicAlgorithm', - value: 'BasicAlgorithm' + label: 'Custom Project', + value: 'CustomProject' }, { label: 'AutoML', value: 'AutoML' }, { - label: 'Custom Project', - value: 'CustomProject' + label: 'BasicAlgorithm', + value: 'BasicAlgorithm' } ] export const ALGORITHM = [ @@ -311,12 +311,12 @@ export const ALGORITHM = [ } ] export const AutoMLTOOL = [ - { - label: 'autosklearn', - value: 'autosklearn' - }, { label: 'flaml', value: 'flaml' + }, + { + label: 'autosklearn', + value: 'autosklearn' } ] diff --git a/dolphinscheduler-ui/src/views/projects/task/components/node/fields/use-mlflow.ts b/dolphinscheduler-ui/src/views/projects/task/components/node/fields/use-mlflow.ts index 66ce7cd8c2..efed24661f 100644 --- a/dolphinscheduler-ui/src/views/projects/task/components/node/fields/use-mlflow.ts +++ b/dolphinscheduler-ui/src/views/projects/task/components/node/fields/use-mlflow.ts @@ -17,6 +17,7 @@ import { useI18n } from 'vue-i18n' import type { IJsonItem } from '../types' import { useMlflowProjects, useMlflowModels } from '.' +import { useCustomParams, useResources } from '.' export const MLFLOW_TASK_TYPE = [ { @@ -61,6 +62,8 @@ export function useMlflow(model: { [field: string]: any }): IJsonItem[] { options: MLFLOW_TASK_TYPE }, ...useMlflowProjects(model), - ...useMlflowModels(model) + ...useMlflowModels(model), + useResources(), + ...useCustomParams({ model, field: 'localParams', isSimple: true }) ] } diff --git a/dolphinscheduler-ui/src/views/projects/task/components/node/format-data.ts b/dolphinscheduler-ui/src/views/projects/task/components/node/format-data.ts index 0d845fafc1..c7be65e8ca 100644 --- a/dolphinscheduler-ui/src/views/projects/task/components/node/format-data.ts +++ b/dolphinscheduler-ui/src/views/projects/task/components/node/format-data.ts @@ -369,8 +369,6 @@ export function formatParams(data: INodeData): { taskParams.deployModelKey = data.deployModelKey taskParams.mlflowProjectRepository = data.mlflowProjectRepository taskParams.mlflowProjectVersion = data.mlflowProjectVersion - taskParams.cpuLimit = data.cpuLimit - taskParams.memoryLimit = data.memoryLimit } if (data.taskType === 'DVC') { diff --git a/dolphinscheduler-ui/src/views/projects/task/components/node/tasks/use-mlflow.ts b/dolphinscheduler-ui/src/views/projects/task/components/node/tasks/use-mlflow.ts index e1957f138e..6c6ed21db1 100644 --- a/dolphinscheduler-ui/src/views/projects/task/components/node/tasks/use-mlflow.ts +++ b/dolphinscheduler-ui/src/views/projects/task/components/node/tasks/use-mlflow.ts @@ -47,10 +47,7 @@ export function useMlflow({ deployType: 'MLFLOW', deployPort: '7000', mlflowJobType: 'CustomProject', - mlflowProjectVersion: 'master', automlTool: 'flaml', - cpuLimit: '0.5', - memoryLimit: '500M', mlflowCustomProjectParameters: [], delayTime: 0, timeout: 30,