JieguangZhou
2 years ago
committed by
Jiajie Zhong
8 changed files with 712 additions and 0 deletions
@ -0,0 +1,42 @@
.. Licensed to the Apache Software Foundation (ASF) under one
   or more contributor license agreements. See the NOTICE file
   distributed with this work for additional information
   regarding copyright ownership. The ASF licenses this file
   to you under the Apache License, Version 2.0 (the
   "License"); you may not use this file except in compliance
   with the License. You may obtain a copy of the License at

..   http://www.apache.org/licenses/LICENSE-2.0

.. Unless required by applicable law or agreed to in writing,
   software distributed under the License is distributed on an
   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
   KIND, either express or implied. See the License for the
   specific language governing permissions and limitations
   under the License.

MLflow
=========

An example of the MLflow task type and a dive into its implementation in **PyDolphinScheduler**.

Example
-------

.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_mlflow_example.py
   :start-after: [start workflow_declare]
   :end-before: [end workflow_declare]

Dive Into
---------

.. automodule:: pydolphinscheduler.tasks.mlflow


YAML file example
-----------------

.. literalinclude:: ../../../examples/yaml_define/mlflow.yaml
   :start-after: # under the License.
   :language: yaml
@ -0,0 +1,78 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Define variable `mlflow_tracking_uri`
mlflow_tracking_uri: &mlflow_tracking_uri "http://127.0.0.1:5000"

# Define the workflow
workflow:
  name: "MLflow"

# Define the tasks under the workflow
tasks:
  - name: train_xgboost_native
    task_type: MLFlowProjectsCustom
    repository: https://github.com/mlflow/mlflow#examples/xgboost/xgboost_native
    mlflow_tracking_uri: *mlflow_tracking_uri
    parameters: -P learning_rate=0.2 -P colsample_bytree=0.8 -P subsample=0.9
    experiment_name: xgboost

  - name: deploy_mlflow
    deps: [train_xgboost_native]
    task_type: MLflowModels
    model_uri: models:/xgboost_native/Production
    mlflow_tracking_uri: *mlflow_tracking_uri
    deploy_mode: MLFLOW
    port: 7001

  - name: train_automl
    task_type: MLFlowProjectsAutoML
    mlflow_tracking_uri: *mlflow_tracking_uri
    parameters: time_budget=30;estimator_list=['lgbm']
    experiment_name: automl_iris
    model_name: iris_A
    automl_tool: flaml
    data_path: /data/examples/iris

  - name: deploy_docker
    task_type: MLflowModels
    deps: [train_automl]
    model_uri: models:/iris_A/Production
    mlflow_tracking_uri: *mlflow_tracking_uri
    deploy_mode: DOCKER
    port: 7002

  - name: train_basic_algorithm
    task_type: MLFlowProjectsBasicAlgorithm
    mlflow_tracking_uri: *mlflow_tracking_uri
    parameters: n_estimators=200;learning_rate=0.2
    experiment_name: basic_algorithm_iris
    model_name: iris_B
    algorithm: lightgbm
    data_path: /data/examples/iris
    search_params: max_depth=[5, 10];n_estimators=[100, 200]

  - name: deploy_docker_compose
    task_type: MLflowModels
    deps: [train_basic_algorithm]
    model_uri: models:/iris_B/Production
    mlflow_tracking_uri: *mlflow_tracking_uri
    deploy_mode: DOCKER COMPOSE
    port: 7003
@ -0,0 +1,104 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# [start workflow_declare]
"""An example workflow for task mlflow."""

from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.tasks.mlflow import (
    MLflowDeployType,
    MLflowModels,
    MLFlowProjectsAutoML,
    MLFlowProjectsBasicAlgorithm,
    MLFlowProjectsCustom,
)

mlflow_tracking_uri = "http://127.0.0.1:5000"

with ProcessDefinition(
    name="task_mlflow_example",
    tenant="tenant_exists",
) as pd:

    # run custom mlflow project to train model
    train_custom = MLFlowProjectsCustom(
        name="train_xgboost_native",
        repository="https://github.com/mlflow/mlflow#examples/xgboost/xgboost_native",
        mlflow_tracking_uri=mlflow_tracking_uri,
        parameters="-P learning_rate=0.2 -P colsample_bytree=0.8 -P subsample=0.9",
        experiment_name="xgboost",
    )

    # Using MLFLOW to deploy model from custom mlflow project
    deploy_mlflow = MLflowModels(
        name="deploy_mlflow",
        model_uri="models:/xgboost_native/Production",
        mlflow_tracking_uri=mlflow_tracking_uri,
        deploy_mode=MLflowDeployType.MLFLOW,
        port=7001,
    )

    train_custom >> deploy_mlflow

    # run automl to train model
    train_automl = MLFlowProjectsAutoML(
        name="train_automl",
        mlflow_tracking_uri=mlflow_tracking_uri,
        parameters="time_budget=30;estimator_list=['lgbm']",
        experiment_name="automl_iris",
        model_name="iris_A",
        automl_tool="flaml",
        data_path="/data/examples/iris",
    )

    # Using DOCKER to deploy model from train_automl
    deploy_docker = MLflowModels(
        name="deploy_docker",
        model_uri="models:/iris_A/Production",
        mlflow_tracking_uri=mlflow_tracking_uri,
        deploy_mode=MLflowDeployType.DOCKER,
        port=7002,
    )

    train_automl >> deploy_docker

    # run lightgbm to train model
    train_basic_algorithm = MLFlowProjectsBasicAlgorithm(
        name="train_basic_algorithm",
        mlflow_tracking_uri=mlflow_tracking_uri,
        parameters="n_estimators=200;learning_rate=0.2",
        experiment_name="basic_algorithm_iris",
        model_name="iris_B",
        algorithm="lightgbm",
        data_path="/data/examples/iris",
        search_params="max_depth=[5, 10];n_estimators=[100, 200]",
    )

    # Using DOCKER COMPOSE to deploy model from train_basic_algorithm
    deploy_docker_compose = MLflowModels(
        name="deploy_docker_compose",
        model_uri="models:/iris_B/Production",
        mlflow_tracking_uri=mlflow_tracking_uri,
        deploy_mode=MLflowDeployType.DOCKER_COMPOSE,
        port=7003,
    )

    train_basic_algorithm >> deploy_docker_compose

    pd.submit()

# [end workflow_declare]
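
A side note on the `>>` lines in the example above: `train_custom >> deploy_mlflow` declares `deploy_mlflow` as downstream of `train_custom`, so the deployment task runs after the training task. A minimal sketch of the equivalent explicit call, assuming the `Task.set_downstream` helper that the `>>` operator delegates to:

    # Sketch only: the same dependency as `train_custom >> deploy_mlflow`,
    # written explicitly (assumes Task.set_downstream backs the >> operator).
    train_custom.set_downstream(deploy_mlflow)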
@ -0,0 +1,265 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Task mlflow."""
from copy import deepcopy
from typing import Dict, Optional

from pydolphinscheduler.constants import TaskType
from pydolphinscheduler.core.task import Task

class MLflowTaskType(str):
    """MLflow task type."""

    MLFLOW_PROJECTS = "MLflow Projects"
    MLFLOW_MODELS = "MLflow Models"


class MLflowJobType(str):
    """MLflow job type."""

    AUTOML = "AutoML"
    BASIC_ALGORITHM = "BasicAlgorithm"
    CUSTOM_PROJECT = "CustomProject"


class MLflowDeployType(str):
    """MLflow deploy type."""

    MLFLOW = "MLFLOW"
    DOCKER = "DOCKER"
    DOCKER_COMPOSE = "DOCKER COMPOSE"


DEFAULT_MLFLOW_TRACKING_URI = "http://127.0.0.1:5000"
DEFAULT_VERSION = "master"

class BaseMLflow(Task):
    """Base MLflow task."""

    mlflow_task_type = None

    _task_custom_attr = {
        "mlflow_tracking_uri",
        "mlflow_task_type",
    }

    _child_task_mlflow_attr = set()

    def __init__(self, name: str, mlflow_tracking_uri: str, *args, **kwargs):
        super().__init__(name, TaskType.MLFLOW, *args, **kwargs)
        self.mlflow_tracking_uri = mlflow_tracking_uri

    @property
    def task_params(self) -> Dict:
        """Return task params."""
        # Copy the class-level attribute set onto the instance before merging,
        # so one subclass's extra attributes do not leak into the shared set.
        self._task_custom_attr = deepcopy(self._task_custom_attr)
        # Merge the subclass-specific attributes so they are serialized too.
        self._task_custom_attr.update(self._child_task_mlflow_attr)
        return super().task_params


class MLflowModels(BaseMLflow):
    """Task MLflow models object, declare behavior for MLflow models task to dolphinscheduler.

    Deploy machine learning models in diverse serving environments.

    :param name: task name
    :param model_uri: Model-URI of MLflow, supports the models:/<model_name>/suffix format
        and the runs:/ format. See https://mlflow.org/docs/latest/tracking.html#artifact-stores
    :param mlflow_tracking_uri: MLflow tracking server uri, default is http://127.0.0.1:5000
    :param deploy_mode: MLflow deploy mode, supports MLFLOW, DOCKER, DOCKER COMPOSE, default is DOCKER
    :param port: deploy port, default is 7000
    :param cpu_limit: cpu limit, default is 1.0
    :param memory_limit: memory limit, default is 500M
    """

    mlflow_task_type = MLflowTaskType.MLFLOW_MODELS

    _child_task_mlflow_attr = {
        "deploy_type",
        "deploy_model_key",
        "deploy_port",
        "cpu_limit",
        "memory_limit",
    }

    def __init__(
        self,
        name: str,
        model_uri: str,
        mlflow_tracking_uri: Optional[str] = DEFAULT_MLFLOW_TRACKING_URI,
        deploy_mode: Optional[str] = MLflowDeployType.DOCKER,
        port: Optional[int] = 7000,
        cpu_limit: Optional[float] = 1.0,
        memory_limit: Optional[str] = "500M",
        *args,
        **kwargs
    ):
        """Init mlflow models task."""
        super().__init__(name, mlflow_tracking_uri, *args, **kwargs)
        self.deploy_type = deploy_mode.upper()
        self.deploy_model_key = model_uri
        self.deploy_port = port
        self.cpu_limit = cpu_limit
        self.memory_limit = memory_limit


class MLFlowProjectsCustom(BaseMLflow):
    """Task MLflow projects object, declare behavior for MLflow Custom projects task to dolphinscheduler.

    :param name: task name
    :param repository: Repository url of the MLflow Project, supporting both a git address and a
        directory on the worker. If the project is in a subdirectory, add # to point to it
        (same as ``mlflow run``), for example
        https://github.com/mlflow/mlflow#examples/xgboost/xgboost_native.
    :param mlflow_tracking_uri: MLflow tracking server uri, default is http://127.0.0.1:5000
    :param experiment_name: MLflow experiment name, default is empty
    :param parameters: MLflow project parameters, default is empty
    :param version: MLflow project version, default is master
    """

    mlflow_task_type = MLflowTaskType.MLFLOW_PROJECTS
    mlflow_job_type = MLflowJobType.CUSTOM_PROJECT

    _child_task_mlflow_attr = {
        "mlflow_job_type",
        "experiment_name",
        "params",
        "mlflow_project_repository",
        "mlflow_project_version",
    }

    def __init__(
        self,
        name: str,
        repository: str,
        mlflow_tracking_uri: Optional[str] = DEFAULT_MLFLOW_TRACKING_URI,
        experiment_name: Optional[str] = "",
        parameters: Optional[str] = "",
        version: Optional[str] = "master",
        *args,
        **kwargs
    ):
        """Init mlflow projects task."""
        super().__init__(name, mlflow_tracking_uri, *args, **kwargs)
        self.mlflow_project_repository = repository
        self.experiment_name = experiment_name
        self.params = parameters
        self.mlflow_project_version = version


class MLFlowProjectsAutoML(BaseMLflow):
    """Task MLflow projects object, declare behavior for AutoML task to dolphinscheduler.

    :param name: task name
    :param data_path: Data path of the MLflow Project; supports a git address or a directory on the worker.
    :param automl_tool: The AutoML tool used, currently supports autosklearn and flaml.
    :param mlflow_tracking_uri: MLflow tracking server uri, default is http://127.0.0.1:5000
    :param experiment_name: MLflow experiment name, default is empty
    :param model_name: MLflow model name, default is empty
    :param parameters: MLflow project parameters, default is empty
    """

    mlflow_task_type = MLflowTaskType.MLFLOW_PROJECTS
    mlflow_job_type = MLflowJobType.AUTOML

    _child_task_mlflow_attr = {
        "mlflow_job_type",
        "experiment_name",
        "model_name",
        "register_model",
        "data_path",
        "params",
        "automl_tool",
    }

    def __init__(
        self,
        name: str,
        data_path: str,
        automl_tool: Optional[str] = "flaml",
        mlflow_tracking_uri: Optional[str] = DEFAULT_MLFLOW_TRACKING_URI,
        experiment_name: Optional[str] = "",
        model_name: Optional[str] = "",
        parameters: Optional[str] = "",
        *args,
        **kwargs
    ):
        """Init mlflow projects task."""
        super().__init__(name, mlflow_tracking_uri, *args, **kwargs)
        self.data_path = data_path
        self.experiment_name = experiment_name
        self.model_name = model_name
        self.params = parameters
        self.automl_tool = automl_tool.lower()
        self.register_model = bool(model_name)


class MLFlowProjectsBasicAlgorithm(BaseMLflow):
    """Task MLflow projects object, declare behavior for BasicAlgorithm task to dolphinscheduler.

    :param name: task name
    :param data_path: Data path of the MLflow Project; supports a git address or a directory on the worker.
    :param algorithm: The selected algorithm; currently supports LR, SVM, LightGBM and XGBoost,
        in scikit-learn form.
    :param mlflow_tracking_uri: MLflow tracking server uri, default is http://127.0.0.1:5000
    :param experiment_name: MLflow experiment name, default is empty
    :param model_name: MLflow model name, default is empty
    :param parameters: MLflow project parameters, default is empty
    :param search_params: parameter search space for tuning, default is empty (no search)
    """

    mlflow_job_type = MLflowJobType.BASIC_ALGORITHM
    mlflow_task_type = MLflowTaskType.MLFLOW_PROJECTS

    _child_task_mlflow_attr = {
        "mlflow_job_type",
        "experiment_name",
        "model_name",
        "register_model",
        "data_path",
        "params",
        "algorithm",
        "search_params",
    }

    def __init__(
        self,
        name: str,
        data_path: str,
        algorithm: Optional[str] = "lightgbm",
        mlflow_tracking_uri: Optional[str] = DEFAULT_MLFLOW_TRACKING_URI,
        experiment_name: Optional[str] = "",
        model_name: Optional[str] = "",
        parameters: Optional[str] = "",
        search_params: Optional[str] = "",
        *args,
        **kwargs
    ):
        """Init mlflow projects task."""
        super().__init__(name, mlflow_tracking_uri, *args, **kwargs)
        self.data_path = data_path
        self.experiment_name = experiment_name
        self.model_name = model_name
        self.params = parameters
        self.algorithm = algorithm.lower()
        self.search_params = search_params
        self.register_model = bool(model_name)
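
The three `MLFlowProjects*` classes above differ only in their class-level `mlflow_job_type`, the attribute names listed in `_child_task_mlflow_attr`, and the fields assigned in `__init__`; `BaseMLflow.task_params` merges `_child_task_mlflow_attr` into `_task_custom_attr` so each subclass serializes exactly its own fields. A minimal sketch of how a further job type could plug into the same pattern (the class name, the "MyJob" job type string, and the `foo` field are purely illustrative and not part of this change):

    # Hypothetical sketch only, reusing the definitions above; the "MyJob"
    # job type and the `foo` field do not exist in DolphinScheduler.
    class MLFlowProjectsMyJob(BaseMLflow):
        """Illustrative subclass following the MLflow Projects pattern."""

        mlflow_task_type = MLflowTaskType.MLFLOW_PROJECTS
        mlflow_job_type = "MyJob"  # illustrative value

        _child_task_mlflow_attr = {
            "mlflow_job_type",
            "experiment_name",
            "params",
            "foo",  # serialized into taskParams alongside the base attributes
        }

        def __init__(
            self,
            name: str,
            foo: str,
            mlflow_tracking_uri: Optional[str] = DEFAULT_MLFLOW_TRACKING_URI,
            experiment_name: Optional[str] = "",
            parameters: Optional[str] = "",
            *args,
            **kwargs
        ):
            """Init the illustrative mlflow projects task."""
            super().__init__(name, mlflow_tracking_uri, *args, **kwargs)
            self.experiment_name = experiment_name
            self.params = parameters
            self.foo = foo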
@ -0,0 +1,211 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Test Task MLflow."""
from copy import deepcopy
from unittest.mock import patch

from pydolphinscheduler.tasks.mlflow import (
    MLflowDeployType,
    MLflowJobType,
    MLflowModels,
    MLFlowProjectsAutoML,
    MLFlowProjectsBasicAlgorithm,
    MLFlowProjectsCustom,
    MLflowTaskType,
)

CODE = 123
VERSION = 1
MLFLOW_TRACKING_URI = "http://127.0.0.1:5000"

EXPECT = {
    "code": CODE,
    "version": VERSION,
    "description": None,
    "delayTime": 0,
    "taskType": "MLFLOW",
    "taskParams": {
        "resourceList": [],
        "localParams": [],
        "dependence": {},
        "conditionResult": {"successNode": [""], "failedNode": [""]},
        "waitStartTimeout": {},
    },
    "flag": "YES",
    "taskPriority": "MEDIUM",
    "workerGroup": "default",
    "environmentCode": None,
    "failRetryTimes": 0,
    "failRetryInterval": 1,
    "timeoutFlag": "CLOSE",
    "timeoutNotifyStrategy": None,
    "timeout": 0,
}


def test_mlflow_models_get_define():
    """Test task mlflow models function get_define."""
    name = "mlflow_models"
    model_uri = "models:/xgboost_native/Production"
    port = 7001
    cpu_limit = 2.0
    memory_limit = "600M"

    expect = deepcopy(EXPECT)
    expect["name"] = name
    task_params = expect["taskParams"]
    task_params["mlflowTrackingUri"] = MLFLOW_TRACKING_URI
    task_params["mlflowTaskType"] = MLflowTaskType.MLFLOW_MODELS
    task_params["deployType"] = MLflowDeployType.DOCKER_COMPOSE
    task_params["deployModelKey"] = model_uri
    task_params["deployPort"] = port
    task_params["cpuLimit"] = cpu_limit
    task_params["memoryLimit"] = memory_limit

    with patch(
        "pydolphinscheduler.core.task.Task.gen_code_and_version",
        return_value=(CODE, VERSION),
    ):
        task = MLflowModels(
            name=name,
            model_uri=model_uri,
            mlflow_tracking_uri=MLFLOW_TRACKING_URI,
            deploy_mode=MLflowDeployType.DOCKER_COMPOSE,
            port=port,
            cpu_limit=cpu_limit,
            memory_limit=memory_limit,
        )
        assert task.get_define() == expect


def test_mlflow_project_custom_get_define():
    """Test task mlflow project custom function get_define."""
    name = "train_xgboost_native"
    repository = "https://github.com/mlflow/mlflow#examples/xgboost/xgboost_native"
    mlflow_tracking_uri = MLFLOW_TRACKING_URI
    parameters = "-P learning_rate=0.2 -P colsample_bytree=0.8 -P subsample=0.9"
    experiment_name = "xgboost"

    expect = deepcopy(EXPECT)
    expect["name"] = name
    task_params = expect["taskParams"]

    task_params["mlflowTrackingUri"] = MLFLOW_TRACKING_URI
    task_params["mlflowTaskType"] = MLflowTaskType.MLFLOW_PROJECTS
    task_params["mlflowJobType"] = MLflowJobType.CUSTOM_PROJECT
    task_params["experimentName"] = experiment_name
    task_params["params"] = parameters
    task_params["mlflowProjectRepository"] = repository
    task_params["mlflowProjectVersion"] = "dev"

    with patch(
        "pydolphinscheduler.core.task.Task.gen_code_and_version",
        return_value=(CODE, VERSION),
    ):
        task = MLFlowProjectsCustom(
            name=name,
            repository=repository,
            mlflow_tracking_uri=mlflow_tracking_uri,
            parameters=parameters,
            experiment_name=experiment_name,
            version="dev",
        )
        assert task.get_define() == expect


def test_mlflow_project_automl_get_define():
    """Test task mlflow project automl function get_define."""
    name = "train_automl"
    mlflow_tracking_uri = MLFLOW_TRACKING_URI
    parameters = "time_budget=30;estimator_list=['lgbm']"
    experiment_name = "automl_iris"
    model_name = "iris_A"
    automl_tool = "flaml"
    data_path = "/data/examples/iris"

    expect = deepcopy(EXPECT)
    expect["name"] = name
    task_params = expect["taskParams"]

    task_params["mlflowTrackingUri"] = MLFLOW_TRACKING_URI
    task_params["mlflowTaskType"] = MLflowTaskType.MLFLOW_PROJECTS
    task_params["mlflowJobType"] = MLflowJobType.AUTOML
    task_params["experimentName"] = experiment_name
    task_params["modelName"] = model_name
    task_params["registerModel"] = bool(model_name)
    task_params["dataPath"] = data_path
    task_params["params"] = parameters
    task_params["automlTool"] = automl_tool

    with patch(
        "pydolphinscheduler.core.task.Task.gen_code_and_version",
        return_value=(CODE, VERSION),
    ):
        task = MLFlowProjectsAutoML(
            name=name,
            mlflow_tracking_uri=mlflow_tracking_uri,
            parameters=parameters,
            experiment_name=experiment_name,
            model_name=model_name,
            automl_tool=automl_tool,
            data_path=data_path,
        )
        assert task.get_define() == expect


def test_mlflow_project_basic_algorithm_get_define():
    """Test task mlflow project BasicAlgorithm function get_define."""
    name = "train_basic_algorithm"
    mlflow_tracking_uri = MLFLOW_TRACKING_URI
    parameters = "n_estimators=200;learning_rate=0.2"
    experiment_name = "basic_algorithm_iris"
    model_name = "iris_B"
    algorithm = "lightgbm"
    data_path = "/data/examples/iris"
    search_params = "max_depth=[5, 10];n_estimators=[100, 200]"

    expect = deepcopy(EXPECT)
    expect["name"] = name
    task_params = expect["taskParams"]

    task_params["mlflowTrackingUri"] = MLFLOW_TRACKING_URI
    task_params["mlflowTaskType"] = MLflowTaskType.MLFLOW_PROJECTS
    task_params["mlflowJobType"] = MLflowJobType.BASIC_ALGORITHM
    task_params["experimentName"] = experiment_name
    task_params["modelName"] = model_name
    task_params["registerModel"] = bool(model_name)
    task_params["dataPath"] = data_path
    task_params["params"] = parameters
    task_params["algorithm"] = algorithm
    task_params["searchParams"] = search_params

    with patch(
        "pydolphinscheduler.core.task.Task.gen_code_and_version",
        return_value=(CODE, VERSION),
    ):
        task = MLFlowProjectsBasicAlgorithm(
            name=name,
            mlflow_tracking_uri=mlflow_tracking_uri,
            parameters=parameters,
            experiment_name=experiment_name,
            model_name=model_name,
            algorithm=algorithm,
            data_path=data_path,
            search_params=search_params,
        )
        assert task.get_define() == expect