* add spark task
* fix code format
* add parent class for flink and spark
* modify Engine docstring
* modify docstring of Engine

3.0.0/version-upgrade
Devosend, 3 years ago, committed by GitHub
10 changed files with 464 additions and 40 deletions
@@ -0,0 +1,31 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""A example workflow for task spark.""" |
||||
|
||||
from pydolphinscheduler.core.process_definition import ProcessDefinition |
||||
from pydolphinscheduler.tasks.spark import DeployMode, ProgramType, Spark |
||||
|
||||
with ProcessDefinition(name="task_spark_example", tenant="tenant_exists") as pd: |
||||
task = Spark( |
||||
name="task_spark", |
||||
main_class="org.apache.spark.examples.SparkPi", |
||||
main_package="spark-examples_2.12-3.2.0.jar", |
||||
program_type=ProgramType.JAVA, |
||||
deploy_mode=DeployMode.LOCAL, |
||||
) |
||||
pd.run() |
@@ -0,0 +1,95 @@
# Licensed to the Apache Software Foundation (ASF) under the Apache License,
# Version 2.0 (same full license header as in the first file).

"""Module engine.""" |
||||
|
||||
from typing import Dict, Optional |
||||
|
||||
from py4j.protocol import Py4JJavaError |
||||
|
||||
from pydolphinscheduler.core.task import Task |
||||
from pydolphinscheduler.exceptions import PyDSParamException |
||||
from pydolphinscheduler.java_gateway import launch_gateway |
||||
|
||||
|
||||
class ProgramType(str): |
||||
"""Type of program engine runs, for now it just contain `JAVA`, `SCALA` and `PYTHON`.""" |
||||
|
||||
JAVA = "JAVA" |
||||
SCALA = "SCALA" |
||||
PYTHON = "PYTHON" |
||||
|
||||
|
||||
class Engine(Task): |
||||
"""Task engine object, declare behavior for engine task to dolphinscheduler. |
||||
|
||||
This is the parent class of spark, flink and mr tasks, |
||||
and is used to provide the programType, mainClass and mainJar task parameters for reuse. |
||||
""" |
||||
|
||||
def __init__( |
||||
self, |
||||
name: str, |
||||
task_type: str, |
||||
main_class: str, |
||||
main_package: str, |
||||
program_type: Optional[ProgramType] = ProgramType.SCALA, |
||||
*args, |
||||
**kwargs |
||||
): |
||||
super().__init__(name, task_type, *args, **kwargs) |
||||
self.main_class = main_class |
||||
self.main_package = main_package |
||||
self.program_type = program_type |
||||
self._resource = {} |
||||
|
||||
def get_resource_info(self, program_type, main_package): |
||||
"""Get resource info from java gateway, contains resource id, name.""" |
||||
if self._resource: |
||||
return self._resource |
||||
else: |
||||
gateway = launch_gateway() |
||||
try: |
||||
self._resource = gateway.entry_point.getResourcesFileInfo( |
||||
program_type, main_package |
||||
) |
||||
# Handler source do not exists error, for now we just terminate the process. |
||||
except Py4JJavaError as ex: |
||||
raise PyDSParamException(str(ex.java_exception)) |
||||
return self._resource |
||||
|
||||
def get_jar_id(self) -> int: |
||||
"""Get jar id from java gateway, a wrapper for :func:`get_resource_info`.""" |
||||
return self.get_resource_info(self.program_type, self.main_package).get("id") |
||||
|
||||
@property |
||||
def task_params(self, camel_attr: bool = True, custom_attr: set = None) -> Dict: |
||||
"""Override Task.task_params for engine children task. |
||||
|
||||
children task have some specials attribute for task_params, and is odd if we |
||||
directly set as python property, so we Override Task.task_params here. |
||||
""" |
||||
params = super().task_params |
||||
custom_params = { |
||||
"programType": self.program_type, |
||||
"mainClass": self.main_class, |
||||
"mainJar": { |
||||
"id": self.get_jar_id(), |
||||
}, |
||||
} |
||||
params.update(custom_params) |
||||
return params |
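Since `Engine` is the shared parent for Spark, Flink and MR tasks, a new engine-backed task only needs to forward its task type string plus the jar details. A minimal sketch of that reuse, assuming the subclassing pattern above; the `MapReduce` class and the `"MR"` task type string are hypothetical, not part of this change:

from typing import Optional

from pydolphinscheduler.core.engine import Engine, ProgramType


class MapReduce(Engine):
    """Hypothetical MR task; Engine supplies programType, mainClass and mainJar."""

    def __init__(
        self,
        name: str,
        main_class: str,
        main_package: str,
        program_type: Optional[ProgramType] = ProgramType.SCALA,
        *args,
        **kwargs
    ):
        # "MR" stands in for a real TaskType constant; the actual name is an assumption.
        super().__init__(
            name, "MR", main_class, main_package, program_type, *args, **kwargs
        )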
@@ -0,0 +1,94 @@
# Licensed to the Apache Software Foundation (ASF) under the Apache License,
# Version 2.0 (same full license header as in the first file).

"""Task Spark.""" |
||||
|
||||
from typing import Optional |
||||
|
||||
from pydolphinscheduler.constants import TaskType |
||||
from pydolphinscheduler.core.engine import Engine, ProgramType |
||||
|
||||
|
||||
class SparkVersion(str): |
||||
"""Spark version, for now it just contain `SPARK1` and `SPARK2`.""" |
||||
|
||||
SPARK1 = "SPARK1" |
||||
SPARK2 = "SPARK2" |
||||
|
||||
|
||||
class DeployMode(str): |
||||
"""SPARK deploy mode, for now it just contain `LOCAL`, `CLIENT` and `CLUSTER`.""" |
||||
|
||||
LOCAL = "local" |
||||
CLIENT = "client" |
||||
CLUSTER = "cluster" |
||||
|
||||
|
||||
class Spark(Engine): |
||||
"""Task spark object, declare behavior for spark task to dolphinscheduler.""" |
||||
|
||||
_task_custom_attr = { |
||||
"deploy_mode", |
||||
"spark_version", |
||||
"driver_cores", |
||||
"driver_memory", |
||||
"num_executors", |
||||
"executor_memory", |
||||
"executor_cores", |
||||
"app_name", |
||||
"main_args", |
||||
"others", |
||||
} |
||||
|
||||
def __init__( |
||||
self, |
||||
name: str, |
||||
main_class: str, |
||||
main_package: str, |
||||
program_type: Optional[ProgramType] = ProgramType.SCALA, |
||||
deploy_mode: Optional[DeployMode] = DeployMode.CLUSTER, |
||||
spark_version: Optional[SparkVersion] = SparkVersion.SPARK2, |
||||
app_name: Optional[str] = None, |
||||
driver_cores: Optional[int] = 1, |
||||
driver_memory: Optional[str] = "512M", |
||||
num_executors: Optional[int] = 2, |
||||
executor_memory: Optional[str] = "2G", |
||||
executor_cores: Optional[int] = 2, |
||||
main_args: Optional[str] = None, |
||||
others: Optional[str] = None, |
||||
*args, |
||||
**kwargs |
||||
): |
||||
super().__init__( |
||||
name, |
||||
TaskType.SPARK, |
||||
main_class, |
||||
main_package, |
||||
program_type, |
||||
*args, |
||||
**kwargs |
||||
) |
||||
self.deploy_mode = deploy_mode |
||||
self.spark_version = spark_version |
||||
self.app_name = app_name |
||||
self.driver_cores = driver_cores |
||||
self.driver_memory = driver_memory |
||||
self.num_executors = num_executors |
||||
self.executor_memory = executor_memory |
||||
self.executor_cores = executor_cores |
||||
self.main_args = main_args |
||||
self.others = others |
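For reference, a usage sketch of the parameters above, assuming a reachable Java gateway so the jar lookup in `Engine.get_resource_info` succeeds; the tenant, jar and argument values are placeholders, not values from this change:

from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.tasks.spark import DeployMode, ProgramType, Spark, SparkVersion

with ProcessDefinition(name="spark_cluster_example", tenant="tenant_exists") as pd:
    task = Spark(
        name="task_spark_cluster",
        main_class="org.apache.spark.examples.SparkPi",
        main_package="spark-examples_2.12-3.2.0.jar",
        program_type=ProgramType.JAVA,
        deploy_mode=DeployMode.CLUSTER,  # CLUSTER is the default; spelled out here
        spark_version=SparkVersion.SPARK2,
        num_executors=4,                 # raise executor count from the default of 2
        executor_memory="4G",
        main_args="100",                 # forwarded to the main class as program arguments
    )
    pd.run()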
@@ -0,0 +1,147 @@
# Licensed to the Apache Software Foundation (ASF) under the Apache License,
# Version 2.0 (same full license header as in the first file).

"""Test Task Engine.""" |
||||
|
||||
|
||||
from unittest.mock import patch |
||||
|
||||
import pytest |
||||
|
||||
from pydolphinscheduler.core.engine import Engine, ProgramType |
||||
|
||||
TEST_ENGINE_TASK_TYPE = "ENGINE" |
||||
TEST_MAIN_CLASS = "org.apache.examples.mock.Mock" |
||||
TEST_MAIN_PACKAGE = "Mock.jar" |
||||
TEST_PROGRAM_TYPE = ProgramType.JAVA |
||||
|
||||
|
||||
@patch( |
||||
"pydolphinscheduler.core.task.Task.gen_code_and_version", |
||||
return_value=(123, 1), |
||||
) |
||||
@patch( |
||||
"pydolphinscheduler.core.engine.Engine.get_resource_info", |
||||
return_value=({"id": 1, "name": "mock_name"}), |
||||
) |
||||
def test_get_jar_detail(mock_resource, mock_code_version): |
||||
"""Test :func:`get_jar_id` can return expect value.""" |
||||
name = "test_get_jar_detail" |
||||
task = Engine( |
||||
name, |
||||
TEST_ENGINE_TASK_TYPE, |
||||
TEST_MAIN_CLASS, |
||||
TEST_MAIN_PACKAGE, |
||||
TEST_PROGRAM_TYPE, |
||||
) |
||||
assert 1 == task.get_jar_id() |
||||
|
||||
|
||||
@pytest.mark.parametrize( |
||||
"attr, expect", |
||||
[ |
||||
( |
||||
{ |
||||
"name": "test-task-params", |
||||
"task_type": "test-engine", |
||||
"main_class": "org.apache.examples.mock.Mock", |
||||
"main_package": "TestMock.jar", |
||||
"program_type": ProgramType.JAVA, |
||||
}, |
||||
{ |
||||
"mainClass": "org.apache.examples.mock.Mock", |
||||
"mainJar": { |
||||
"id": 1, |
||||
}, |
||||
"programType": ProgramType.JAVA, |
||||
"localParams": [], |
||||
"resourceList": [], |
||||
"dependence": {}, |
||||
"conditionResult": {"successNode": [""], "failedNode": [""]}, |
||||
"waitStartTimeout": {}, |
||||
}, |
||||
) |
||||
], |
||||
) |
||||
@patch( |
||||
"pydolphinscheduler.core.task.Task.gen_code_and_version", |
||||
return_value=(123, 1), |
||||
) |
||||
@patch( |
||||
"pydolphinscheduler.core.engine.Engine.get_resource_info", |
||||
return_value=({"id": 1, "name": "mock_name"}), |
||||
) |
||||
def test_property_task_params(mock_resource, mock_code_version, attr, expect): |
||||
"""Test task engine task property.""" |
||||
task = Engine(**attr) |
||||
assert expect == task.task_params |
||||
|
||||
|
||||
@pytest.mark.parametrize( |
||||
"attr, expect", |
||||
[ |
||||
( |
||||
{ |
||||
"name": "test-task-test_engine_get_define", |
||||
"task_type": "test-engine", |
||||
"main_class": "org.apache.examples.mock.Mock", |
||||
"main_package": "TestMock.jar", |
||||
"program_type": ProgramType.JAVA, |
||||
}, |
||||
{ |
||||
"code": 123, |
||||
"name": "test-task-test_engine_get_define", |
||||
"version": 1, |
||||
"description": None, |
||||
"delayTime": 0, |
||||
"taskType": "test-engine", |
||||
"taskParams": { |
||||
"mainClass": "org.apache.examples.mock.Mock", |
||||
"mainJar": { |
||||
"id": 1, |
||||
}, |
||||
"programType": ProgramType.JAVA, |
||||
"localParams": [], |
||||
"resourceList": [], |
||||
"dependence": {}, |
||||
"conditionResult": {"successNode": [""], "failedNode": [""]}, |
||||
"waitStartTimeout": {}, |
||||
}, |
||||
"flag": "YES", |
||||
"taskPriority": "MEDIUM", |
||||
"workerGroup": "default", |
||||
"failRetryTimes": 0, |
||||
"failRetryInterval": 1, |
||||
"timeoutFlag": "CLOSE", |
||||
"timeoutNotifyStrategy": None, |
||||
"timeout": 0, |
||||
}, |
||||
) |
||||
], |
||||
) |
||||
@patch( |
||||
"pydolphinscheduler.core.task.Task.gen_code_and_version", |
||||
return_value=(123, 1), |
||||
) |
||||
@patch( |
||||
"pydolphinscheduler.core.engine.Engine.get_resource_info", |
||||
return_value=({"id": 1, "name": "mock_name"}), |
||||
) |
||||
def test_engine_get_define(mock_resource, mock_code_version, attr, expect): |
||||
"""Test task engine function get_define.""" |
||||
task = Engine(**attr) |
||||
assert task.get_define() == expect |
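A note on the mock argument order in the tests above: stacked `@patch` decorators apply bottom-up, so the decorator nearest the function (`get_resource_info`) binds to the first parameter (`mock_resource`). A minimal standalone illustration of that ordering, unrelated to the pydolphinscheduler API:

from unittest.mock import patch


@patch("os.path.exists", return_value=True)    # outer decorator -> second argument
@patch("os.path.isfile", return_value=False)   # inner decorator -> first argument
def demo(mock_isfile, mock_exists):
    assert mock_isfile.return_value is False
    assert mock_exists.return_value is True


demo()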
@@ -0,0 +1,82 @@
# Licensed to the Apache Software Foundation (ASF) under the Apache License,
# Version 2.0 (same full license header as in the first file).

"""Test Task Spark.""" |
||||
|
||||
from unittest.mock import patch |
||||
|
||||
from pydolphinscheduler.tasks.spark import DeployMode, ProgramType, Spark, SparkVersion |
||||
|
||||
|
||||
@patch( |
||||
"pydolphinscheduler.core.engine.Engine.get_resource_info", |
||||
return_value=({"id": 1, "name": "test"}), |
||||
) |
||||
def test_spark_get_define(mock_resource): |
||||
"""Test task spark function get_define.""" |
||||
code = 123 |
||||
version = 1 |
||||
name = "test_spark_get_define" |
||||
main_class = "org.apache.spark.test_main_class" |
||||
main_package = "test_main_package" |
||||
program_type = ProgramType.JAVA |
||||
deploy_mode = DeployMode.LOCAL |
||||
|
||||
expect = { |
||||
"code": code, |
||||
"name": name, |
||||
"version": 1, |
||||
"description": None, |
||||
"delayTime": 0, |
||||
"taskType": "SPARK", |
||||
"taskParams": { |
||||
"mainClass": main_class, |
||||
"mainJar": { |
||||
"id": 1, |
||||
}, |
||||
"programType": program_type, |
||||
"deployMode": deploy_mode, |
||||
"sparkVersion": SparkVersion.SPARK2, |
||||
"driverCores": 1, |
||||
"driverMemory": "512M", |
||||
"numExecutors": 2, |
||||
"executorMemory": "2G", |
||||
"executorCores": 2, |
||||
"appName": None, |
||||
"mainArgs": None, |
||||
"others": None, |
||||
"localParams": [], |
||||
"resourceList": [], |
||||
"dependence": {}, |
||||
"conditionResult": {"successNode": [""], "failedNode": [""]}, |
||||
"waitStartTimeout": {}, |
||||
}, |
||||
"flag": "YES", |
||||
"taskPriority": "MEDIUM", |
||||
"workerGroup": "default", |
||||
"failRetryTimes": 0, |
||||
"failRetryInterval": 1, |
||||
"timeoutFlag": "CLOSE", |
||||
"timeoutNotifyStrategy": None, |
||||
"timeout": 0, |
||||
} |
||||
with patch( |
||||
"pydolphinscheduler.core.task.Task.gen_code_and_version", |
||||
return_value=(code, version), |
||||
): |
||||
task = Spark(name, main_class, main_package, program_type, deploy_mode) |
||||
assert task.get_define() == expect |