Jiajie Zhong
3 years ago
committed by
GitHub
45 changed files with 1394 additions and 30 deletions
@ -0,0 +1,39 @@ |
|||||||
|
# Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
# or more contributor license agreements. See the NOTICE file
|
||||||
|
# distributed with this work for additional information
|
||||||
|
# regarding copyright ownership. The ASF licenses this file
|
||||||
|
# to you under the Apache License, Version 2.0 (the
|
||||||
|
# "License"); you may not use this file except in compliance
|
||||||
|
# with the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing,
|
||||||
|
# software distributed under the License is distributed on an
|
||||||
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
# KIND, either express or implied. See the License for the
|
||||||
|
# specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
|
||||||
|
# Minimal makefile for Sphinx documentation
|
||||||
|
#
|
||||||
|
|
||||||
|
# You can set these variables from the command line, and also
|
||||||
|
# from the environment for the first two.
|
||||||
|
|
||||||
|
# Add opts `turn warnings into errors` strict sphinx-build behavior
|
||||||
|
SPHINXOPTS ?= -W
|
||||||
|
SPHINXBUILD ?= sphinx-build
|
||||||
|
SOURCEDIR = source
|
||||||
|
BUILDDIR = build
|
||||||
|
|
||||||
|
# Put it first so that "make" without argument is like "make help".
|
||||||
|
help: |
||||||
|
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||||
|
|
||||||
|
.PHONY: help Makefile |
||||||
|
|
||||||
|
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||||
|
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||||
|
%: Makefile |
||||||
|
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
@ -0,0 +1,54 @@ |
|||||||
|
REM Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
REM or more contributor license agreements. See the NOTICE file |
||||||
|
REM distributed with this work for additional information |
||||||
|
REM regarding copyright ownership. The ASF licenses this file |
||||||
|
REM to you under the Apache License, Version 2.0 (the |
||||||
|
REM "License"); you may not use this file except in compliance |
||||||
|
REM with the License. You may obtain a copy of the License at |
||||||
|
REM |
||||||
|
REM http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
REM |
||||||
|
REM Unless required by applicable law or agreed to in writing, |
||||||
|
REM software distributed under the License is distributed on an |
||||||
|
REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
REM KIND, either express or implied. See the License for the |
||||||
|
REM specific language governing permissions and limitations |
||||||
|
REM under the License. |
||||||
|
|
||||||
|
@ECHO OFF |
||||||
|
|
||||||
|
pushd %~dp0 |
||||||
|
|
||||||
|
REM Command file for Sphinx documentation |
||||||
|
|
||||||
|
if "%SPHINXBUILD%" == "" ( |
||||||
|
set SPHINXBUILD=sphinx-build |
||||||
|
) |
||||||
|
set SOURCEDIR=source |
||||||
|
set BUILDDIR=build |
||||||
|
REM Add opts `turn warnings into errors` strict sphinx-build behavior |
||||||
|
set SPHINXOPTS=-W |
||||||
|
|
||||||
|
if "%1" == "" goto help |
||||||
|
|
||||||
|
%SPHINXBUILD% >NUL 2>NUL |
||||||
|
if errorlevel 9009 ( |
||||||
|
echo. |
||||||
|
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx |
||||||
|
echo.installed, then set the SPHINXBUILD environment variable to point |
||||||
|
echo.to the full path of the 'sphinx-build' executable. Alternatively you |
||||||
|
echo.may add the Sphinx directory to PATH. |
||||||
|
echo. |
||||||
|
echo.If you don't have Sphinx installed, grab it from |
||||||
|
echo.https://www.sphinx-doc.org/ |
||||||
|
exit /b 1 |
||||||
|
) |
||||||
|
|
||||||
|
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% |
||||||
|
goto end |
||||||
|
|
||||||
|
:help |
||||||
|
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% |
||||||
|
|
||||||
|
:end |
||||||
|
popd |
@ -0,0 +1,47 @@ |
|||||||
|
.. Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
or more contributor license agreements. See the NOTICE file |
||||||
|
distributed with this work for additional information |
||||||
|
regarding copyright ownership. The ASF licenses this file |
||||||
|
to you under the Apache License, Version 2.0 (the |
||||||
|
"License"); you may not use this file except in compliance |
||||||
|
with the License. You may obtain a copy of the License at |
||||||
|
|
||||||
|
.. http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
|
||||||
|
.. Unless required by applicable law or agreed to in writing, |
||||||
|
software distributed under the License is distributed on an |
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
KIND, either express or implied. See the License for the |
||||||
|
specific language governing permissions and limitations |
||||||
|
under the License. |
||||||
|
|
||||||
|
API |
||||||
|
=== |
||||||
|
|
||||||
|
Core |
||||||
|
---- |
||||||
|
|
||||||
|
.. automodule:: pydolphinscheduler.core |
||||||
|
:inherited-members: |
||||||
|
|
||||||
|
Sides |
||||||
|
----- |
||||||
|
|
||||||
|
.. automodule:: pydolphinscheduler.side |
||||||
|
:inherited-members: |
||||||
|
|
||||||
|
Tasks |
||||||
|
----- |
||||||
|
|
||||||
|
.. automodule:: pydolphinscheduler.tasks |
||||||
|
:inherited-members: |
||||||
|
|
||||||
|
Constants |
||||||
|
--------- |
||||||
|
|
||||||
|
.. automodule:: pydolphinscheduler.constants |
||||||
|
|
||||||
|
Exceptions |
||||||
|
---------- |
||||||
|
|
||||||
|
.. automodule:: pydolphinscheduler.exceptions |
@ -0,0 +1,151 @@ |
|||||||
|
.. Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
or more contributor license agreements. See the NOTICE file |
||||||
|
distributed with this work for additional information |
||||||
|
regarding copyright ownership. The ASF licenses this file |
||||||
|
to you under the Apache License, Version 2.0 (the |
||||||
|
"License"); you may not use this file except in compliance |
||||||
|
with the License. You may obtain a copy of the License at |
||||||
|
|
||||||
|
.. http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
|
||||||
|
.. Unless required by applicable law or agreed to in writing, |
||||||
|
software distributed under the License is distributed on an |
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
KIND, either express or implied. See the License for the |
||||||
|
specific language governing permissions and limitations |
||||||
|
under the License. |
||||||
|
|
||||||
|
Concepts |
||||||
|
======== |
||||||
|
|
||||||
|
In this section, you would know the core concepts of *PyDolphinScheduler*. |
||||||
|
|
||||||
|
Process Definition |
||||||
|
------------------ |
||||||
|
|
||||||
|
Process definition describe the whole things except `tasks`_ and `tasks dependence`_, which including |
||||||
|
name, schedule interval, schedule start time and end time. You would know scheduler |
||||||
|
|
||||||
|
Process definition could be initialized in normal assign statement or in context manger. |
||||||
|
|
||||||
|
.. code-block:: python |
||||||
|
|
||||||
|
# Initialization with assign statement |
||||||
|
pd = ProcessDefinition(name="my first process definition") |
||||||
|
|
||||||
|
# Or context manger |
||||||
|
with ProcessDefinition(name="my first process definition") as pd: |
||||||
|
pd.submit() |
||||||
|
|
||||||
|
Process definition is the main object communicate between *PyDolphinScheduler* and DolphinScheduler daemon. |
||||||
|
After process definition and task is be declared, you could use `submit` and `run` notify server your definition. |
||||||
|
|
||||||
|
If you just want to submit your definition and create workflow, without run it, you should use attribute `submit`. |
||||||
|
But if you want to run the workflow after you submit it, you could use attribute `run`. |
||||||
|
|
||||||
|
.. code-block:: python |
||||||
|
|
||||||
|
# Just submit definition, without run it |
||||||
|
pd.submit() |
||||||
|
|
||||||
|
# Both submit and run definition |
||||||
|
pd.run() |
||||||
|
|
||||||
|
Schedule |
||||||
|
~~~~~~~~ |
||||||
|
|
||||||
|
We use parameter `schedule` determine the schedule interval of workflow, *PyDolphinScheduler* support seven |
||||||
|
asterisks expression, and each of the meaning of position as below |
||||||
|
|
||||||
|
.. code-block:: text |
||||||
|
|
||||||
|
* * * * * * * |
||||||
|
┬ ┬ ┬ ┬ ┬ ┬ ┬ |
||||||
|
│ │ │ │ │ │ │ |
||||||
|
│ │ │ │ │ │ └─── year |
||||||
|
│ │ │ │ │ └───── day of week (0 - 7) (0 to 6 are Sunday to Saturday, or use names; 7 is Sunday, the same as 0) |
||||||
|
│ │ │ │ └─────── month (1 - 12) |
||||||
|
│ │ │ └───────── day of month (1 - 31) |
||||||
|
│ │ └─────────── hour (0 - 23) |
||||||
|
│ └───────────── min (0 - 59) |
||||||
|
└─────────────── second (0 - 59) |
||||||
|
|
||||||
|
Here we add some example crontab: |
||||||
|
|
||||||
|
- `0 0 0 * * ? *`: Workflow execute every day at 00:00:00. |
||||||
|
- `10 2 * * * ? *`: Workflow execute hourly day at ten pass two. |
||||||
|
- `10,11 20 0 1,2 * ? *`: Workflow execute first and second day of month at 00:20:10 and 00:20:11. |
||||||
|
|
||||||
|
Tenant |
||||||
|
~~~~~~ |
||||||
|
|
||||||
|
Tenant is the user who run task command in machine or in virtual machine. it could be assign by simple string. |
||||||
|
|
||||||
|
.. code-block:: python |
||||||
|
|
||||||
|
# |
||||||
|
pd = ProcessDefinition(name="process definition tenant", tenant="tenant_exists") |
||||||
|
|
||||||
|
.. note:: |
||||||
|
|
||||||
|
Make should tenant exists in target machine, otherwise it will raise an error when you try to run command |
||||||
|
|
||||||
|
Tasks |
||||||
|
----- |
||||||
|
|
||||||
|
Task is the minimum unit running actual job, and it is nodes of DAG, aka directed acyclic graph. You could define |
||||||
|
what you want to in the task. It have some required parameter to make uniqueness and definition. |
||||||
|
|
||||||
|
Here we use :py:meth:`pydolphinscheduler.tasks.Shell` as example, parameter `name` and `command` is required and must be provider. Parameter |
||||||
|
`name` set name to the task, and parameter `command` declare the command you wish to run in this task. |
||||||
|
|
||||||
|
.. code-block:: python |
||||||
|
|
||||||
|
# We named this task as "shell", and just run command `echo shell task` |
||||||
|
shell_task = Shell(name="shell", command="echo shell task") |
||||||
|
|
||||||
|
If you want to see all type of tasks, you could see :doc:`tasks/index`. |
||||||
|
|
||||||
|
Tasks Dependence |
||||||
|
~~~~~~~~~~~~~~~~ |
||||||
|
|
||||||
|
You could define many tasks in on single `Process Definition`_. If all those task is in parallel processing, |
||||||
|
then you could leave them alone without adding any additional information. But if there have some tasks should |
||||||
|
not be run unless pre task in workflow have be done, we should set task dependence to them. Set tasks dependence |
||||||
|
have two mainly way and both of them is easy. You could use bitwise operator `>>` and `<<`, or task attribute |
||||||
|
`set_downstream` and `set_upstream` to do it. |
||||||
|
|
||||||
|
.. code-block:: python |
||||||
|
|
||||||
|
# Set task1 as task2 upstream |
||||||
|
task1 >> task2 |
||||||
|
# You could use attribute `set_downstream` too, is same as `task1 >> task2` |
||||||
|
task1.set_downstream(task2) |
||||||
|
|
||||||
|
# Set task1 as task2 downstream |
||||||
|
task1 << task2 |
||||||
|
# It is same as attribute `set_upstream` |
||||||
|
task1.set_upstream(task2) |
||||||
|
|
||||||
|
# Beside, we could set dependence between task and sequence of tasks, |
||||||
|
# we set `task1` is upstream to both `task2` and `task3`. It is useful |
||||||
|
# for some tasks have same dependence. |
||||||
|
task1 >> [task2, task3] |
||||||
|
|
||||||
|
Task With Process Definition |
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
||||||
|
|
||||||
|
In most of data orchestration cases, you should assigned attribute `process_definition` to task instance to |
||||||
|
decide workflow of task. You could set `process_definition` in both normal assign or in context manger mode |
||||||
|
|
||||||
|
.. code-block:: python |
||||||
|
|
||||||
|
# Normal assign, have to explicit declaration and pass `ProcessDefinition` instance to task |
||||||
|
pd = ProcessDefinition(name="my first process definition") |
||||||
|
shell_task = Shell(name="shell", command="echo shell task", process_definition=pd) |
||||||
|
|
||||||
|
# Context manger, `ProcessDefinition` instance pd would implicit declaration to task |
||||||
|
with ProcessDefinition(name="my first process definition") as pd: |
||||||
|
shell_task = Shell(name="shell", command="echo shell task", |
||||||
|
|
||||||
|
With both `Process Definition`_, `Tasks`_ and `Tasks Dependence`_, we could build a workflow with multiple tasks. |
@ -0,0 +1,88 @@ |
|||||||
|
# Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
# or more contributor license agreements. See the NOTICE file |
||||||
|
# distributed with this work for additional information |
||||||
|
# regarding copyright ownership. The ASF licenses this file |
||||||
|
# to you under the Apache License, Version 2.0 (the |
||||||
|
# "License"); you may not use this file except in compliance |
||||||
|
# with the License. You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, |
||||||
|
# software distributed under the License is distributed on an |
||||||
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
# KIND, either express or implied. See the License for the |
||||||
|
# specific language governing permissions and limitations |
||||||
|
# under the License. |
||||||
|
|
||||||
|
# Configuration file for the Sphinx documentation builder. |
||||||
|
# |
||||||
|
# This file only contains a selection of the most common options. For a full |
||||||
|
# list see the documentation: |
||||||
|
# https://www.sphinx-doc.org/en/master/usage/configuration.html |
||||||
|
|
||||||
|
# -- Path setup -------------------------------------------------------------- |
||||||
|
|
||||||
|
# If extensions (or modules to document with autodoc) are in another directory, |
||||||
|
# add these directories to sys.path here. If the directory is relative to the |
||||||
|
# documentation root, use os.path.abspath to make it absolute, like shown here. |
||||||
|
# |
||||||
|
# import os |
||||||
|
# import sys |
||||||
|
# sys.path.insert(0, os.path.abspath('.')) |
||||||
|
|
||||||
|
|
||||||
|
# -- Project information ----------------------------------------------------- |
||||||
|
|
||||||
|
project = "pydolphinscheduler" |
||||||
|
copyright = "2022, apache" |
||||||
|
author = "apache" |
||||||
|
|
||||||
|
# The full version, including alpha/beta/rc tags |
||||||
|
release = "0.0.1" |
||||||
|
|
||||||
|
|
||||||
|
# -- General configuration --------------------------------------------------- |
||||||
|
|
||||||
|
# Add any Sphinx extension module names here, as strings. They can be |
||||||
|
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom |
||||||
|
# ones. |
||||||
|
extensions = [ |
||||||
|
# Measures durations of Sphinx processing |
||||||
|
"sphinx.ext.duration", |
||||||
|
# Semi-automatic make docstrings to document |
||||||
|
"sphinx.ext.autodoc", |
||||||
|
"sphinx.ext.viewcode", |
||||||
|
"sphinx.ext.autosectionlabel", |
||||||
|
"sphinx_rtd_theme", |
||||||
|
] |
||||||
|
|
||||||
|
# Add any paths that contain templates here, relative to this directory. |
||||||
|
templates_path = ["_templates"] |
||||||
|
|
||||||
|
# List of patterns, relative to source directory, that match files and |
||||||
|
# directories to ignore when looking for source files. |
||||||
|
# This pattern also affects html_static_path and html_extra_path. |
||||||
|
exclude_patterns = [] |
||||||
|
|
||||||
|
autodoc_default_options = { |
||||||
|
"members": True, |
||||||
|
"show-inheritance": True, |
||||||
|
"private-members": True, |
||||||
|
"undoc-members": True, |
||||||
|
"member-order": "groupwise", |
||||||
|
} |
||||||
|
|
||||||
|
autosectionlabel_prefix_document = True |
||||||
|
|
||||||
|
# -- Options for HTML output ------------------------------------------------- |
||||||
|
|
||||||
|
# The theme to use for HTML and HTML Help pages. See the documentation for |
||||||
|
# a list of builtin themes. |
||||||
|
# |
||||||
|
html_theme = "sphinx_rtd_theme" |
||||||
|
|
||||||
|
# Add any paths that contain custom static files (such as style sheets) here, |
||||||
|
# relative to this directory. They are copied after the builtin static files, |
||||||
|
# so a file named "default.css" will overwrite the builtin "default.css". |
||||||
|
html_static_path = ["_static"] |
@ -0,0 +1,42 @@ |
|||||||
|
.. Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
or more contributor license agreements. See the NOTICE file |
||||||
|
distributed with this work for additional information |
||||||
|
regarding copyright ownership. The ASF licenses this file |
||||||
|
to you under the Apache License, Version 2.0 (the |
||||||
|
"License"); you may not use this file except in compliance |
||||||
|
with the License. You may obtain a copy of the License at |
||||||
|
|
||||||
|
.. http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
|
||||||
|
.. Unless required by applicable law or agreed to in writing, |
||||||
|
software distributed under the License is distributed on an |
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
KIND, either express or implied. See the License for the |
||||||
|
specific language governing permissions and limitations |
||||||
|
under the License. |
||||||
|
|
||||||
|
PyDolphinScheduler |
||||||
|
================== |
||||||
|
|
||||||
|
**PyDolphinScheduler** is Python API for `Apache DolphinScheduler <https://dolphinscheduler.apache.org>`_, |
||||||
|
which allow you definition your workflow by Python code, aka workflow-as-codes. |
||||||
|
|
||||||
|
I could go and find how to :ref:`install <start:getting started>` the project. Or if you want to see simply example |
||||||
|
then go and see :doc:`tutorial` for more detail. |
||||||
|
|
||||||
|
|
||||||
|
.. toctree:: |
||||||
|
:maxdepth: 2 |
||||||
|
|
||||||
|
start |
||||||
|
tutorial |
||||||
|
concept |
||||||
|
tasks/index |
||||||
|
api |
||||||
|
|
||||||
|
Indices and tables |
||||||
|
================== |
||||||
|
|
||||||
|
* :ref:`genindex` |
||||||
|
* :ref:`modindex` |
||||||
|
* :ref:`search` |
@ -0,0 +1,113 @@ |
|||||||
|
.. Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
or more contributor license agreements. See the NOTICE file |
||||||
|
distributed with this work for additional information |
||||||
|
regarding copyright ownership. The ASF licenses this file |
||||||
|
to you under the Apache License, Version 2.0 (the |
||||||
|
"License"); you may not use this file except in compliance |
||||||
|
with the License. You may obtain a copy of the License at |
||||||
|
|
||||||
|
.. http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
|
||||||
|
.. Unless required by applicable law or agreed to in writing, |
||||||
|
software distributed under the License is distributed on an |
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
KIND, either express or implied. See the License for the |
||||||
|
specific language governing permissions and limitations |
||||||
|
under the License. |
||||||
|
|
||||||
|
Getting Started |
||||||
|
=============== |
||||||
|
|
||||||
|
To get started with *PyDolphinScheduler* you must ensure python and pip |
||||||
|
installed on your machine, if you're already set up, you can skip straight |
||||||
|
to `Installing PyDolphinScheduler`_, otherwise please continue with |
||||||
|
`Installing Python`_. |
||||||
|
|
||||||
|
Installing Python |
||||||
|
----------------- |
||||||
|
|
||||||
|
How to install `python` and `pip` depends on what operating system |
||||||
|
you're using. The python wiki provides up to date |
||||||
|
`instructions for all platforms here`_. When you entering the website |
||||||
|
and choice your operating system, you would be offered the choice and |
||||||
|
select python version. *PyDolphinScheduler* recommend use version above |
||||||
|
Python 3.6 and we highly recommend you install *Stable Releases* instead |
||||||
|
of *Pre-releases*. |
||||||
|
|
||||||
|
After you have download and installed Python, you should open your terminal, |
||||||
|
typing and running :code:`python --version` to check whether the installation |
||||||
|
is correct or not. If all thing good, you could see the version in console |
||||||
|
without error(here is a example after Python 3.8.7 installed) |
||||||
|
|
||||||
|
.. code-block:: bash |
||||||
|
|
||||||
|
$ python --version |
||||||
|
Python 3.8.7 |
||||||
|
|
||||||
|
Installing PyDolphinScheduler |
||||||
|
----------------------------- |
||||||
|
|
||||||
|
After Python is already installed on your machine following section |
||||||
|
`installing Python`_, it easy to *PyDolphinScheduler* by pip. |
||||||
|
|
||||||
|
.. code-block:: bash |
||||||
|
|
||||||
|
$ pip install apache-dolphinscheduler |
||||||
|
|
||||||
|
The latest version of *PyDolphinScheduler* would be installed after you run above |
||||||
|
command in your terminal. You could go and `start Python Gateway Server`_ to finish |
||||||
|
the prepare, and then go to :doc:`tutorial` to make your hand dirty. But if you |
||||||
|
want to install the unreleased version of *PyDolphinScheduler*, you could go and see |
||||||
|
section `installing PyDolphinScheduler in dev`_ for more detail. |
||||||
|
|
||||||
|
Installing PyDolphinScheduler In Dev |
||||||
|
------------------------------------ |
||||||
|
|
||||||
|
Because the project is developing and some of the features still not release. |
||||||
|
If you want to try some thing unreleased you could install from the source code |
||||||
|
which we hold in GitHub |
||||||
|
|
||||||
|
.. code-block:: bash |
||||||
|
|
||||||
|
# Clone Apache DolphinScheduler repository |
||||||
|
$ git clone git@github.com:apache/dolphinscheduler.git |
||||||
|
# Install PyDolphinScheduler in develop mode |
||||||
|
$ cd dolphinscheduler-python/pydolphinscheduler && pip install -e . |
||||||
|
|
||||||
|
After you installed *PyDolphinScheduler*, please remember `start Python Gateway Server`_ |
||||||
|
which waiting for *PyDolphinScheduler*'s workflow definition require. |
||||||
|
|
||||||
|
Start Python Gateway Server |
||||||
|
--------------------------- |
||||||
|
|
||||||
|
Since **PyDolphinScheduler** is Python API for `Apache DolphinScheduler`_, it |
||||||
|
could define workflow and tasks structure, but could not run it unless you |
||||||
|
`install Apache DolphinScheduler`_ and start Python gateway server. We only |
||||||
|
and some key steps here and you could go `install Apache DolphinScheduler`_ |
||||||
|
for more detail |
||||||
|
|
||||||
|
.. code-block:: bash |
||||||
|
|
||||||
|
# Start pythonGatewayServer |
||||||
|
$ ./bin/dolphinscheduler-daemon.sh start pythonGatewayServer |
||||||
|
|
||||||
|
To check whether the server is alive or not, you could run :code:`jps`. And |
||||||
|
the server is health if keyword `PythonGatewayServer` in the console. |
||||||
|
|
||||||
|
.. code-block:: bash |
||||||
|
|
||||||
|
$ jps |
||||||
|
.... |
||||||
|
201472 PythonGatewayServer |
||||||
|
.... |
||||||
|
|
||||||
|
What's More |
||||||
|
----------- |
||||||
|
|
||||||
|
If you do not familiar with *PyDolphinScheduler*, you could go to :doc:`tutorial` |
||||||
|
and see how it work. But if you already know the inside of *PyDolphinScheduler*, |
||||||
|
maybe you could go and play with all :doc:`tasks/index` *PyDolphinScheduler* supports. |
||||||
|
|
||||||
|
.. _`instructions for all platforms here`: https://wiki.python.org/moin/BeginnersGuide/Download |
||||||
|
.. _`Apache DolphinScheduler`: https://dolphinscheduler.apache.org |
||||||
|
.. _`install Apache DolphinScheduler`: https://dolphinscheduler.apache.org/en-us/docs/latest/user_doc/guide/installation/standalone.html |
@ -0,0 +1,33 @@ |
|||||||
|
.. Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
or more contributor license agreements. See the NOTICE file |
||||||
|
distributed with this work for additional information |
||||||
|
regarding copyright ownership. The ASF licenses this file |
||||||
|
to you under the Apache License, Version 2.0 (the |
||||||
|
"License"); you may not use this file except in compliance |
||||||
|
with the License. You may obtain a copy of the License at |
||||||
|
|
||||||
|
.. http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
|
||||||
|
.. Unless required by applicable law or agreed to in writing, |
||||||
|
software distributed under the License is distributed on an |
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
KIND, either express or implied. See the License for the |
||||||
|
specific language governing permissions and limitations |
||||||
|
under the License. |
||||||
|
|
||||||
|
Condition |
||||||
|
========= |
||||||
|
|
||||||
|
A condition task type's example and dive into information of **PyDolphinScheduler**. |
||||||
|
|
||||||
|
Example |
||||||
|
------- |
||||||
|
|
||||||
|
.. literalinclude:: ../../../examples/task_condition_example.py |
||||||
|
:start-after: [start workflow_declare] |
||||||
|
:end-before: [end workflow_declare] |
||||||
|
|
||||||
|
Dive Into |
||||||
|
--------- |
||||||
|
|
||||||
|
.. automodule:: pydolphinscheduler.tasks.condition |
@ -0,0 +1,33 @@ |
|||||||
|
.. Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
or more contributor license agreements. See the NOTICE file |
||||||
|
distributed with this work for additional information |
||||||
|
regarding copyright ownership. The ASF licenses this file |
||||||
|
to you under the Apache License, Version 2.0 (the |
||||||
|
"License"); you may not use this file except in compliance |
||||||
|
with the License. You may obtain a copy of the License at |
||||||
|
|
||||||
|
.. http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
|
||||||
|
.. Unless required by applicable law or agreed to in writing, |
||||||
|
software distributed under the License is distributed on an |
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
KIND, either express or implied. See the License for the |
||||||
|
specific language governing permissions and limitations |
||||||
|
under the License. |
||||||
|
|
||||||
|
Datax |
||||||
|
===== |
||||||
|
|
||||||
|
A DataX task type's example and dive into information of **PyDolphinScheduler**. |
||||||
|
|
||||||
|
Example |
||||||
|
------- |
||||||
|
|
||||||
|
.. literalinclude:: ../../../examples/task_datax_example.py |
||||||
|
:start-after: [start workflow_declare] |
||||||
|
:end-before: [end workflow_declare] |
||||||
|
|
||||||
|
Dive Into |
||||||
|
--------- |
||||||
|
|
||||||
|
.. automodule:: pydolphinscheduler.tasks.datax |
@ -0,0 +1,33 @@ |
|||||||
|
.. Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
or more contributor license agreements. See the NOTICE file |
||||||
|
distributed with this work for additional information |
||||||
|
regarding copyright ownership. The ASF licenses this file |
||||||
|
to you under the Apache License, Version 2.0 (the |
||||||
|
"License"); you may not use this file except in compliance |
||||||
|
with the License. You may obtain a copy of the License at |
||||||
|
|
||||||
|
.. http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
|
||||||
|
.. Unless required by applicable law or agreed to in writing, |
||||||
|
software distributed under the License is distributed on an |
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
KIND, either express or implied. See the License for the |
||||||
|
specific language governing permissions and limitations |
||||||
|
under the License. |
||||||
|
|
||||||
|
Dependent |
||||||
|
========= |
||||||
|
|
||||||
|
A dependent task type's example and dive into information of **PyDolphinScheduler**. |
||||||
|
|
||||||
|
Example |
||||||
|
------- |
||||||
|
|
||||||
|
.. literalinclude:: ../../../examples/task_dependent_example.py |
||||||
|
:start-after: [start workflow_declare] |
||||||
|
:end-before: [end workflow_declare] |
||||||
|
|
||||||
|
Dive Into |
||||||
|
--------- |
||||||
|
|
||||||
|
.. automodule:: pydolphinscheduler.tasks.dependent |
@ -0,0 +1,33 @@ |
|||||||
|
.. Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
or more contributor license agreements. See the NOTICE file |
||||||
|
distributed with this work for additional information |
||||||
|
regarding copyright ownership. The ASF licenses this file |
||||||
|
to you under the Apache License, Version 2.0 (the |
||||||
|
"License"); you may not use this file except in compliance |
||||||
|
with the License. You may obtain a copy of the License at |
||||||
|
|
||||||
|
.. http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
|
||||||
|
.. Unless required by applicable law or agreed to in writing, |
||||||
|
software distributed under the License is distributed on an |
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
KIND, either express or implied. See the License for the |
||||||
|
specific language governing permissions and limitations |
||||||
|
under the License. |
||||||
|
|
||||||
|
Flink |
||||||
|
===== |
||||||
|
|
||||||
|
A flink task type's example and dive into information of **PyDolphinScheduler**. |
||||||
|
|
||||||
|
Example |
||||||
|
------- |
||||||
|
|
||||||
|
.. literalinclude:: ../../../examples/task_flink_example.py |
||||||
|
:start-after: [start workflow_declare] |
||||||
|
:end-before: [end workflow_declare] |
||||||
|
|
||||||
|
Dive Into |
||||||
|
--------- |
||||||
|
|
||||||
|
.. automodule:: pydolphinscheduler.tasks.flink |
@ -0,0 +1,21 @@ |
|||||||
|
.. Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
or more contributor license agreements. See the NOTICE file |
||||||
|
distributed with this work for additional information |
||||||
|
regarding copyright ownership. The ASF licenses this file |
||||||
|
to you under the Apache License, Version 2.0 (the |
||||||
|
"License"); you may not use this file except in compliance |
||||||
|
with the License. You may obtain a copy of the License at |
||||||
|
|
||||||
|
.. http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
|
||||||
|
.. Unless required by applicable law or agreed to in writing, |
||||||
|
software distributed under the License is distributed on an |
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
KIND, either express or implied. See the License for the |
||||||
|
specific language governing permissions and limitations |
||||||
|
under the License. |
||||||
|
|
||||||
|
HTTP |
||||||
|
==== |
||||||
|
|
||||||
|
.. automodule:: pydolphinscheduler.tasks.http |
@ -0,0 +1,41 @@ |
|||||||
|
.. Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
or more contributor license agreements. See the NOTICE file |
||||||
|
distributed with this work for additional information |
||||||
|
regarding copyright ownership. The ASF licenses this file |
||||||
|
to you under the Apache License, Version 2.0 (the |
||||||
|
"License"); you may not use this file except in compliance |
||||||
|
with the License. You may obtain a copy of the License at |
||||||
|
|
||||||
|
.. http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
|
||||||
|
.. Unless required by applicable law or agreed to in writing, |
||||||
|
software distributed under the License is distributed on an |
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
KIND, either express or implied. See the License for the |
||||||
|
specific language governing permissions and limitations |
||||||
|
under the License. |
||||||
|
|
||||||
|
Tasks |
||||||
|
===== |
||||||
|
|
||||||
|
In this section |
||||||
|
|
||||||
|
.. toctree:: |
||||||
|
:maxdepth: 1 |
||||||
|
|
||||||
|
shell |
||||||
|
sql |
||||||
|
python |
||||||
|
http |
||||||
|
|
||||||
|
switch |
||||||
|
condition |
||||||
|
dependent |
||||||
|
|
||||||
|
spark |
||||||
|
flink |
||||||
|
map_reduce |
||||||
|
procedure |
||||||
|
|
||||||
|
datax |
||||||
|
sub_process |
@ -0,0 +1,34 @@ |
|||||||
|
.. Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
or more contributor license agreements. See the NOTICE file |
||||||
|
distributed with this work for additional information |
||||||
|
regarding copyright ownership. The ASF licenses this file |
||||||
|
to you under the Apache License, Version 2.0 (the |
||||||
|
"License"); you may not use this file except in compliance |
||||||
|
with the License. You may obtain a copy of the License at |
||||||
|
|
||||||
|
.. http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
|
||||||
|
.. Unless required by applicable law or agreed to in writing, |
||||||
|
software distributed under the License is distributed on an |
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
KIND, either express or implied. See the License for the |
||||||
|
specific language governing permissions and limitations |
||||||
|
under the License. |
||||||
|
|
||||||
|
Map Reduce |
||||||
|
========== |
||||||
|
|
||||||
|
|
||||||
|
A Map Reduce task type's example and dive into information of **PyDolphinScheduler**. |
||||||
|
|
||||||
|
Example |
||||||
|
------- |
||||||
|
|
||||||
|
.. literalinclude:: ../../../examples/task_map_reduce_example.py |
||||||
|
:start-after: [start workflow_declare] |
||||||
|
:end-before: [end workflow_declare] |
||||||
|
|
||||||
|
Dive Into |
||||||
|
--------- |
||||||
|
|
||||||
|
.. automodule:: pydolphinscheduler.tasks.map_reduce |
@ -0,0 +1,21 @@ |
|||||||
|
.. Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
or more contributor license agreements. See the NOTICE file |
||||||
|
distributed with this work for additional information |
||||||
|
regarding copyright ownership. The ASF licenses this file |
||||||
|
to you under the Apache License, Version 2.0 (the |
||||||
|
"License"); you may not use this file except in compliance |
||||||
|
with the License. You may obtain a copy of the License at |
||||||
|
|
||||||
|
.. http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
|
||||||
|
.. Unless required by applicable law or agreed to in writing, |
||||||
|
software distributed under the License is distributed on an |
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
KIND, either express or implied. See the License for the |
||||||
|
specific language governing permissions and limitations |
||||||
|
under the License. |
||||||
|
|
||||||
|
Procedure |
||||||
|
========= |
||||||
|
|
||||||
|
.. automodule:: pydolphinscheduler.tasks.procedure |
@ -0,0 +1,21 @@ |
|||||||
|
.. Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
or more contributor license agreements. See the NOTICE file |
||||||
|
distributed with this work for additional information |
||||||
|
regarding copyright ownership. The ASF licenses this file |
||||||
|
to you under the Apache License, Version 2.0 (the |
||||||
|
"License"); you may not use this file except in compliance |
||||||
|
with the License. You may obtain a copy of the License at |
||||||
|
|
||||||
|
.. http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
|
||||||
|
.. Unless required by applicable law or agreed to in writing, |
||||||
|
software distributed under the License is distributed on an |
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
KIND, either express or implied. See the License for the |
||||||
|
specific language governing permissions and limitations |
||||||
|
under the License. |
||||||
|
|
||||||
|
Python |
||||||
|
====== |
||||||
|
|
||||||
|
.. automodule:: pydolphinscheduler.tasks.python |
@ -0,0 +1,33 @@ |
|||||||
|
.. Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
or more contributor license agreements. See the NOTICE file |
||||||
|
distributed with this work for additional information |
||||||
|
regarding copyright ownership. The ASF licenses this file |
||||||
|
to you under the Apache License, Version 2.0 (the |
||||||
|
"License"); you may not use this file except in compliance |
||||||
|
with the License. You may obtain a copy of the License at |
||||||
|
|
||||||
|
.. http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
|
||||||
|
.. Unless required by applicable law or agreed to in writing, |
||||||
|
software distributed under the License is distributed on an |
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
KIND, either express or implied. See the License for the |
||||||
|
specific language governing permissions and limitations |
||||||
|
under the License. |
||||||
|
|
||||||
|
Shell |
||||||
|
===== |
||||||
|
|
||||||
|
A shell task type's example and dive into information of **PyDolphinScheduler**. |
||||||
|
|
||||||
|
Example |
||||||
|
------- |
||||||
|
|
||||||
|
.. literalinclude:: ../../../examples/tutorial.py |
||||||
|
:start-after: [start workflow_declare] |
||||||
|
:end-before: [end task_relation_declare] |
||||||
|
|
||||||
|
Dive Into |
||||||
|
--------- |
||||||
|
|
||||||
|
.. automodule:: pydolphinscheduler.tasks.shell |
@ -0,0 +1,33 @@ |
|||||||
|
.. Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
or more contributor license agreements. See the NOTICE file |
||||||
|
distributed with this work for additional information |
||||||
|
regarding copyright ownership. The ASF licenses this file |
||||||
|
to you under the Apache License, Version 2.0 (the |
||||||
|
"License"); you may not use this file except in compliance |
||||||
|
with the License. You may obtain a copy of the License at |
||||||
|
|
||||||
|
.. http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
|
||||||
|
.. Unless required by applicable law or agreed to in writing, |
||||||
|
software distributed under the License is distributed on an |
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
KIND, either express or implied. See the License for the |
||||||
|
specific language governing permissions and limitations |
||||||
|
under the License. |
||||||
|
|
||||||
|
Spark |
||||||
|
===== |
||||||
|
|
||||||
|
A spark task type's example and dive into information of **PyDolphinScheduler**. |
||||||
|
|
||||||
|
Example |
||||||
|
------- |
||||||
|
|
||||||
|
.. literalinclude:: ../../../examples/task_spark_example.py |
||||||
|
:start-after: [start workflow_declare] |
||||||
|
:end-before: [end workflow_declare] |
||||||
|
|
||||||
|
Dive Into |
||||||
|
--------- |
||||||
|
|
||||||
|
.. automodule:: pydolphinscheduler.tasks.spark |
@ -0,0 +1,21 @@ |
|||||||
|
.. Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
or more contributor license agreements. See the NOTICE file |
||||||
|
distributed with this work for additional information |
||||||
|
regarding copyright ownership. The ASF licenses this file |
||||||
|
to you under the Apache License, Version 2.0 (the |
||||||
|
"License"); you may not use this file except in compliance |
||||||
|
with the License. You may obtain a copy of the License at |
||||||
|
|
||||||
|
.. http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
|
||||||
|
.. Unless required by applicable law or agreed to in writing, |
||||||
|
software distributed under the License is distributed on an |
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
KIND, either express or implied. See the License for the |
||||||
|
specific language governing permissions and limitations |
||||||
|
under the License. |
||||||
|
|
||||||
|
SQL |
||||||
|
=== |
||||||
|
|
||||||
|
.. automodule:: pydolphinscheduler.tasks.sql |
@ -0,0 +1,21 @@ |
|||||||
|
.. Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
or more contributor license agreements. See the NOTICE file |
||||||
|
distributed with this work for additional information |
||||||
|
regarding copyright ownership. The ASF licenses this file |
||||||
|
to you under the Apache License, Version 2.0 (the |
||||||
|
"License"); you may not use this file except in compliance |
||||||
|
with the License. You may obtain a copy of the License at |
||||||
|
|
||||||
|
.. http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
|
||||||
|
.. Unless required by applicable law or agreed to in writing, |
||||||
|
software distributed under the License is distributed on an |
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
KIND, either express or implied. See the License for the |
||||||
|
specific language governing permissions and limitations |
||||||
|
under the License. |
||||||
|
|
||||||
|
Sub Process |
||||||
|
=========== |
||||||
|
|
||||||
|
.. automodule:: pydolphinscheduler.tasks.sub_process |
@ -0,0 +1,33 @@ |
|||||||
|
.. Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
or more contributor license agreements. See the NOTICE file |
||||||
|
distributed with this work for additional information |
||||||
|
regarding copyright ownership. The ASF licenses this file |
||||||
|
to you under the Apache License, Version 2.0 (the |
||||||
|
"License"); you may not use this file except in compliance |
||||||
|
with the License. You may obtain a copy of the License at |
||||||
|
|
||||||
|
.. http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
|
||||||
|
.. Unless required by applicable law or agreed to in writing, |
||||||
|
software distributed under the License is distributed on an |
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
KIND, either express or implied. See the License for the |
||||||
|
specific language governing permissions and limitations |
||||||
|
under the License. |
||||||
|
|
||||||
|
Switch |
||||||
|
====== |
||||||
|
|
||||||
|
A switch task type's example and dive into information of **PyDolphinScheduler**. |
||||||
|
|
||||||
|
Example |
||||||
|
------- |
||||||
|
|
||||||
|
.. literalinclude:: ../../../examples/task_switch_example.py |
||||||
|
:start-after: [start workflow_declare] |
||||||
|
:end-before: [end workflow_declare] |
||||||
|
|
||||||
|
Dive Into |
||||||
|
--------- |
||||||
|
|
||||||
|
.. automodule:: pydolphinscheduler.tasks.switch |
@ -0,0 +1,150 @@ |
|||||||
|
.. Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
or more contributor license agreements. See the NOTICE file |
||||||
|
distributed with this work for additional information |
||||||
|
regarding copyright ownership. The ASF licenses this file |
||||||
|
to you under the Apache License, Version 2.0 (the |
||||||
|
"License"); you may not use this file except in compliance |
||||||
|
with the License. You may obtain a copy of the License at |
||||||
|
|
||||||
|
.. http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
|
||||||
|
.. Unless required by applicable law or agreed to in writing, |
||||||
|
software distributed under the License is distributed on an |
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
KIND, either express or implied. See the License for the |
||||||
|
specific language governing permissions and limitations |
||||||
|
under the License. |
||||||
|
|
||||||
|
Tutorial |
||||||
|
======== |
||||||
|
|
||||||
|
This tutorial show you the basic concept of *PyDolphinScheduler* and tell all |
||||||
|
things you should know before you submit or run your first workflow. If you |
||||||
|
still not install *PyDolphinScheduler* and start Apache DolphinScheduler, you |
||||||
|
could go and see :ref:`how to getting start PyDolphinScheduler <start:getting started>` |
||||||
|
|
||||||
|
Overview of Tutorial |
||||||
|
-------------------- |
||||||
|
|
||||||
|
Here have an overview of our tutorial, and it look a little complex but do not |
||||||
|
worry about that because we explain this example below as detailed as possible. |
||||||
|
|
||||||
|
.. literalinclude:: ../../examples/tutorial.py |
||||||
|
:start-after: [start tutorial] |
||||||
|
:end-before: [end tutorial] |
||||||
|
|
||||||
|
Import Necessary Module |
||||||
|
----------------------- |
||||||
|
|
||||||
|
First of all, we should importing necessary module which we would use later just |
||||||
|
like other Python package. We just create a minimum demo here, so we just import |
||||||
|
:class:`pydolphinscheduler.core.process_definition` and |
||||||
|
:class:`pydolphinscheduler.tasks.shell`. |
||||||
|
|
||||||
|
.. literalinclude:: ../../examples/tutorial.py |
||||||
|
:start-after: [start package_import] |
||||||
|
:end-before: [end package_import] |
||||||
|
|
||||||
|
If you want to use other task type you could click and |
||||||
|
:doc:`see all tasks we support <tasks/index>` |
||||||
|
|
||||||
|
Process Definition Declaration |
||||||
|
------------------------------ |
||||||
|
|
||||||
|
We should instantiate object after we import them from `import necessary module`_. |
||||||
|
Here we declare basic arguments for process definition(aka, workflow). We define |
||||||
|
the name of process definition, using `Python context manager`_ and it |
||||||
|
**the only required argument** for object process definition. Beside that we also |
||||||
|
declare three arguments named `schedule`, `start_time` which setting workflow schedule |
||||||
|
interval and schedule start_time, and argument `tenant` which changing workflow's |
||||||
|
task running user in the worker, :ref:`section tenant <concept:tenant>` in *PyDolphinScheduler* |
||||||
|
:doc:`concept` page have more detail information. |
||||||
|
|
||||||
|
.. literalinclude:: ../../examples/tutorial.py |
||||||
|
:start-after: [start workflow_declare] |
||||||
|
:end-before: [end workflow_declare] |
||||||
|
|
||||||
|
We could find more detail about process definition in |
||||||
|
:ref:`concept about process definition <concept:process definition>` if you interested in it. |
||||||
|
For all arguments of object process definition, you could find in the |
||||||
|
:class:`pydolphinscheduler.core.process_definition` api documentation. |
||||||
|
|
||||||
|
Task Declaration |
||||||
|
---------------- |
||||||
|
|
||||||
|
Here we declare four tasks, and bot of them are simple task of |
||||||
|
:class:`pydolphinscheduler.tasks.shell` which running `echo` command in terminal. |
||||||
|
Beside the argument `command`, we also need setting argument `name` for each task *(not |
||||||
|
only shell task, `name` is required for each type of task)*. |
||||||
|
|
||||||
|
.. literalinclude:: ../../examples/tutorial.py |
||||||
|
:dedent: 0 |
||||||
|
:start-after: [start task_declare] |
||||||
|
:end-before: [end task_declare] |
||||||
|
|
||||||
|
Beside shell task, *PyDolphinScheduler* support multiple tasks and you could |
||||||
|
find in :doc:`tasks/index`. |
||||||
|
|
||||||
|
Setting Task Dependence |
||||||
|
----------------------- |
||||||
|
|
||||||
|
After we declare both process definition and task, we have one workflow with |
||||||
|
four tasks, both all tasks is independent so that they would run in parallel. |
||||||
|
We should reorder the sort and the dependence of tasks. It useful when we need |
||||||
|
run prepare task before we run actual task or we need tasks running is specific |
||||||
|
rule. We both support attribute `set_downstream` and `set_upstream`, or bitwise |
||||||
|
operators `>>` and `<<`. |
||||||
|
|
||||||
|
In this example, we set task `task_parent` is the upstream task of task |
||||||
|
`task_child_one` and `task_child_two`, and task `task_union` is the downstream |
||||||
|
task of both these two task. |
||||||
|
|
||||||
|
.. literalinclude:: ../../examples/tutorial.py |
||||||
|
:dedent: 0 |
||||||
|
:start-after: [start task_relation_declare] |
||||||
|
:end-before: [end task_relation_declare] |
||||||
|
|
||||||
|
Please notice that we could grouping some tasks and set dependence if they have |
||||||
|
same downstream or upstream. We declare task `task_child_one` and `task_child_two` |
||||||
|
as a group here, named as `task_group` and set task `task_parent` as upstream of |
||||||
|
both of them. You could see more detail in :ref:`concept:Tasks Dependence` section in concept |
||||||
|
documentation. |
||||||
|
|
||||||
|
Submit Or Run Workflow |
||||||
|
---------------------- |
||||||
|
|
||||||
|
Now we finish our workflow definition, with task and task dependence, but all |
||||||
|
these things are in local, we should let Apache DolphinScheduler daemon know what we |
||||||
|
define our workflow. So the last thing we have to do here is submit our workflow to |
||||||
|
Apache DolphinScheduler daemon. |
||||||
|
|
||||||
|
We here in the example using `ProcessDefinition` attribute `run` to submit workflow |
||||||
|
to the daemon, and set the schedule time we just declare in `process definition declaration`_. |
||||||
|
|
||||||
|
Now, we could run the Python code like other Python script, for the basic usage run |
||||||
|
:code:`python tutorial.py` to trigger and run it. |
||||||
|
|
||||||
|
.. literalinclude:: ../../examples/tutorial.py |
||||||
|
:dedent: 0 |
||||||
|
:start-after: [start submit_or_run] |
||||||
|
:end-before: [end submit_or_run] |
||||||
|
|
||||||
|
If you not start your Apache DolphinScheduler server, you could find the way in |
||||||
|
:ref:`start:start Python gateway server` and it would have more detail about related server |
||||||
|
start. Beside attribute `run`, we have attribute `submit` for object `ProcessDefinition` |
||||||
|
and it just submit workflow to the daemon but not setting the schedule information. For |
||||||
|
more detail you could see :ref:`concept:process definition`. |
||||||
|
|
||||||
|
DAG Graph After Tutorial Run |
||||||
|
---------------------------- |
||||||
|
|
||||||
|
After we run the tutorial code, you could login Apache DolphinScheduler web UI, |
||||||
|
go and see the `DolphinScheduler project page`_. they is a new process definition be |
||||||
|
created and named "Tutorial". It create by *PyDolphinScheduler* and the DAG graph as below |
||||||
|
|
||||||
|
.. literalinclude:: ../../examples/tutorial.py |
||||||
|
:language: text |
||||||
|
:lines: 24-28 |
||||||
|
|
||||||
|
.. _`DolphinScheduler project page`: https://dolphinscheduler.apache.org/en-us/docs/latest/user_doc/guide/project.html |
||||||
|
.. _`Python context manager`: https://docs.python.org/3/library/stdtypes.html#context-manager-types |
@ -0,0 +1,59 @@ |
|||||||
|
# Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
# or more contributor license agreements. See the NOTICE file |
||||||
|
# distributed with this work for additional information |
||||||
|
# regarding copyright ownership. The ASF licenses this file |
||||||
|
# to you under the Apache License, Version 2.0 (the |
||||||
|
# "License"); you may not use this file except in compliance |
||||||
|
# with the License. You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, |
||||||
|
# software distributed under the License is distributed on an |
||||||
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
# KIND, either express or implied. See the License for the |
||||||
|
# specific language governing permissions and limitations |
||||||
|
# under the License. |
||||||
|
|
||||||
|
"""Test pydolphinscheduler docs.""" |
||||||
|
|
||||||
|
import re |
||||||
|
|
||||||
|
from tests.testing.constants import task_without_example |
||||||
|
from tests.testing.path import get_doc_tasks, get_tasks |
||||||
|
|
||||||
|
ignore_code_file = {"__init__.py"} |
||||||
|
ignore_doc_file = {"index.rst"} |
||||||
|
|
||||||
|
|
||||||
|
def test_without_missing_task_rst(): |
||||||
|
"""Test without missing any task document by compare filename. |
||||||
|
|
||||||
|
Avoiding add new type of tasks but without adding document about it. |
||||||
|
""" |
||||||
|
code_files = {p.stem for p in get_tasks(ignore_name=ignore_code_file)} |
||||||
|
doc_files = {p.stem for p in get_doc_tasks(ignore_name=ignore_doc_file)} |
||||||
|
assert code_files == doc_files |
||||||
|
|
||||||
|
|
||||||
|
def test_task_without_example(): |
||||||
|
"""Test task document which without example. |
||||||
|
|
||||||
|
Avoiding add new type of tasks but without adding example content describe how to use it. |
||||||
|
""" |
||||||
|
task_without_example_detected = set() |
||||||
|
pattern = re.compile("Example\n-------") |
||||||
|
|
||||||
|
for doc in get_doc_tasks(ignore_name=ignore_doc_file): |
||||||
|
search_result = pattern.search(doc.read_text()) |
||||||
|
if not search_result: |
||||||
|
task_without_example_detected.add(doc.stem) |
||||||
|
assert task_without_example == task_without_example_detected |
||||||
|
|
||||||
|
|
||||||
|
def test_doc_automodule_directive_name(): |
||||||
|
"""Test task document with correct name in directive automodule.""" |
||||||
|
pattern = re.compile(".. automodule:: (.*)") |
||||||
|
for doc in get_doc_tasks(ignore_name=ignore_doc_file): |
||||||
|
match_string = pattern.search(doc.read_text()).group(1) |
||||||
|
assert f"pydolphinscheduler.tasks.{doc.stem}" == match_string |
@ -0,0 +1,28 @@ |
|||||||
|
# Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
# or more contributor license agreements. See the NOTICE file |
||||||
|
# distributed with this work for additional information |
||||||
|
# regarding copyright ownership. The ASF licenses this file |
||||||
|
# to you under the Apache License, Version 2.0 (the |
||||||
|
# "License"); you may not use this file except in compliance |
||||||
|
# with the License. You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, |
||||||
|
# software distributed under the License is distributed on an |
||||||
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
# KIND, either express or implied. See the License for the |
||||||
|
# specific language governing permissions and limitations |
||||||
|
# under the License. |
||||||
|
|
||||||
|
"""Constants variables for test module.""" |
||||||
|
|
||||||
|
# Record some task without example in directory `example`. Some of them maybe can not write example, |
||||||
|
# but most of them just without adding by mistake, and we should add it later. |
||||||
|
task_without_example = { |
||||||
|
"sql", |
||||||
|
"http", |
||||||
|
"sub_process", |
||||||
|
"python", |
||||||
|
"procedure", |
||||||
|
} |
@ -0,0 +1,54 @@ |
|||||||
|
# Licensed to the Apache Software Foundation (ASF) under one |
||||||
|
# or more contributor license agreements. See the NOTICE file |
||||||
|
# distributed with this work for additional information |
||||||
|
# regarding copyright ownership. The ASF licenses this file |
||||||
|
# to you under the Apache License, Version 2.0 (the |
||||||
|
# "License"); you may not use this file except in compliance |
||||||
|
# with the License. You may obtain a copy of the License at |
||||||
|
# |
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
# |
||||||
|
# Unless required by applicable law or agreed to in writing, |
||||||
|
# software distributed under the License is distributed on an |
||||||
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
||||||
|
# KIND, either express or implied. See the License for the |
||||||
|
# specific language governing permissions and limitations |
||||||
|
# under the License. |
||||||
|
|
||||||
|
"""Handle path related issue in test module.""" |
||||||
|
|
||||||
|
from pathlib import Path |
||||||
|
from typing import Any, Generator |
||||||
|
|
||||||
|
path_code_tasks = Path(__file__).parent.parent.parent.joinpath( |
||||||
|
"src", "pydolphinscheduler", "tasks" |
||||||
|
) |
||||||
|
path_example = Path(__file__).parent.parent.parent.joinpath("examples") |
||||||
|
path_doc_tasks = Path(__file__).parent.parent.parent.joinpath("docs", "source", "tasks") |
||||||
|
|
||||||
|
|
||||||
|
def get_all_examples() -> Generator[Path, Any, None]: |
||||||
|
"""Get all examples files path in examples directory.""" |
||||||
|
return (ex for ex in path_example.iterdir() if ex.is_file()) |
||||||
|
|
||||||
|
|
||||||
|
def get_tasks(ignore_name: set = None) -> Generator[Path, Any, None]: |
||||||
|
"""Get all tasks files path in src/pydolphinscheduler/tasks directory.""" |
||||||
|
if not ignore_name: |
||||||
|
ignore_name = set() |
||||||
|
return ( |
||||||
|
ex |
||||||
|
for ex in path_code_tasks.iterdir() |
||||||
|
if ex.is_file() and ex.name not in ignore_name |
||||||
|
) |
||||||
|
|
||||||
|
|
||||||
|
def get_doc_tasks(ignore_name: set = None) -> Generator[Path, Any, None]: |
||||||
|
"""Get all tasks document path in docs/source/tasks directory.""" |
||||||
|
if not ignore_name: |
||||||
|
ignore_name = set() |
||||||
|
return ( |
||||||
|
ex |
||||||
|
for ex in path_doc_tasks.iterdir() |
||||||
|
if ex.is_file() and ex.name not in ignore_name |
||||||
|
) |
Loading…
Reference in new issue