Browse Source

[python] Initiate document for pydolphinscheduler (#8005)

3.0.0/version-upgrade
Jiajie Zhong 3 years ago committed by GitHub
parent
commit
35578efd08
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 17
      .github/workflows/py-ci.yml
  2. 1
      .licenserc.yaml
  3. 1
      dolphinscheduler-python/pydolphinscheduler/.flake8
  4. 39
      dolphinscheduler-python/pydolphinscheduler/docs/Makefile
  5. 54
      dolphinscheduler-python/pydolphinscheduler/docs/make.bat
  6. 0
      dolphinscheduler-python/pydolphinscheduler/docs/source/_static/.gitkeep
  7. 47
      dolphinscheduler-python/pydolphinscheduler/docs/source/api.rst
  8. 151
      dolphinscheduler-python/pydolphinscheduler/docs/source/concept.rst
  9. 88
      dolphinscheduler-python/pydolphinscheduler/docs/source/conf.py
  10. 42
      dolphinscheduler-python/pydolphinscheduler/docs/source/index.rst
  11. 113
      dolphinscheduler-python/pydolphinscheduler/docs/source/start.rst
  12. 33
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/condition.rst
  13. 33
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/datax.rst
  14. 33
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/dependent.rst
  15. 33
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/flink.rst
  16. 21
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/http.rst
  17. 41
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/index.rst
  18. 34
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/map_reduce.rst
  19. 21
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/procedure.rst
  20. 21
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/python.rst
  21. 33
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/shell.rst
  22. 33
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/spark.rst
  23. 21
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sql.rst
  24. 21
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sub_process.rst
  25. 33
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/switch.rst
  26. 150
      dolphinscheduler-python/pydolphinscheduler/docs/source/tutorial.rst
  27. 10
      dolphinscheduler-python/pydolphinscheduler/examples/task_condition_example.py
  28. 3
      dolphinscheduler-python/pydolphinscheduler/examples/task_datax_example.py
  29. 2
      dolphinscheduler-python/pydolphinscheduler/examples/task_dependent_example.py
  30. 2
      dolphinscheduler-python/pydolphinscheduler/examples/task_flink_example.py
  31. 2
      dolphinscheduler-python/pydolphinscheduler/examples/task_map_reduce_example.py
  32. 2
      dolphinscheduler-python/pydolphinscheduler/examples/task_spark_example.py
  33. 2
      dolphinscheduler-python/pydolphinscheduler/examples/task_switch_example.py
  34. 18
      dolphinscheduler-python/pydolphinscheduler/examples/tutorial.py
  35. 3
      dolphinscheduler-python/pydolphinscheduler/requirements_dev.txt
  36. 10
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/__init__.py
  37. 10
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/side/__init__.py
  38. 30
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/__init__.py
  39. 2
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/condition.py
  40. 23
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/shell.py
  41. 42
      dolphinscheduler-python/pydolphinscheduler/tests/example/test_example.py
  42. 8
      dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_condition.py
  43. 59
      dolphinscheduler-python/pydolphinscheduler/tests/test_docs.py
  44. 28
      dolphinscheduler-python/pydolphinscheduler/tests/testing/constants.py
  45. 54
      dolphinscheduler-python/pydolphinscheduler/tests/testing/path.py

17
.github/workflows/py-ci.yml

@ -97,3 +97,20 @@ jobs:
pip install -e . pip install -e .
- name: Run Tests && Check coverage - name: Run Tests && Check coverage
run: coverage run && coverage report run: coverage run && coverage report
doc-build:
name: Document Build Test
needs: pytest
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.7
uses: actions/setup-python@v2
with:
python-version: 3.7
- name: Install Development Dependences
run: |
pip install -r requirements_dev.txt
pip install -e .
- name: Test Build Document
working-directory: dolphinscheduler-python/pydolphinscheduler/docs
run: make clean && make html

1
.licenserc.yaml

@ -45,5 +45,6 @@ header:
- '.github/actions/comment-on-issue/**' - '.github/actions/comment-on-issue/**'
- '.github/actions/reviewdog-setup/**' - '.github/actions/reviewdog-setup/**'
- '.github/actions/translate-on-issue/**' - '.github/actions/translate-on-issue/**'
- '**/.gitkeep'
comment: on-failure comment: on-failure

1
dolphinscheduler-python/pydolphinscheduler/.flake8

@ -35,3 +35,4 @@ ignore =
W503 # W503: Line breaks before binary operators W503 # W503: Line breaks before binary operators
per-file-ignores = per-file-ignores =
src/pydolphinscheduler/side/__init__.py:F401 src/pydolphinscheduler/side/__init__.py:F401
src/pydolphinscheduler/tasks/__init__.py:F401

39
dolphinscheduler-python/pydolphinscheduler/docs/Makefile

@ -0,0 +1,39 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
# Add opts `turn warnings into errors` strict sphinx-build behavior
SPHINXOPTS ?= -W
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

54
dolphinscheduler-python/pydolphinscheduler/docs/make.bat

@ -0,0 +1,54 @@
REM Licensed to the Apache Software Foundation (ASF) under one
REM or more contributor license agreements. See the NOTICE file
REM distributed with this work for additional information
REM regarding copyright ownership. The ASF licenses this file
REM to you under the Apache License, Version 2.0 (the
REM "License"); you may not use this file except in compliance
REM with the License. You may obtain a copy of the License at
REM
REM http://www.apache.org/licenses/LICENSE-2.0
REM
REM Unless required by applicable law or agreed to in writing,
REM software distributed under the License is distributed on an
REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
REM KIND, either express or implied. See the License for the
REM specific language governing permissions and limitations
REM under the License.
@ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
REM Add opts `turn warnings into errors` strict sphinx-build behavior
set SPHINXOPTS=-W
if "%1" == "" goto help
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.https://www.sphinx-doc.org/
exit /b 1
)
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd

0
dolphinscheduler-python/pydolphinscheduler/docs/source/_static/.gitkeep

47
dolphinscheduler-python/pydolphinscheduler/docs/source/api.rst

@ -0,0 +1,47 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
API
===
Core
----
.. automodule:: pydolphinscheduler.core
:inherited-members:
Sides
-----
.. automodule:: pydolphinscheduler.side
:inherited-members:
Tasks
-----
.. automodule:: pydolphinscheduler.tasks
:inherited-members:
Constants
---------
.. automodule:: pydolphinscheduler.constants
Exceptions
----------
.. automodule:: pydolphinscheduler.exceptions

151
dolphinscheduler-python/pydolphinscheduler/docs/source/concept.rst

@ -0,0 +1,151 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Concepts
========
In this section, you would know the core concepts of *PyDolphinScheduler*.
Process Definition
------------------
Process definition describe the whole things except `tasks`_ and `tasks dependence`_, which including
name, schedule interval, schedule start time and end time. You would know scheduler
Process definition could be initialized in normal assign statement or in context manger.
.. code-block:: python
# Initialization with assign statement
pd = ProcessDefinition(name="my first process definition")
# Or context manger
with ProcessDefinition(name="my first process definition") as pd:
pd.submit()
Process definition is the main object communicate between *PyDolphinScheduler* and DolphinScheduler daemon.
After process definition and task is be declared, you could use `submit` and `run` notify server your definition.
If you just want to submit your definition and create workflow, without run it, you should use attribute `submit`.
But if you want to run the workflow after you submit it, you could use attribute `run`.
.. code-block:: python
# Just submit definition, without run it
pd.submit()
# Both submit and run definition
pd.run()
Schedule
~~~~~~~~
We use parameter `schedule` determine the schedule interval of workflow, *PyDolphinScheduler* support seven
asterisks expression, and each of the meaning of position as below
.. code-block:: text
* * * * * * *
┬ ┬ ┬ ┬ ┬ ┬ ┬
│ │ │ │ │ │ │
│ │ │ │ │ │ └─── year
│ │ │ │ │ └───── day of week (0 - 7) (0 to 6 are Sunday to Saturday, or use names; 7 is Sunday, the same as 0)
│ │ │ │ └─────── month (1 - 12)
│ │ │ └───────── day of month (1 - 31)
│ │ └─────────── hour (0 - 23)
│ └───────────── min (0 - 59)
└─────────────── second (0 - 59)
Here we add some example crontab:
- `0 0 0 * * ? *`: Workflow execute every day at 00:00:00.
- `10 2 * * * ? *`: Workflow execute hourly day at ten pass two.
- `10,11 20 0 1,2 * ? *`: Workflow execute first and second day of month at 00:20:10 and 00:20:11.
Tenant
~~~~~~
Tenant is the user who run task command in machine or in virtual machine. it could be assign by simple string.
.. code-block:: python
#
pd = ProcessDefinition(name="process definition tenant", tenant="tenant_exists")
.. note::
Make should tenant exists in target machine, otherwise it will raise an error when you try to run command
Tasks
-----
Task is the minimum unit running actual job, and it is nodes of DAG, aka directed acyclic graph. You could define
what you want to in the task. It have some required parameter to make uniqueness and definition.
Here we use :py:meth:`pydolphinscheduler.tasks.Shell` as example, parameter `name` and `command` is required and must be provider. Parameter
`name` set name to the task, and parameter `command` declare the command you wish to run in this task.
.. code-block:: python
# We named this task as "shell", and just run command `echo shell task`
shell_task = Shell(name="shell", command="echo shell task")
If you want to see all type of tasks, you could see :doc:`tasks/index`.
Tasks Dependence
~~~~~~~~~~~~~~~~
You could define many tasks in on single `Process Definition`_. If all those task is in parallel processing,
then you could leave them alone without adding any additional information. But if there have some tasks should
not be run unless pre task in workflow have be done, we should set task dependence to them. Set tasks dependence
have two mainly way and both of them is easy. You could use bitwise operator `>>` and `<<`, or task attribute
`set_downstream` and `set_upstream` to do it.
.. code-block:: python
# Set task1 as task2 upstream
task1 >> task2
# You could use attribute `set_downstream` too, is same as `task1 >> task2`
task1.set_downstream(task2)
# Set task1 as task2 downstream
task1 << task2
# It is same as attribute `set_upstream`
task1.set_upstream(task2)
# Beside, we could set dependence between task and sequence of tasks,
# we set `task1` is upstream to both `task2` and `task3`. It is useful
# for some tasks have same dependence.
task1 >> [task2, task3]
Task With Process Definition
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In most of data orchestration cases, you should assigned attribute `process_definition` to task instance to
decide workflow of task. You could set `process_definition` in both normal assign or in context manger mode
.. code-block:: python
# Normal assign, have to explicit declaration and pass `ProcessDefinition` instance to task
pd = ProcessDefinition(name="my first process definition")
shell_task = Shell(name="shell", command="echo shell task", process_definition=pd)
# Context manger, `ProcessDefinition` instance pd would implicit declaration to task
with ProcessDefinition(name="my first process definition") as pd:
shell_task = Shell(name="shell", command="echo shell task",
With both `Process Definition`_, `Tasks`_ and `Tasks Dependence`_, we could build a workflow with multiple tasks.

88
dolphinscheduler-python/pydolphinscheduler/docs/source/conf.py

@ -0,0 +1,88 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))
# -- Project information -----------------------------------------------------
project = "pydolphinscheduler"
copyright = "2022, apache"
author = "apache"
# The full version, including alpha/beta/rc tags
release = "0.0.1"
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
# Measures durations of Sphinx processing
"sphinx.ext.duration",
# Semi-automatic make docstrings to document
"sphinx.ext.autodoc",
"sphinx.ext.viewcode",
"sphinx.ext.autosectionlabel",
"sphinx_rtd_theme",
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []
autodoc_default_options = {
"members": True,
"show-inheritance": True,
"private-members": True,
"undoc-members": True,
"member-order": "groupwise",
}
autosectionlabel_prefix_document = True
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = "sphinx_rtd_theme"
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]

42
dolphinscheduler-python/pydolphinscheduler/docs/source/index.rst

@ -0,0 +1,42 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
PyDolphinScheduler
==================
**PyDolphinScheduler** is Python API for `Apache DolphinScheduler <https://dolphinscheduler.apache.org>`_,
which allow you definition your workflow by Python code, aka workflow-as-codes.
I could go and find how to :ref:`install <start:getting started>` the project. Or if you want to see simply example
then go and see :doc:`tutorial` for more detail.
.. toctree::
:maxdepth: 2
start
tutorial
concept
tasks/index
api
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

113
dolphinscheduler-python/pydolphinscheduler/docs/source/start.rst

@ -0,0 +1,113 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Getting Started
===============
To get started with *PyDolphinScheduler* you must ensure python and pip
installed on your machine, if you're already set up, you can skip straight
to `Installing PyDolphinScheduler`_, otherwise please continue with
`Installing Python`_.
Installing Python
-----------------
How to install `python` and `pip` depends on what operating system
you're using. The python wiki provides up to date
`instructions for all platforms here`_. When you entering the website
and choice your operating system, you would be offered the choice and
select python version. *PyDolphinScheduler* recommend use version above
Python 3.6 and we highly recommend you install *Stable Releases* instead
of *Pre-releases*.
After you have download and installed Python, you should open your terminal,
typing and running :code:`python --version` to check whether the installation
is correct or not. If all thing good, you could see the version in console
without error(here is a example after Python 3.8.7 installed)
.. code-block:: bash
$ python --version
Python 3.8.7
Installing PyDolphinScheduler
-----------------------------
After Python is already installed on your machine following section
`installing Python`_, it easy to *PyDolphinScheduler* by pip.
.. code-block:: bash
$ pip install apache-dolphinscheduler
The latest version of *PyDolphinScheduler* would be installed after you run above
command in your terminal. You could go and `start Python Gateway Server`_ to finish
the prepare, and then go to :doc:`tutorial` to make your hand dirty. But if you
want to install the unreleased version of *PyDolphinScheduler*, you could go and see
section `installing PyDolphinScheduler in dev`_ for more detail.
Installing PyDolphinScheduler In Dev
------------------------------------
Because the project is developing and some of the features still not release.
If you want to try some thing unreleased you could install from the source code
which we hold in GitHub
.. code-block:: bash
# Clone Apache DolphinScheduler repository
$ git clone git@github.com:apache/dolphinscheduler.git
# Install PyDolphinScheduler in develop mode
$ cd dolphinscheduler-python/pydolphinscheduler && pip install -e .
After you installed *PyDolphinScheduler*, please remember `start Python Gateway Server`_
which waiting for *PyDolphinScheduler*'s workflow definition require.
Start Python Gateway Server
---------------------------
Since **PyDolphinScheduler** is Python API for `Apache DolphinScheduler`_, it
could define workflow and tasks structure, but could not run it unless you
`install Apache DolphinScheduler`_ and start Python gateway server. We only
and some key steps here and you could go `install Apache DolphinScheduler`_
for more detail
.. code-block:: bash
# Start pythonGatewayServer
$ ./bin/dolphinscheduler-daemon.sh start pythonGatewayServer
To check whether the server is alive or not, you could run :code:`jps`. And
the server is health if keyword `PythonGatewayServer` in the console.
.. code-block:: bash
$ jps
....
201472 PythonGatewayServer
....
What's More
-----------
If you do not familiar with *PyDolphinScheduler*, you could go to :doc:`tutorial`
and see how it work. But if you already know the inside of *PyDolphinScheduler*,
maybe you could go and play with all :doc:`tasks/index` *PyDolphinScheduler* supports.
.. _`instructions for all platforms here`: https://wiki.python.org/moin/BeginnersGuide/Download
.. _`Apache DolphinScheduler`: https://dolphinscheduler.apache.org
.. _`install Apache DolphinScheduler`: https://dolphinscheduler.apache.org/en-us/docs/latest/user_doc/guide/installation/standalone.html

33
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/condition.rst

@ -0,0 +1,33 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Condition
=========
A condition task type's example and dive into information of **PyDolphinScheduler**.
Example
-------
.. literalinclude:: ../../../examples/task_condition_example.py
:start-after: [start workflow_declare]
:end-before: [end workflow_declare]
Dive Into
---------
.. automodule:: pydolphinscheduler.tasks.condition

33
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/datax.rst

@ -0,0 +1,33 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Datax
=====
A DataX task type's example and dive into information of **PyDolphinScheduler**.
Example
-------
.. literalinclude:: ../../../examples/task_datax_example.py
:start-after: [start workflow_declare]
:end-before: [end workflow_declare]
Dive Into
---------
.. automodule:: pydolphinscheduler.tasks.datax

33
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/dependent.rst

@ -0,0 +1,33 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Dependent
=========
A dependent task type's example and dive into information of **PyDolphinScheduler**.
Example
-------
.. literalinclude:: ../../../examples/task_dependent_example.py
:start-after: [start workflow_declare]
:end-before: [end workflow_declare]
Dive Into
---------
.. automodule:: pydolphinscheduler.tasks.dependent

33
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/flink.rst

@ -0,0 +1,33 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Flink
=====
A flink task type's example and dive into information of **PyDolphinScheduler**.
Example
-------
.. literalinclude:: ../../../examples/task_flink_example.py
:start-after: [start workflow_declare]
:end-before: [end workflow_declare]
Dive Into
---------
.. automodule:: pydolphinscheduler.tasks.flink

21
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/http.rst

@ -0,0 +1,21 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
HTTP
====
.. automodule:: pydolphinscheduler.tasks.http

41
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/index.rst

@ -0,0 +1,41 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Tasks
=====
In this section
.. toctree::
:maxdepth: 1
shell
sql
python
http
switch
condition
dependent
spark
flink
map_reduce
procedure
datax
sub_process

34
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/map_reduce.rst

@ -0,0 +1,34 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Map Reduce
==========
A Map Reduce task type's example and dive into information of **PyDolphinScheduler**.
Example
-------
.. literalinclude:: ../../../examples/task_map_reduce_example.py
:start-after: [start workflow_declare]
:end-before: [end workflow_declare]
Dive Into
---------
.. automodule:: pydolphinscheduler.tasks.map_reduce

21
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/procedure.rst

@ -0,0 +1,21 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Procedure
=========
.. automodule:: pydolphinscheduler.tasks.procedure

21
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/python.rst

@ -0,0 +1,21 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Python
======
.. automodule:: pydolphinscheduler.tasks.python

33
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/shell.rst

@ -0,0 +1,33 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Shell
=====
A shell task type's example and dive into information of **PyDolphinScheduler**.
Example
-------
.. literalinclude:: ../../../examples/tutorial.py
:start-after: [start workflow_declare]
:end-before: [end task_relation_declare]
Dive Into
---------
.. automodule:: pydolphinscheduler.tasks.shell

33
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/spark.rst

@ -0,0 +1,33 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Spark
=====
A spark task type's example and dive into information of **PyDolphinScheduler**.
Example
-------
.. literalinclude:: ../../../examples/task_spark_example.py
:start-after: [start workflow_declare]
:end-before: [end workflow_declare]
Dive Into
---------
.. automodule:: pydolphinscheduler.tasks.spark

21
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sql.rst

@ -0,0 +1,21 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
SQL
===
.. automodule:: pydolphinscheduler.tasks.sql

21
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sub_process.rst

@ -0,0 +1,21 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Sub Process
===========
.. automodule:: pydolphinscheduler.tasks.sub_process

33
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/switch.rst

@ -0,0 +1,33 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Switch
======
A switch task type's example and dive into information of **PyDolphinScheduler**.
Example
-------
.. literalinclude:: ../../../examples/task_switch_example.py
:start-after: [start workflow_declare]
:end-before: [end workflow_declare]
Dive Into
---------
.. automodule:: pydolphinscheduler.tasks.switch

150
dolphinscheduler-python/pydolphinscheduler/docs/source/tutorial.rst

@ -0,0 +1,150 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Tutorial
========
This tutorial show you the basic concept of *PyDolphinScheduler* and tell all
things you should know before you submit or run your first workflow. If you
still not install *PyDolphinScheduler* and start Apache DolphinScheduler, you
could go and see :ref:`how to getting start PyDolphinScheduler <start:getting started>`
Overview of Tutorial
--------------------
Here have an overview of our tutorial, and it look a little complex but do not
worry about that because we explain this example below as detailed as possible.
.. literalinclude:: ../../examples/tutorial.py
:start-after: [start tutorial]
:end-before: [end tutorial]
Import Necessary Module
-----------------------
First of all, we should importing necessary module which we would use later just
like other Python package. We just create a minimum demo here, so we just import
:class:`pydolphinscheduler.core.process_definition` and
:class:`pydolphinscheduler.tasks.shell`.
.. literalinclude:: ../../examples/tutorial.py
:start-after: [start package_import]
:end-before: [end package_import]
If you want to use other task type you could click and
:doc:`see all tasks we support <tasks/index>`
Process Definition Declaration
------------------------------
We should instantiate object after we import them from `import necessary module`_.
Here we declare basic arguments for process definition(aka, workflow). We define
the name of process definition, using `Python context manager`_ and it
**the only required argument** for object process definition. Beside that we also
declare three arguments named `schedule`, `start_time` which setting workflow schedule
interval and schedule start_time, and argument `tenant` which changing workflow's
task running user in the worker, :ref:`section tenant <concept:tenant>` in *PyDolphinScheduler*
:doc:`concept` page have more detail information.
.. literalinclude:: ../../examples/tutorial.py
:start-after: [start workflow_declare]
:end-before: [end workflow_declare]
We could find more detail about process definition in
:ref:`concept about process definition <concept:process definition>` if you interested in it.
For all arguments of object process definition, you could find in the
:class:`pydolphinscheduler.core.process_definition` api documentation.
Task Declaration
----------------
Here we declare four tasks, and bot of them are simple task of
:class:`pydolphinscheduler.tasks.shell` which running `echo` command in terminal.
Beside the argument `command`, we also need setting argument `name` for each task *(not
only shell task, `name` is required for each type of task)*.
.. literalinclude:: ../../examples/tutorial.py
:dedent: 0
:start-after: [start task_declare]
:end-before: [end task_declare]
Beside shell task, *PyDolphinScheduler* support multiple tasks and you could
find in :doc:`tasks/index`.
Setting Task Dependence
-----------------------
After we declare both process definition and task, we have one workflow with
four tasks, both all tasks is independent so that they would run in parallel.
We should reorder the sort and the dependence of tasks. It useful when we need
run prepare task before we run actual task or we need tasks running is specific
rule. We both support attribute `set_downstream` and `set_upstream`, or bitwise
operators `>>` and `<<`.
In this example, we set task `task_parent` is the upstream task of task
`task_child_one` and `task_child_two`, and task `task_union` is the downstream
task of both these two task.
.. literalinclude:: ../../examples/tutorial.py
:dedent: 0
:start-after: [start task_relation_declare]
:end-before: [end task_relation_declare]
Please notice that we could grouping some tasks and set dependence if they have
same downstream or upstream. We declare task `task_child_one` and `task_child_two`
as a group here, named as `task_group` and set task `task_parent` as upstream of
both of them. You could see more detail in :ref:`concept:Tasks Dependence` section in concept
documentation.
Submit Or Run Workflow
----------------------
Now we finish our workflow definition, with task and task dependence, but all
these things are in local, we should let Apache DolphinScheduler daemon know what we
define our workflow. So the last thing we have to do here is submit our workflow to
Apache DolphinScheduler daemon.
We here in the example using `ProcessDefinition` attribute `run` to submit workflow
to the daemon, and set the schedule time we just declare in `process definition declaration`_.
Now, we could run the Python code like other Python script, for the basic usage run
:code:`python tutorial.py` to trigger and run it.
.. literalinclude:: ../../examples/tutorial.py
:dedent: 0
:start-after: [start submit_or_run]
:end-before: [end submit_or_run]
If you not start your Apache DolphinScheduler server, you could find the way in
:ref:`start:start Python gateway server` and it would have more detail about related server
start. Beside attribute `run`, we have attribute `submit` for object `ProcessDefinition`
and it just submit workflow to the daemon but not setting the schedule information. For
more detail you could see :ref:`concept:process definition`.
DAG Graph After Tutorial Run
----------------------------
After we run the tutorial code, you could login Apache DolphinScheduler web UI,
go and see the `DolphinScheduler project page`_. they is a new process definition be
created and named "Tutorial". It create by *PyDolphinScheduler* and the DAG graph as below
.. literalinclude:: ../../examples/tutorial.py
:language: text
:lines: 24-28
.. _`DolphinScheduler project page`: https://dolphinscheduler.apache.org/en-us/docs/latest/user_doc/guide/project.html
.. _`Python context manager`: https://docs.python.org/3/library/stdtypes.html#context-manager-types

10
dolphinscheduler-python/pydolphinscheduler/examples/task_conditions_example.py → dolphinscheduler-python/pydolphinscheduler/examples/task_condition_example.py

@ -15,6 +15,7 @@
# specific language governing permissions and limitations # specific language governing permissions and limitations
# under the License. # under the License.
# [start workflow_declare]
r""" r"""
A example workflow for task condition. A example workflow for task condition.
@ -31,10 +32,10 @@ pre_task_3 -> -> fail_branch
""" """
from pydolphinscheduler.core.process_definition import ProcessDefinition from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.tasks.condition import FAILURE, SUCCESS, And, Conditions from pydolphinscheduler.tasks.condition import FAILURE, SUCCESS, And, Condition
from pydolphinscheduler.tasks.shell import Shell from pydolphinscheduler.tasks.shell import Shell
with ProcessDefinition(name="task_conditions_example", tenant="tenant_exists") as pd: with ProcessDefinition(name="task_condition_example", tenant="tenant_exists") as pd:
pre_task_1 = Shell(name="pre_task_1", command="echo pre_task_1") pre_task_1 = Shell(name="pre_task_1", command="echo pre_task_1")
pre_task_2 = Shell(name="pre_task_2", command="echo pre_task_2") pre_task_2 = Shell(name="pre_task_2", command="echo pre_task_2")
pre_task_3 = Shell(name="pre_task_3", command="echo pre_task_3") pre_task_3 = Shell(name="pre_task_3", command="echo pre_task_3")
@ -48,10 +49,11 @@ with ProcessDefinition(name="task_conditions_example", tenant="tenant_exists") a
success_branch = Shell(name="success_branch", command="echo success_branch") success_branch = Shell(name="success_branch", command="echo success_branch")
fail_branch = Shell(name="fail_branch", command="echo fail_branch") fail_branch = Shell(name="fail_branch", command="echo fail_branch")
condition = Conditions( condition = Condition(
name="conditions", name="condition",
condition=cond_operator, condition=cond_operator,
success_task=success_branch, success_task=success_branch,
failed_task=fail_branch, failed_task=fail_branch,
) )
pd.submit() pd.submit()
# [end workflow_declare]

3
dolphinscheduler-python/pydolphinscheduler/examples/task_datax_example.py

@ -15,6 +15,7 @@
# specific language governing permissions and limitations # specific language governing permissions and limitations
# under the License. # under the License.
# [start workflow_declare]
""" """
A example workflow for task datax. A example workflow for task datax.
@ -24,7 +25,6 @@ You can create data sources `first_mysql` and `first_mysql` through UI.
It creates a task to synchronize datax from the source database to the target database. It creates a task to synchronize datax from the source database to the target database.
""" """
from pydolphinscheduler.core.process_definition import ProcessDefinition from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.tasks.datax import CustomDataX, DataX from pydolphinscheduler.tasks.datax import CustomDataX, DataX
@ -92,3 +92,4 @@ with ProcessDefinition(
# datax job same as task1, transfer record from `first_mysql` to `second_mysql` # datax job same as task1, transfer record from `first_mysql` to `second_mysql`
task2 = CustomDataX(name="task_custom_datax", json=str(JSON_TEMPLATE)) task2 = CustomDataX(name="task_custom_datax", json=str(JSON_TEMPLATE))
pd.run() pd.run()
# [end workflow_declare]

2
dolphinscheduler-python/pydolphinscheduler/examples/task_dependent_example.py

@ -15,6 +15,7 @@
# specific language governing permissions and limitations # specific language governing permissions and limitations
# under the License. # under the License.
# [start workflow_declare]
r""" r"""
A example workflow for task dependent. A example workflow for task dependent.
@ -70,3 +71,4 @@ with ProcessDefinition(
), ),
) )
pd.submit() pd.submit()
# [end workflow_declare]

2
dolphinscheduler-python/pydolphinscheduler/examples/task_flink_example.py

@ -15,6 +15,7 @@
# specific language governing permissions and limitations # specific language governing permissions and limitations
# under the License. # under the License.
# [start workflow_declare]
"""A example workflow for task flink.""" """A example workflow for task flink."""
from pydolphinscheduler.core.process_definition import ProcessDefinition from pydolphinscheduler.core.process_definition import ProcessDefinition
@ -29,3 +30,4 @@ with ProcessDefinition(name="task_flink_example", tenant="tenant_exists") as pd:
deploy_mode=DeployMode.LOCAL, deploy_mode=DeployMode.LOCAL,
) )
pd.run() pd.run()
# [end workflow_declare]

2
dolphinscheduler-python/pydolphinscheduler/examples/task_map_reduce_example.py

@ -15,6 +15,7 @@
# specific language governing permissions and limitations # specific language governing permissions and limitations
# under the License. # under the License.
# [start workflow_declare]
"""A example workflow for task mr.""" """A example workflow for task mr."""
from pydolphinscheduler.core.engine import ProgramType from pydolphinscheduler.core.engine import ProgramType
@ -30,3 +31,4 @@ with ProcessDefinition(name="task_map_reduce_example", tenant="tenant_exists") a
main_args="/dolphinscheduler/tenant_exists/resources/file.txt /output/ds", main_args="/dolphinscheduler/tenant_exists/resources/file.txt /output/ds",
) )
pd.run() pd.run()
# [end workflow_declare]

2
dolphinscheduler-python/pydolphinscheduler/examples/task_spark_example.py

@ -15,6 +15,7 @@
# specific language governing permissions and limitations # specific language governing permissions and limitations
# under the License. # under the License.
# [start workflow_declare]
"""A example workflow for task spark.""" """A example workflow for task spark."""
from pydolphinscheduler.core.process_definition import ProcessDefinition from pydolphinscheduler.core.process_definition import ProcessDefinition
@ -29,3 +30,4 @@ with ProcessDefinition(name="task_spark_example", tenant="tenant_exists") as pd:
deploy_mode=DeployMode.LOCAL, deploy_mode=DeployMode.LOCAL,
) )
pd.run() pd.run()
# [end workflow_declare]

2
dolphinscheduler-python/pydolphinscheduler/examples/task_switch_example.py

@ -15,6 +15,7 @@
# specific language governing permissions and limitations # specific language governing permissions and limitations
# under the License. # under the License.
# [start workflow_declare]
r""" r"""
A example workflow for task switch. A example workflow for task switch.
@ -47,3 +48,4 @@ with ProcessDefinition(
switch = Switch(name="switch", condition=switch_condition) switch = Switch(name="switch", condition=switch_condition)
parent >> switch parent >> switch
pd.submit() pd.submit()
# [end workflow_declare]

18
dolphinscheduler-python/pydolphinscheduler/examples/tutorial.py

@ -24,29 +24,45 @@ and workflow DAG graph as below:
--> task_child_one --> task_child_one
/ \ / \
task_parent --> --> task_union task_parent --> --> task_union
\ / \ /
--> task_child_two --> task_child_two
it will instantiate and run all the task it have. it will instantiate and run all the task it have.
""" """
# [start tutorial]
# [start package_import]
# Import ProcessDefinition object to define your workflow attributes
from pydolphinscheduler.core.process_definition import ProcessDefinition from pydolphinscheduler.core.process_definition import ProcessDefinition
# Import task Shell object cause we would create some shell tasks later
from pydolphinscheduler.tasks.shell import Shell from pydolphinscheduler.tasks.shell import Shell
# [end package_import]
# [start workflow_declare]
with ProcessDefinition( with ProcessDefinition(
name="tutorial", name="tutorial",
schedule="0 0 0 * * ? *", schedule="0 0 0 * * ? *",
start_time="2021-01-01", start_time="2021-01-01",
tenant="tenant_exists", tenant="tenant_exists",
) as pd: ) as pd:
# [end workflow_declare]
# [start task_declare]
task_parent = Shell(name="task_parent", command="echo hello pydolphinscheduler") task_parent = Shell(name="task_parent", command="echo hello pydolphinscheduler")
task_child_one = Shell(name="task_child_one", command="echo 'child one'") task_child_one = Shell(name="task_child_one", command="echo 'child one'")
task_child_two = Shell(name="task_child_two", command="echo 'child two'") task_child_two = Shell(name="task_child_two", command="echo 'child two'")
task_union = Shell(name="task_union", command="echo union") task_union = Shell(name="task_union", command="echo union")
# [end task_declare]
# [start task_relation_declare]
task_group = [task_child_one, task_child_two] task_group = [task_child_one, task_child_two]
task_parent.set_downstream(task_group) task_parent.set_downstream(task_group)
task_union << task_group task_union << task_group
# [end task_relation_declare]
# [start submit_or_run]
pd.run() pd.run()
# [end submit_or_run]
# [end tutorial]

3
dolphinscheduler-python/pydolphinscheduler/requirements_dev.txt

@ -25,3 +25,6 @@ flake8
flake8-docstrings flake8-docstrings
flake8-black flake8-black
isort isort
# Document
sphinx
sphinx_rtd_theme

10
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/__init__.py

@ -16,3 +16,13 @@
# under the License. # under the License.
"""Init pydolphinscheduler.core package.""" """Init pydolphinscheduler.core package."""
from pydolphinscheduler.core.database import Database
from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.core.task import Task
__all__ = [
"ProcessDefinition",
"Task",
"Database",
]

10
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/side/__init__.py

@ -18,5 +18,15 @@
"""Init Side package, Side package keep object related to DolphinScheduler but not in the Core part.""" """Init Side package, Side package keep object related to DolphinScheduler but not in the Core part."""
from pydolphinscheduler.side.project import Project from pydolphinscheduler.side.project import Project
from pydolphinscheduler.side.queue import Queue
from pydolphinscheduler.side.tenant import Tenant from pydolphinscheduler.side.tenant import Tenant
from pydolphinscheduler.side.user import User from pydolphinscheduler.side.user import User
from pydolphinscheduler.side.worker_group import WorkerGroup
__all__ = [
"Project",
"Tenant",
"User",
"Queue",
"WorkerGroup",
]

30
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/__init__.py

@ -16,3 +16,33 @@
# under the License. # under the License.
"""Init pydolphinscheduler.tasks package.""" """Init pydolphinscheduler.tasks package."""
from pydolphinscheduler.tasks.condition import FAILURE, SUCCESS, And, Condition, Or
from pydolphinscheduler.tasks.datax import CustomDataX, DataX
from pydolphinscheduler.tasks.dependent import Dependent
from pydolphinscheduler.tasks.flink import Flink
from pydolphinscheduler.tasks.http import Http
from pydolphinscheduler.tasks.map_reduce import MR
from pydolphinscheduler.tasks.procedure import Procedure
from pydolphinscheduler.tasks.python import Python
from pydolphinscheduler.tasks.shell import Shell
from pydolphinscheduler.tasks.spark import Spark
from pydolphinscheduler.tasks.sql import Sql
from pydolphinscheduler.tasks.sub_process import SubProcess
from pydolphinscheduler.tasks.switch import Branch, Default, Switch, SwitchCondition
__all__ = [
"Condition",
"DataX",
"Dependent",
"Flink",
"Http",
"MR",
"Procedure",
"Python",
"Shell",
"Spark",
"Sql",
"SubProcess",
"Switch",
]

2
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/condition.py

@ -154,7 +154,7 @@ class Or(ConditionOperator):
super().__init__(*args) super().__init__(*args)
class Conditions(Task): class Condition(Task):
"""Task condition object, declare behavior for condition task to dolphinscheduler.""" """Task condition object, declare behavior for condition task to dolphinscheduler."""
def __init__( def __init__(

23
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/shell.py

@ -24,11 +24,28 @@ from pydolphinscheduler.core.task import Task
class Shell(Task): class Shell(Task):
"""Task shell object, declare behavior for shell task to dolphinscheduler. """Task shell object, declare behavior for shell task to dolphinscheduler.
TODO maybe we could use instance name to replace attribute `name` :param name: A unique, meaningful string for the shell task.
which is simplify as `task_shell = Shell(command = "echo 1")` and :param command: One or more command want to run in this task.
task.name assign to `task_shell`
It could be simply command::
Shell(name=..., command="echo task shell")
or maybe same commands trying to do complex task::
command = '''echo task shell step 1;
echo task shell step 2;
echo task shell step 3
'''
Shell(name=..., command=command)
""" """
# TODO maybe we could use instance name to replace attribute `name`
# which is simplify as `task_shell = Shell(command = "echo 1")` and
# task.name assign to `task_shell`
_task_custom_attr = { _task_custom_attr = {
"raw_script", "raw_script",
} }

42
dolphinscheduler-python/pydolphinscheduler/tests/example/test_example.py

@ -19,28 +19,17 @@
import ast import ast
import importlib import importlib
# import os
# import os
from pathlib import Path
from unittest.mock import patch from unittest.mock import patch
import pytest import pytest
from tests.testing.constants import task_without_example
from tests.testing.path import get_all_examples, get_tasks
from tests.testing.task import Task from tests.testing.task import Task
process_definition_name = set() process_definition_name = set()
def get_all_example_define():
"""Get all examples files in examples directory."""
return (
path
for path in Path(__file__).parent.parent.parent.joinpath("examples").iterdir()
if path.is_file()
)
def import_module(script_name, script_path): def import_module(script_name, script_path):
"""Import and run example module in examples directory.""" """Import and run example module in examples directory."""
spec = importlib.util.spec_from_file_location(script_name, script_path) spec = importlib.util.spec_from_file_location(script_name, script_path)
@ -49,6 +38,27 @@ def import_module(script_name, script_path):
return module return module
def test_task_without_example():
"""Test task which without example.
Avoiding add new type of tasks but without adding example describe how to use it.
"""
# We use example/tutorial.py as shell task example
ignore_name = {"__init__.py", "shell.py"}
all_tasks = {task.stem for task in get_tasks(ignore_name=ignore_name)}
have_example_tasks = set()
start = "task_"
end = "_example"
for ex in get_all_examples():
stem = ex.stem
if stem.startswith(start) and stem.endswith(end):
task_name = stem.replace(start, "").replace(end, "")
have_example_tasks.add(task_name)
assert all_tasks.difference(have_example_tasks) == task_without_example
@pytest.fixture @pytest.fixture
def setup_and_teardown_for_stuff(): def setup_and_teardown_for_stuff():
"""Fixture of py.test handle setup and teardown.""" """Fixture of py.test handle setup and teardown."""
@ -80,7 +90,7 @@ def test_example_basic():
* All example except `tutorial.py` is end with keyword "_example" * All example except `tutorial.py` is end with keyword "_example"
* All example must have not empty `__doc__`. * All example must have not empty `__doc__`.
""" """
for ex in get_all_example_define(): for ex in get_all_examples():
# All files in example is python script # All files in example is python script
assert ( assert (
ex.suffix == ".py" ex.suffix == ".py"
@ -121,7 +131,7 @@ def test_example_process_definition_without_same_name(
Our process definition would compete with others if we have same process definition name. It will make Our process definition would compete with others if we have same process definition name. It will make
different between actually workflow and our workflow-as-code file which make users feel strange. different between actually workflow and our workflow-as-code file which make users feel strange.
""" """
for ex in get_all_example_define(): for ex in get_all_examples():
# We use side_effect `submit_check_without_same_name` overwrite :func:`submit` # We use side_effect `submit_check_without_same_name` overwrite :func:`submit`
# and check whether it have duplicate name or not # and check whether it have duplicate name or not
import_module(ex.name, str(ex)) import_module(ex.name, str(ex))
@ -148,7 +158,7 @@ def test_file_name_in_process_definition(mock_code_version, mock_submit, mock_st
more than one process definition. more than one process definition.
""" """
global process_definition_name global process_definition_name
for ex in get_all_example_define(): for ex in get_all_examples():
# Skip bulk_create_example check, cause it contain multiple workflow and # Skip bulk_create_example check, cause it contain multiple workflow and
# without one named bulk_create_example # without one named bulk_create_example
if ex.stem == "bulk_create_example": if ex.stem == "bulk_create_example":

8
dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_condition.py

@ -27,8 +27,8 @@ from pydolphinscheduler.tasks.condition import (
FAILURE, FAILURE,
SUCCESS, SUCCESS,
And, And,
Condition,
ConditionOperator, ConditionOperator,
Conditions,
Or, Or,
Status, Status,
) )
@ -321,7 +321,7 @@ def test_condition_operator_set_define_attr_mix_operator(
return_value=(12345, 1), return_value=(12345, 1),
) )
@patch( @patch(
"pydolphinscheduler.tasks.condition.Conditions.gen_code_and_version", "pydolphinscheduler.tasks.condition.Condition.gen_code_and_version",
return_value=(123, 1), return_value=(123, 1),
) )
def test_condition_get_define(mock_condition_code_version, mock_task_code_version): def test_condition_get_define(mock_condition_code_version, mock_task_code_version):
@ -388,7 +388,7 @@ def test_condition_get_define(mock_condition_code_version, mock_task_code_versio
"timeout": 0, "timeout": 0,
} }
task = Conditions( task = Condition(
name, condition=cond_operator, success_task=common_task, failed_task=common_task name, condition=cond_operator, success_task=common_task, failed_task=common_task
) )
assert task.get_define() == expect assert task.get_define() == expect
@ -414,7 +414,7 @@ def test_condition_set_dep_workflow(mock_task_code_version):
success_branch = Task(name="success_branch", task_type=TEST_TYPE) success_branch = Task(name="success_branch", task_type=TEST_TYPE)
fail_branch = Task(name="fail_branch", task_type=TEST_TYPE) fail_branch = Task(name="fail_branch", task_type=TEST_TYPE)
condition = Conditions( condition = Condition(
name="conditions", name="conditions",
condition=cond_operator, condition=cond_operator,
success_task=success_branch, success_task=success_branch,

59
dolphinscheduler-python/pydolphinscheduler/tests/test_docs.py

@ -0,0 +1,59 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Test pydolphinscheduler docs."""
import re
from tests.testing.constants import task_without_example
from tests.testing.path import get_doc_tasks, get_tasks
ignore_code_file = {"__init__.py"}
ignore_doc_file = {"index.rst"}
def test_without_missing_task_rst():
"""Test without missing any task document by compare filename.
Avoiding add new type of tasks but without adding document about it.
"""
code_files = {p.stem for p in get_tasks(ignore_name=ignore_code_file)}
doc_files = {p.stem for p in get_doc_tasks(ignore_name=ignore_doc_file)}
assert code_files == doc_files
def test_task_without_example():
"""Test task document which without example.
Avoiding add new type of tasks but without adding example content describe how to use it.
"""
task_without_example_detected = set()
pattern = re.compile("Example\n-------")
for doc in get_doc_tasks(ignore_name=ignore_doc_file):
search_result = pattern.search(doc.read_text())
if not search_result:
task_without_example_detected.add(doc.stem)
assert task_without_example == task_without_example_detected
def test_doc_automodule_directive_name():
"""Test task document with correct name in directive automodule."""
pattern = re.compile(".. automodule:: (.*)")
for doc in get_doc_tasks(ignore_name=ignore_doc_file):
match_string = pattern.search(doc.read_text()).group(1)
assert f"pydolphinscheduler.tasks.{doc.stem}" == match_string

28
dolphinscheduler-python/pydolphinscheduler/tests/testing/constants.py

@ -0,0 +1,28 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Constants variables for test module."""
# Record some task without example in directory `example`. Some of them maybe can not write example,
# but most of them just without adding by mistake, and we should add it later.
task_without_example = {
"sql",
"http",
"sub_process",
"python",
"procedure",
}

54
dolphinscheduler-python/pydolphinscheduler/tests/testing/path.py

@ -0,0 +1,54 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Handle path related issue in test module."""
from pathlib import Path
from typing import Any, Generator
path_code_tasks = Path(__file__).parent.parent.parent.joinpath(
"src", "pydolphinscheduler", "tasks"
)
path_example = Path(__file__).parent.parent.parent.joinpath("examples")
path_doc_tasks = Path(__file__).parent.parent.parent.joinpath("docs", "source", "tasks")
def get_all_examples() -> Generator[Path, Any, None]:
"""Get all examples files path in examples directory."""
return (ex for ex in path_example.iterdir() if ex.is_file())
def get_tasks(ignore_name: set = None) -> Generator[Path, Any, None]:
"""Get all tasks files path in src/pydolphinscheduler/tasks directory."""
if not ignore_name:
ignore_name = set()
return (
ex
for ex in path_code_tasks.iterdir()
if ex.is_file() and ex.name not in ignore_name
)
def get_doc_tasks(ignore_name: set = None) -> Generator[Path, Any, None]:
"""Get all tasks document path in docs/source/tasks directory."""
if not ignore_name:
ignore_name = set()
return (
ex
for ex in path_doc_tasks.iterdir()
if ex.is_file() and ex.name not in ignore_name
)
Loading…
Cancel
Save