diff --git a/.github/workflows/py-ci.yml b/.github/workflows/py-ci.yml index 3ab10f7d2e..60372134b6 100644 --- a/.github/workflows/py-ci.yml +++ b/.github/workflows/py-ci.yml @@ -97,3 +97,20 @@ jobs: pip install -e . - name: Run Tests && Check coverage run: coverage run && coverage report + doc-build: + name: Document Build Test + needs: pytest + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.7 + uses: actions/setup-python@v2 + with: + python-version: 3.7 + - name: Install Development Dependences + run: | + pip install -r requirements_dev.txt + pip install -e . + - name: Test Build Document + working-directory: dolphinscheduler-python/pydolphinscheduler/docs + run: make clean && make html diff --git a/.licenserc.yaml b/.licenserc.yaml index baf8cf604b..e510b0cb9d 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -45,5 +45,6 @@ header: - '.github/actions/comment-on-issue/**' - '.github/actions/reviewdog-setup/**' - '.github/actions/translate-on-issue/**' + - '**/.gitkeep' comment: on-failure diff --git a/dolphinscheduler-python/pydolphinscheduler/.flake8 b/dolphinscheduler-python/pydolphinscheduler/.flake8 index e676972c22..7f659a21b3 100644 --- a/dolphinscheduler-python/pydolphinscheduler/.flake8 +++ b/dolphinscheduler-python/pydolphinscheduler/.flake8 @@ -35,3 +35,4 @@ ignore = W503 # W503: Line breaks before binary operators per-file-ignores = src/pydolphinscheduler/side/__init__.py:F401 + src/pydolphinscheduler/tasks/__init__.py:F401 diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/Makefile b/dolphinscheduler-python/pydolphinscheduler/docs/Makefile new file mode 100644 index 0000000000..985198a779 --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/docs/Makefile @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. + +# Add opts `turn warnings into errors` strict sphinx-build behavior +SPHINXOPTS ?= -W +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/make.bat b/dolphinscheduler-python/pydolphinscheduler/docs/make.bat new file mode 100644 index 0000000000..feac4c92c0 --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/docs/make.bat @@ -0,0 +1,54 @@ +REM Licensed to the Apache Software Foundation (ASF) under one +REM or more contributor license agreements. See the NOTICE file +REM distributed with this work for additional information +REM regarding copyright ownership. The ASF licenses this file +REM to you under the Apache License, Version 2.0 (the +REM "License"); you may not use this file except in compliance +REM with the License. You may obtain a copy of the License at +REM +REM http://www.apache.org/licenses/LICENSE-2.0 +REM +REM Unless required by applicable law or agreed to in writing, +REM software distributed under the License is distributed on an +REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +REM KIND, either express or implied. See the License for the +REM specific language governing permissions and limitations +REM under the License. + +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build +REM Add opts `turn warnings into errors` strict sphinx-build behavior +set SPHINXOPTS=-W + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/_static/.gitkeep b/dolphinscheduler-python/pydolphinscheduler/docs/source/_static/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/api.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/api.rst new file mode 100644 index 0000000000..8e55ea5583 --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/api.rst @@ -0,0 +1,47 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +API +=== + +Core +---- + +.. automodule:: pydolphinscheduler.core + :inherited-members: + +Sides +----- + +.. automodule:: pydolphinscheduler.side + :inherited-members: + +Tasks +----- + +.. automodule:: pydolphinscheduler.tasks + :inherited-members: + +Constants +--------- + +.. automodule:: pydolphinscheduler.constants + +Exceptions +---------- + +.. automodule:: pydolphinscheduler.exceptions diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/concept.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/concept.rst new file mode 100644 index 0000000000..9a9527df1d --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/concept.rst @@ -0,0 +1,151 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Concepts +======== + +In this section, you would know the core concepts of *PyDolphinScheduler*. + +Process Definition +------------------ + +Process definition describe the whole things except `tasks`_ and `tasks dependence`_, which including +name, schedule interval, schedule start time and end time. You would know scheduler + +Process definition could be initialized in normal assign statement or in context manger. + +.. code-block:: python + + # Initialization with assign statement + pd = ProcessDefinition(name="my first process definition") + + # Or context manger + with ProcessDefinition(name="my first process definition") as pd: + pd.submit() + +Process definition is the main object communicate between *PyDolphinScheduler* and DolphinScheduler daemon. +After process definition and task is be declared, you could use `submit` and `run` notify server your definition. + +If you just want to submit your definition and create workflow, without run it, you should use attribute `submit`. +But if you want to run the workflow after you submit it, you could use attribute `run`. + +.. code-block:: python + + # Just submit definition, without run it + pd.submit() + + # Both submit and run definition + pd.run() + +Schedule +~~~~~~~~ + +We use parameter `schedule` determine the schedule interval of workflow, *PyDolphinScheduler* support seven +asterisks expression, and each of the meaning of position as below + +.. code-block:: text + + * * * * * * * + ┬ ┬ ┬ ┬ ┬ ┬ ┬ + │ │ │ │ │ │ │ + │ │ │ │ │ │ └─── year + │ │ │ │ │ └───── day of week (0 - 7) (0 to 6 are Sunday to Saturday, or use names; 7 is Sunday, the same as 0) + │ │ │ │ └─────── month (1 - 12) + │ │ │ └───────── day of month (1 - 31) + │ │ └─────────── hour (0 - 23) + │ └───────────── min (0 - 59) + └─────────────── second (0 - 59) + +Here we add some example crontab: + +- `0 0 0 * * ? *`: Workflow execute every day at 00:00:00. +- `10 2 * * * ? *`: Workflow execute hourly day at ten pass two. +- `10,11 20 0 1,2 * ? *`: Workflow execute first and second day of month at 00:20:10 and 00:20:11. + +Tenant +~~~~~~ + +Tenant is the user who run task command in machine or in virtual machine. it could be assign by simple string. + +.. code-block:: python + + # + pd = ProcessDefinition(name="process definition tenant", tenant="tenant_exists") + +.. note:: + + Make should tenant exists in target machine, otherwise it will raise an error when you try to run command + +Tasks +----- + +Task is the minimum unit running actual job, and it is nodes of DAG, aka directed acyclic graph. You could define +what you want to in the task. It have some required parameter to make uniqueness and definition. + +Here we use :py:meth:`pydolphinscheduler.tasks.Shell` as example, parameter `name` and `command` is required and must be provider. Parameter +`name` set name to the task, and parameter `command` declare the command you wish to run in this task. + +.. code-block:: python + + # We named this task as "shell", and just run command `echo shell task` + shell_task = Shell(name="shell", command="echo shell task") + +If you want to see all type of tasks, you could see :doc:`tasks/index`. + +Tasks Dependence +~~~~~~~~~~~~~~~~ + +You could define many tasks in on single `Process Definition`_. If all those task is in parallel processing, +then you could leave them alone without adding any additional information. But if there have some tasks should +not be run unless pre task in workflow have be done, we should set task dependence to them. Set tasks dependence +have two mainly way and both of them is easy. You could use bitwise operator `>>` and `<<`, or task attribute +`set_downstream` and `set_upstream` to do it. + +.. code-block:: python + + # Set task1 as task2 upstream + task1 >> task2 + # You could use attribute `set_downstream` too, is same as `task1 >> task2` + task1.set_downstream(task2) + + # Set task1 as task2 downstream + task1 << task2 + # It is same as attribute `set_upstream` + task1.set_upstream(task2) + + # Beside, we could set dependence between task and sequence of tasks, + # we set `task1` is upstream to both `task2` and `task3`. It is useful + # for some tasks have same dependence. + task1 >> [task2, task3] + +Task With Process Definition +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In most of data orchestration cases, you should assigned attribute `process_definition` to task instance to +decide workflow of task. You could set `process_definition` in both normal assign or in context manger mode + +.. code-block:: python + + # Normal assign, have to explicit declaration and pass `ProcessDefinition` instance to task + pd = ProcessDefinition(name="my first process definition") + shell_task = Shell(name="shell", command="echo shell task", process_definition=pd) + + # Context manger, `ProcessDefinition` instance pd would implicit declaration to task + with ProcessDefinition(name="my first process definition") as pd: + shell_task = Shell(name="shell", command="echo shell task", + +With both `Process Definition`_, `Tasks`_ and `Tasks Dependence`_, we could build a workflow with multiple tasks. diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/conf.py b/dolphinscheduler-python/pydolphinscheduler/docs/source/conf.py new file mode 100644 index 0000000000..5ee73a5bb4 --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/conf.py @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- Project information ----------------------------------------------------- + +project = "pydolphinscheduler" +copyright = "2022, apache" +author = "apache" + +# The full version, including alpha/beta/rc tags +release = "0.0.1" + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + # Measures durations of Sphinx processing + "sphinx.ext.duration", + # Semi-automatic make docstrings to document + "sphinx.ext.autodoc", + "sphinx.ext.viewcode", + "sphinx.ext.autosectionlabel", + "sphinx_rtd_theme", +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + +autodoc_default_options = { + "members": True, + "show-inheritance": True, + "private-members": True, + "undoc-members": True, + "member-order": "groupwise", +} + +autosectionlabel_prefix_document = True + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "sphinx_rtd_theme" + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ["_static"] diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/index.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/index.rst new file mode 100644 index 0000000000..b04c26f094 --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/index.rst @@ -0,0 +1,42 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +PyDolphinScheduler +================== + +**PyDolphinScheduler** is Python API for `Apache DolphinScheduler `_, +which allow you definition your workflow by Python code, aka workflow-as-codes. + +I could go and find how to :ref:`install ` the project. Or if you want to see simply example +then go and see :doc:`tutorial` for more detail. + + +.. toctree:: + :maxdepth: 2 + + start + tutorial + concept + tasks/index + api + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/start.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/start.rst new file mode 100644 index 0000000000..0af90d5494 --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/start.rst @@ -0,0 +1,113 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Getting Started +=============== + +To get started with *PyDolphinScheduler* you must ensure python and pip +installed on your machine, if you're already set up, you can skip straight +to `Installing PyDolphinScheduler`_, otherwise please continue with +`Installing Python`_. + +Installing Python +----------------- + +How to install `python` and `pip` depends on what operating system +you're using. The python wiki provides up to date +`instructions for all platforms here`_. When you entering the website +and choice your operating system, you would be offered the choice and +select python version. *PyDolphinScheduler* recommend use version above +Python 3.6 and we highly recommend you install *Stable Releases* instead +of *Pre-releases*. + +After you have download and installed Python, you should open your terminal, +typing and running :code:`python --version` to check whether the installation +is correct or not. If all thing good, you could see the version in console +without error(here is a example after Python 3.8.7 installed) + +.. code-block:: bash + + $ python --version + Python 3.8.7 + +Installing PyDolphinScheduler +----------------------------- + +After Python is already installed on your machine following section +`installing Python`_, it easy to *PyDolphinScheduler* by pip. + +.. code-block:: bash + + $ pip install apache-dolphinscheduler + +The latest version of *PyDolphinScheduler* would be installed after you run above +command in your terminal. You could go and `start Python Gateway Server`_ to finish +the prepare, and then go to :doc:`tutorial` to make your hand dirty. But if you +want to install the unreleased version of *PyDolphinScheduler*, you could go and see +section `installing PyDolphinScheduler in dev`_ for more detail. + +Installing PyDolphinScheduler In Dev +------------------------------------ + +Because the project is developing and some of the features still not release. +If you want to try some thing unreleased you could install from the source code +which we hold in GitHub + +.. code-block:: bash + + # Clone Apache DolphinScheduler repository + $ git clone git@github.com:apache/dolphinscheduler.git + # Install PyDolphinScheduler in develop mode + $ cd dolphinscheduler-python/pydolphinscheduler && pip install -e . + +After you installed *PyDolphinScheduler*, please remember `start Python Gateway Server`_ +which waiting for *PyDolphinScheduler*'s workflow definition require. + +Start Python Gateway Server +--------------------------- + +Since **PyDolphinScheduler** is Python API for `Apache DolphinScheduler`_, it +could define workflow and tasks structure, but could not run it unless you +`install Apache DolphinScheduler`_ and start Python gateway server. We only +and some key steps here and you could go `install Apache DolphinScheduler`_ +for more detail + +.. code-block:: bash + + # Start pythonGatewayServer + $ ./bin/dolphinscheduler-daemon.sh start pythonGatewayServer + +To check whether the server is alive or not, you could run :code:`jps`. And +the server is health if keyword `PythonGatewayServer` in the console. + +.. code-block:: bash + + $ jps + .... + 201472 PythonGatewayServer + .... + +What's More +----------- + +If you do not familiar with *PyDolphinScheduler*, you could go to :doc:`tutorial` +and see how it work. But if you already know the inside of *PyDolphinScheduler*, +maybe you could go and play with all :doc:`tasks/index` *PyDolphinScheduler* supports. + +.. _`instructions for all platforms here`: https://wiki.python.org/moin/BeginnersGuide/Download +.. _`Apache DolphinScheduler`: https://dolphinscheduler.apache.org +.. _`install Apache DolphinScheduler`: https://dolphinscheduler.apache.org/en-us/docs/latest/user_doc/guide/installation/standalone.html diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/condition.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/condition.rst new file mode 100644 index 0000000000..57aff8ec73 --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/condition.rst @@ -0,0 +1,33 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Condition +========= + +A condition task type's example and dive into information of **PyDolphinScheduler**. + +Example +------- + +.. literalinclude:: ../../../examples/task_condition_example.py + :start-after: [start workflow_declare] + :end-before: [end workflow_declare] + +Dive Into +--------- + +.. automodule:: pydolphinscheduler.tasks.condition diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/datax.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/datax.rst new file mode 100644 index 0000000000..da9e4ce615 --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/datax.rst @@ -0,0 +1,33 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Datax +===== + +A DataX task type's example and dive into information of **PyDolphinScheduler**. + +Example +------- + +.. literalinclude:: ../../../examples/task_datax_example.py + :start-after: [start workflow_declare] + :end-before: [end workflow_declare] + +Dive Into +--------- + +.. automodule:: pydolphinscheduler.tasks.datax diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/dependent.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/dependent.rst new file mode 100644 index 0000000000..fb3b3e28dc --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/dependent.rst @@ -0,0 +1,33 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Dependent +========= + +A dependent task type's example and dive into information of **PyDolphinScheduler**. + +Example +------- + +.. literalinclude:: ../../../examples/task_dependent_example.py + :start-after: [start workflow_declare] + :end-before: [end workflow_declare] + +Dive Into +--------- + +.. automodule:: pydolphinscheduler.tasks.dependent diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/flink.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/flink.rst new file mode 100644 index 0000000000..9eadd8f5cb --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/flink.rst @@ -0,0 +1,33 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Flink +===== + +A flink task type's example and dive into information of **PyDolphinScheduler**. + +Example +------- + +.. literalinclude:: ../../../examples/task_flink_example.py + :start-after: [start workflow_declare] + :end-before: [end workflow_declare] + +Dive Into +--------- + +.. automodule:: pydolphinscheduler.tasks.flink diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/http.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/http.rst new file mode 100644 index 0000000000..4c6d8f8e40 --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/http.rst @@ -0,0 +1,21 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +HTTP +==== + +.. automodule:: pydolphinscheduler.tasks.http diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/index.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/index.rst new file mode 100644 index 0000000000..42dcdf9c8c --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/index.rst @@ -0,0 +1,41 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Tasks +===== + +In this section + +.. toctree:: + :maxdepth: 1 + + shell + sql + python + http + + switch + condition + dependent + + spark + flink + map_reduce + procedure + + datax + sub_process diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/map_reduce.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/map_reduce.rst new file mode 100644 index 0000000000..c46f4c6215 --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/map_reduce.rst @@ -0,0 +1,34 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Map Reduce +========== + + +A Map Reduce task type's example and dive into information of **PyDolphinScheduler**. + +Example +------- + +.. literalinclude:: ../../../examples/task_map_reduce_example.py + :start-after: [start workflow_declare] + :end-before: [end workflow_declare] + +Dive Into +--------- + +.. automodule:: pydolphinscheduler.tasks.map_reduce diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/procedure.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/procedure.rst new file mode 100644 index 0000000000..cd79eff140 --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/procedure.rst @@ -0,0 +1,21 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Procedure +========= + +.. automodule:: pydolphinscheduler.tasks.procedure diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/python.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/python.rst new file mode 100644 index 0000000000..660e46a6e5 --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/python.rst @@ -0,0 +1,21 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Python +====== + +.. automodule:: pydolphinscheduler.tasks.python diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/shell.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/shell.rst new file mode 100644 index 0000000000..6497f0fd33 --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/shell.rst @@ -0,0 +1,33 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Shell +===== + +A shell task type's example and dive into information of **PyDolphinScheduler**. + +Example +------- + +.. literalinclude:: ../../../examples/tutorial.py + :start-after: [start workflow_declare] + :end-before: [end task_relation_declare] + +Dive Into +--------- + +.. automodule:: pydolphinscheduler.tasks.shell diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/spark.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/spark.rst new file mode 100644 index 0000000000..10fd67235f --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/spark.rst @@ -0,0 +1,33 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Spark +===== + +A spark task type's example and dive into information of **PyDolphinScheduler**. + +Example +------- + +.. literalinclude:: ../../../examples/task_spark_example.py + :start-after: [start workflow_declare] + :end-before: [end workflow_declare] + +Dive Into +--------- + +.. automodule:: pydolphinscheduler.tasks.spark diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sql.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sql.rst new file mode 100644 index 0000000000..21eaec7ae9 --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sql.rst @@ -0,0 +1,21 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +SQL +=== + +.. automodule:: pydolphinscheduler.tasks.sql diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sub_process.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sub_process.rst new file mode 100644 index 0000000000..8a9f562200 --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sub_process.rst @@ -0,0 +1,21 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Sub Process +=========== + +.. automodule:: pydolphinscheduler.tasks.sub_process diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/switch.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/switch.rst new file mode 100644 index 0000000000..271050ee5c --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/switch.rst @@ -0,0 +1,33 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Switch +====== + +A switch task type's example and dive into information of **PyDolphinScheduler**. + +Example +------- + +.. literalinclude:: ../../../examples/task_switch_example.py + :start-after: [start workflow_declare] + :end-before: [end workflow_declare] + +Dive Into +--------- + +.. automodule:: pydolphinscheduler.tasks.switch diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tutorial.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tutorial.rst new file mode 100644 index 0000000000..a7c9e585cd --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/docs/source/tutorial.rst @@ -0,0 +1,150 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Tutorial +======== + +This tutorial show you the basic concept of *PyDolphinScheduler* and tell all +things you should know before you submit or run your first workflow. If you +still not install *PyDolphinScheduler* and start Apache DolphinScheduler, you +could go and see :ref:`how to getting start PyDolphinScheduler ` + +Overview of Tutorial +-------------------- + +Here have an overview of our tutorial, and it look a little complex but do not +worry about that because we explain this example below as detailed as possible. + +.. literalinclude:: ../../examples/tutorial.py + :start-after: [start tutorial] + :end-before: [end tutorial] + +Import Necessary Module +----------------------- + +First of all, we should importing necessary module which we would use later just +like other Python package. We just create a minimum demo here, so we just import +:class:`pydolphinscheduler.core.process_definition` and +:class:`pydolphinscheduler.tasks.shell`. + +.. literalinclude:: ../../examples/tutorial.py + :start-after: [start package_import] + :end-before: [end package_import] + +If you want to use other task type you could click and +:doc:`see all tasks we support ` + +Process Definition Declaration +------------------------------ + +We should instantiate object after we import them from `import necessary module`_. +Here we declare basic arguments for process definition(aka, workflow). We define +the name of process definition, using `Python context manager`_ and it +**the only required argument** for object process definition. Beside that we also +declare three arguments named `schedule`, `start_time` which setting workflow schedule +interval and schedule start_time, and argument `tenant` which changing workflow's +task running user in the worker, :ref:`section tenant ` in *PyDolphinScheduler* +:doc:`concept` page have more detail information. + +.. literalinclude:: ../../examples/tutorial.py + :start-after: [start workflow_declare] + :end-before: [end workflow_declare] + +We could find more detail about process definition in +:ref:`concept about process definition ` if you interested in it. +For all arguments of object process definition, you could find in the +:class:`pydolphinscheduler.core.process_definition` api documentation. + +Task Declaration +---------------- + +Here we declare four tasks, and bot of them are simple task of +:class:`pydolphinscheduler.tasks.shell` which running `echo` command in terminal. +Beside the argument `command`, we also need setting argument `name` for each task *(not +only shell task, `name` is required for each type of task)*. + +.. literalinclude:: ../../examples/tutorial.py + :dedent: 0 + :start-after: [start task_declare] + :end-before: [end task_declare] + +Beside shell task, *PyDolphinScheduler* support multiple tasks and you could +find in :doc:`tasks/index`. + +Setting Task Dependence +----------------------- + +After we declare both process definition and task, we have one workflow with +four tasks, both all tasks is independent so that they would run in parallel. +We should reorder the sort and the dependence of tasks. It useful when we need +run prepare task before we run actual task or we need tasks running is specific +rule. We both support attribute `set_downstream` and `set_upstream`, or bitwise +operators `>>` and `<<`. + +In this example, we set task `task_parent` is the upstream task of task +`task_child_one` and `task_child_two`, and task `task_union` is the downstream +task of both these two task. + +.. literalinclude:: ../../examples/tutorial.py + :dedent: 0 + :start-after: [start task_relation_declare] + :end-before: [end task_relation_declare] + +Please notice that we could grouping some tasks and set dependence if they have +same downstream or upstream. We declare task `task_child_one` and `task_child_two` +as a group here, named as `task_group` and set task `task_parent` as upstream of +both of them. You could see more detail in :ref:`concept:Tasks Dependence` section in concept +documentation. + +Submit Or Run Workflow +---------------------- + +Now we finish our workflow definition, with task and task dependence, but all +these things are in local, we should let Apache DolphinScheduler daemon know what we +define our workflow. So the last thing we have to do here is submit our workflow to +Apache DolphinScheduler daemon. + +We here in the example using `ProcessDefinition` attribute `run` to submit workflow +to the daemon, and set the schedule time we just declare in `process definition declaration`_. + +Now, we could run the Python code like other Python script, for the basic usage run +:code:`python tutorial.py` to trigger and run it. + +.. literalinclude:: ../../examples/tutorial.py + :dedent: 0 + :start-after: [start submit_or_run] + :end-before: [end submit_or_run] + +If you not start your Apache DolphinScheduler server, you could find the way in +:ref:`start:start Python gateway server` and it would have more detail about related server +start. Beside attribute `run`, we have attribute `submit` for object `ProcessDefinition` +and it just submit workflow to the daemon but not setting the schedule information. For +more detail you could see :ref:`concept:process definition`. + +DAG Graph After Tutorial Run +---------------------------- + +After we run the tutorial code, you could login Apache DolphinScheduler web UI, +go and see the `DolphinScheduler project page`_. they is a new process definition be +created and named "Tutorial". It create by *PyDolphinScheduler* and the DAG graph as below + +.. literalinclude:: ../../examples/tutorial.py + :language: text + :lines: 24-28 + +.. _`DolphinScheduler project page`: https://dolphinscheduler.apache.org/en-us/docs/latest/user_doc/guide/project.html +.. _`Python context manager`: https://docs.python.org/3/library/stdtypes.html#context-manager-types diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/task_conditions_example.py b/dolphinscheduler-python/pydolphinscheduler/examples/task_condition_example.py similarity index 91% rename from dolphinscheduler-python/pydolphinscheduler/examples/task_conditions_example.py rename to dolphinscheduler-python/pydolphinscheduler/examples/task_condition_example.py index 6c1b039a3b..2d73df4b40 100644 --- a/dolphinscheduler-python/pydolphinscheduler/examples/task_conditions_example.py +++ b/dolphinscheduler-python/pydolphinscheduler/examples/task_condition_example.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. +# [start workflow_declare] r""" A example workflow for task condition. @@ -31,10 +32,10 @@ pre_task_3 -> -> fail_branch """ from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.condition import FAILURE, SUCCESS, And, Conditions +from pydolphinscheduler.tasks.condition import FAILURE, SUCCESS, And, Condition from pydolphinscheduler.tasks.shell import Shell -with ProcessDefinition(name="task_conditions_example", tenant="tenant_exists") as pd: +with ProcessDefinition(name="task_condition_example", tenant="tenant_exists") as pd: pre_task_1 = Shell(name="pre_task_1", command="echo pre_task_1") pre_task_2 = Shell(name="pre_task_2", command="echo pre_task_2") pre_task_3 = Shell(name="pre_task_3", command="echo pre_task_3") @@ -48,10 +49,11 @@ with ProcessDefinition(name="task_conditions_example", tenant="tenant_exists") a success_branch = Shell(name="success_branch", command="echo success_branch") fail_branch = Shell(name="fail_branch", command="echo fail_branch") - condition = Conditions( - name="conditions", + condition = Condition( + name="condition", condition=cond_operator, success_task=success_branch, failed_task=fail_branch, ) pd.submit() +# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/task_datax_example.py b/dolphinscheduler-python/pydolphinscheduler/examples/task_datax_example.py index cdc7f0f860..94bd449cf7 100644 --- a/dolphinscheduler-python/pydolphinscheduler/examples/task_datax_example.py +++ b/dolphinscheduler-python/pydolphinscheduler/examples/task_datax_example.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. +# [start workflow_declare] """ A example workflow for task datax. @@ -24,7 +25,6 @@ You can create data sources `first_mysql` and `first_mysql` through UI. It creates a task to synchronize datax from the source database to the target database. """ - from pydolphinscheduler.core.process_definition import ProcessDefinition from pydolphinscheduler.tasks.datax import CustomDataX, DataX @@ -92,3 +92,4 @@ with ProcessDefinition( # datax job same as task1, transfer record from `first_mysql` to `second_mysql` task2 = CustomDataX(name="task_custom_datax", json=str(JSON_TEMPLATE)) pd.run() +# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/task_dependent_example.py b/dolphinscheduler-python/pydolphinscheduler/examples/task_dependent_example.py index 73b91aaf56..ae19d95516 100644 --- a/dolphinscheduler-python/pydolphinscheduler/examples/task_dependent_example.py +++ b/dolphinscheduler-python/pydolphinscheduler/examples/task_dependent_example.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. +# [start workflow_declare] r""" A example workflow for task dependent. @@ -70,3 +71,4 @@ with ProcessDefinition( ), ) pd.submit() +# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/task_flink_example.py b/dolphinscheduler-python/pydolphinscheduler/examples/task_flink_example.py index 1ef259e57a..1e8a040c65 100644 --- a/dolphinscheduler-python/pydolphinscheduler/examples/task_flink_example.py +++ b/dolphinscheduler-python/pydolphinscheduler/examples/task_flink_example.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. +# [start workflow_declare] """A example workflow for task flink.""" from pydolphinscheduler.core.process_definition import ProcessDefinition @@ -29,3 +30,4 @@ with ProcessDefinition(name="task_flink_example", tenant="tenant_exists") as pd: deploy_mode=DeployMode.LOCAL, ) pd.run() +# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/task_map_reduce_example.py b/dolphinscheduler-python/pydolphinscheduler/examples/task_map_reduce_example.py index 75e09a9aea..39b204f82a 100644 --- a/dolphinscheduler-python/pydolphinscheduler/examples/task_map_reduce_example.py +++ b/dolphinscheduler-python/pydolphinscheduler/examples/task_map_reduce_example.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. +# [start workflow_declare] """A example workflow for task mr.""" from pydolphinscheduler.core.engine import ProgramType @@ -30,3 +31,4 @@ with ProcessDefinition(name="task_map_reduce_example", tenant="tenant_exists") a main_args="/dolphinscheduler/tenant_exists/resources/file.txt /output/ds", ) pd.run() +# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/task_spark_example.py b/dolphinscheduler-python/pydolphinscheduler/examples/task_spark_example.py index 6cace77157..594d95f55a 100644 --- a/dolphinscheduler-python/pydolphinscheduler/examples/task_spark_example.py +++ b/dolphinscheduler-python/pydolphinscheduler/examples/task_spark_example.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. +# [start workflow_declare] """A example workflow for task spark.""" from pydolphinscheduler.core.process_definition import ProcessDefinition @@ -29,3 +30,4 @@ with ProcessDefinition(name="task_spark_example", tenant="tenant_exists") as pd: deploy_mode=DeployMode.LOCAL, ) pd.run() +# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/task_switch_example.py b/dolphinscheduler-python/pydolphinscheduler/examples/task_switch_example.py index 2b30d0a98c..7966af320e 100644 --- a/dolphinscheduler-python/pydolphinscheduler/examples/task_switch_example.py +++ b/dolphinscheduler-python/pydolphinscheduler/examples/task_switch_example.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. +# [start workflow_declare] r""" A example workflow for task switch. @@ -47,3 +48,4 @@ with ProcessDefinition( switch = Switch(name="switch", condition=switch_condition) parent >> switch pd.submit() +# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/tutorial.py b/dolphinscheduler-python/pydolphinscheduler/examples/tutorial.py index d58bd753b0..0478e68519 100644 --- a/dolphinscheduler-python/pydolphinscheduler/examples/tutorial.py +++ b/dolphinscheduler-python/pydolphinscheduler/examples/tutorial.py @@ -24,29 +24,45 @@ and workflow DAG graph as below: --> task_child_one / \ task_parent --> --> task_union - \ / + \ / --> task_child_two it will instantiate and run all the task it have. """ +# [start tutorial] +# [start package_import] +# Import ProcessDefinition object to define your workflow attributes from pydolphinscheduler.core.process_definition import ProcessDefinition + +# Import task Shell object cause we would create some shell tasks later from pydolphinscheduler.tasks.shell import Shell +# [end package_import] + +# [start workflow_declare] with ProcessDefinition( name="tutorial", schedule="0 0 0 * * ? *", start_time="2021-01-01", tenant="tenant_exists", ) as pd: + # [end workflow_declare] + # [start task_declare] task_parent = Shell(name="task_parent", command="echo hello pydolphinscheduler") task_child_one = Shell(name="task_child_one", command="echo 'child one'") task_child_two = Shell(name="task_child_two", command="echo 'child two'") task_union = Shell(name="task_union", command="echo union") + # [end task_declare] + # [start task_relation_declare] task_group = [task_child_one, task_child_two] task_parent.set_downstream(task_group) task_union << task_group + # [end task_relation_declare] + # [start submit_or_run] pd.run() + # [end submit_or_run] +# [end tutorial] diff --git a/dolphinscheduler-python/pydolphinscheduler/requirements_dev.txt b/dolphinscheduler-python/pydolphinscheduler/requirements_dev.txt index d31a56dc44..58f6102deb 100644 --- a/dolphinscheduler-python/pydolphinscheduler/requirements_dev.txt +++ b/dolphinscheduler-python/pydolphinscheduler/requirements_dev.txt @@ -25,3 +25,6 @@ flake8 flake8-docstrings flake8-black isort +# Document +sphinx +sphinx_rtd_theme diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/__init__.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/__init__.py index 3fbddf312b..31dc9446d8 100644 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/__init__.py +++ b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/__init__.py @@ -16,3 +16,13 @@ # under the License. """Init pydolphinscheduler.core package.""" + +from pydolphinscheduler.core.database import Database +from pydolphinscheduler.core.process_definition import ProcessDefinition +from pydolphinscheduler.core.task import Task + +__all__ = [ + "ProcessDefinition", + "Task", + "Database", +] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/side/__init__.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/side/__init__.py index de5188cc51..c4479ddf24 100644 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/side/__init__.py +++ b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/side/__init__.py @@ -18,5 +18,15 @@ """Init Side package, Side package keep object related to DolphinScheduler but not in the Core part.""" from pydolphinscheduler.side.project import Project +from pydolphinscheduler.side.queue import Queue from pydolphinscheduler.side.tenant import Tenant from pydolphinscheduler.side.user import User +from pydolphinscheduler.side.worker_group import WorkerGroup + +__all__ = [ + "Project", + "Tenant", + "User", + "Queue", + "WorkerGroup", +] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/__init__.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/__init__.py index 9eded9993f..dd46c912df 100644 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/__init__.py +++ b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/__init__.py @@ -16,3 +16,33 @@ # under the License. """Init pydolphinscheduler.tasks package.""" + +from pydolphinscheduler.tasks.condition import FAILURE, SUCCESS, And, Condition, Or +from pydolphinscheduler.tasks.datax import CustomDataX, DataX +from pydolphinscheduler.tasks.dependent import Dependent +from pydolphinscheduler.tasks.flink import Flink +from pydolphinscheduler.tasks.http import Http +from pydolphinscheduler.tasks.map_reduce import MR +from pydolphinscheduler.tasks.procedure import Procedure +from pydolphinscheduler.tasks.python import Python +from pydolphinscheduler.tasks.shell import Shell +from pydolphinscheduler.tasks.spark import Spark +from pydolphinscheduler.tasks.sql import Sql +from pydolphinscheduler.tasks.sub_process import SubProcess +from pydolphinscheduler.tasks.switch import Branch, Default, Switch, SwitchCondition + +__all__ = [ + "Condition", + "DataX", + "Dependent", + "Flink", + "Http", + "MR", + "Procedure", + "Python", + "Shell", + "Spark", + "Sql", + "SubProcess", + "Switch", +] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/condition.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/condition.py index 895a29bfdf..50aac251d8 100644 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/condition.py +++ b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/condition.py @@ -154,7 +154,7 @@ class Or(ConditionOperator): super().__init__(*args) -class Conditions(Task): +class Condition(Task): """Task condition object, declare behavior for condition task to dolphinscheduler.""" def __init__( diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/shell.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/shell.py index 94ad2f4416..9a73535c8c 100644 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/shell.py +++ b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/shell.py @@ -24,11 +24,28 @@ from pydolphinscheduler.core.task import Task class Shell(Task): """Task shell object, declare behavior for shell task to dolphinscheduler. - TODO maybe we could use instance name to replace attribute `name` - which is simplify as `task_shell = Shell(command = "echo 1")` and - task.name assign to `task_shell` + :param name: A unique, meaningful string for the shell task. + :param command: One or more command want to run in this task. + + It could be simply command:: + + Shell(name=..., command="echo task shell") + + or maybe same commands trying to do complex task:: + + command = '''echo task shell step 1; + echo task shell step 2; + echo task shell step 3 + ''' + + Shell(name=..., command=command) + """ + # TODO maybe we could use instance name to replace attribute `name` + # which is simplify as `task_shell = Shell(command = "echo 1")` and + # task.name assign to `task_shell` + _task_custom_attr = { "raw_script", } diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/example/test_example.py b/dolphinscheduler-python/pydolphinscheduler/tests/example/test_example.py index bf980edb8d..e2e39da78e 100644 --- a/dolphinscheduler-python/pydolphinscheduler/tests/example/test_example.py +++ b/dolphinscheduler-python/pydolphinscheduler/tests/example/test_example.py @@ -19,28 +19,17 @@ import ast import importlib - -# import os -# import os -from pathlib import Path from unittest.mock import patch import pytest +from tests.testing.constants import task_without_example +from tests.testing.path import get_all_examples, get_tasks from tests.testing.task import Task process_definition_name = set() -def get_all_example_define(): - """Get all examples files in examples directory.""" - return ( - path - for path in Path(__file__).parent.parent.parent.joinpath("examples").iterdir() - if path.is_file() - ) - - def import_module(script_name, script_path): """Import and run example module in examples directory.""" spec = importlib.util.spec_from_file_location(script_name, script_path) @@ -49,6 +38,27 @@ def import_module(script_name, script_path): return module +def test_task_without_example(): + """Test task which without example. + + Avoiding add new type of tasks but without adding example describe how to use it. + """ + # We use example/tutorial.py as shell task example + ignore_name = {"__init__.py", "shell.py"} + all_tasks = {task.stem for task in get_tasks(ignore_name=ignore_name)} + + have_example_tasks = set() + start = "task_" + end = "_example" + for ex in get_all_examples(): + stem = ex.stem + if stem.startswith(start) and stem.endswith(end): + task_name = stem.replace(start, "").replace(end, "") + have_example_tasks.add(task_name) + + assert all_tasks.difference(have_example_tasks) == task_without_example + + @pytest.fixture def setup_and_teardown_for_stuff(): """Fixture of py.test handle setup and teardown.""" @@ -80,7 +90,7 @@ def test_example_basic(): * All example except `tutorial.py` is end with keyword "_example" * All example must have not empty `__doc__`. """ - for ex in get_all_example_define(): + for ex in get_all_examples(): # All files in example is python script assert ( ex.suffix == ".py" @@ -121,7 +131,7 @@ def test_example_process_definition_without_same_name( Our process definition would compete with others if we have same process definition name. It will make different between actually workflow and our workflow-as-code file which make users feel strange. """ - for ex in get_all_example_define(): + for ex in get_all_examples(): # We use side_effect `submit_check_without_same_name` overwrite :func:`submit` # and check whether it have duplicate name or not import_module(ex.name, str(ex)) @@ -148,7 +158,7 @@ def test_file_name_in_process_definition(mock_code_version, mock_submit, mock_st more than one process definition. """ global process_definition_name - for ex in get_all_example_define(): + for ex in get_all_examples(): # Skip bulk_create_example check, cause it contain multiple workflow and # without one named bulk_create_example if ex.stem == "bulk_create_example": diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_condition.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_condition.py index 6085de23b0..523264034a 100644 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_condition.py +++ b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_condition.py @@ -27,8 +27,8 @@ from pydolphinscheduler.tasks.condition import ( FAILURE, SUCCESS, And, + Condition, ConditionOperator, - Conditions, Or, Status, ) @@ -321,7 +321,7 @@ def test_condition_operator_set_define_attr_mix_operator( return_value=(12345, 1), ) @patch( - "pydolphinscheduler.tasks.condition.Conditions.gen_code_and_version", + "pydolphinscheduler.tasks.condition.Condition.gen_code_and_version", return_value=(123, 1), ) def test_condition_get_define(mock_condition_code_version, mock_task_code_version): @@ -388,7 +388,7 @@ def test_condition_get_define(mock_condition_code_version, mock_task_code_versio "timeout": 0, } - task = Conditions( + task = Condition( name, condition=cond_operator, success_task=common_task, failed_task=common_task ) assert task.get_define() == expect @@ -414,7 +414,7 @@ def test_condition_set_dep_workflow(mock_task_code_version): success_branch = Task(name="success_branch", task_type=TEST_TYPE) fail_branch = Task(name="fail_branch", task_type=TEST_TYPE) - condition = Conditions( + condition = Condition( name="conditions", condition=cond_operator, success_task=success_branch, diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/test_docs.py b/dolphinscheduler-python/pydolphinscheduler/tests/test_docs.py new file mode 100644 index 0000000000..930e4f709e --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/tests/test_docs.py @@ -0,0 +1,59 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Test pydolphinscheduler docs.""" + +import re + +from tests.testing.constants import task_without_example +from tests.testing.path import get_doc_tasks, get_tasks + +ignore_code_file = {"__init__.py"} +ignore_doc_file = {"index.rst"} + + +def test_without_missing_task_rst(): + """Test without missing any task document by compare filename. + + Avoiding add new type of tasks but without adding document about it. + """ + code_files = {p.stem for p in get_tasks(ignore_name=ignore_code_file)} + doc_files = {p.stem for p in get_doc_tasks(ignore_name=ignore_doc_file)} + assert code_files == doc_files + + +def test_task_without_example(): + """Test task document which without example. + + Avoiding add new type of tasks but without adding example content describe how to use it. + """ + task_without_example_detected = set() + pattern = re.compile("Example\n-------") + + for doc in get_doc_tasks(ignore_name=ignore_doc_file): + search_result = pattern.search(doc.read_text()) + if not search_result: + task_without_example_detected.add(doc.stem) + assert task_without_example == task_without_example_detected + + +def test_doc_automodule_directive_name(): + """Test task document with correct name in directive automodule.""" + pattern = re.compile(".. automodule:: (.*)") + for doc in get_doc_tasks(ignore_name=ignore_doc_file): + match_string = pattern.search(doc.read_text()).group(1) + assert f"pydolphinscheduler.tasks.{doc.stem}" == match_string diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/testing/constants.py b/dolphinscheduler-python/pydolphinscheduler/tests/testing/constants.py new file mode 100644 index 0000000000..155219267e --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/tests/testing/constants.py @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Constants variables for test module.""" + +# Record some task without example in directory `example`. Some of them maybe can not write example, +# but most of them just without adding by mistake, and we should add it later. +task_without_example = { + "sql", + "http", + "sub_process", + "python", + "procedure", +} diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/testing/path.py b/dolphinscheduler-python/pydolphinscheduler/tests/testing/path.py new file mode 100644 index 0000000000..0fad03779f --- /dev/null +++ b/dolphinscheduler-python/pydolphinscheduler/tests/testing/path.py @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Handle path related issue in test module.""" + +from pathlib import Path +from typing import Any, Generator + +path_code_tasks = Path(__file__).parent.parent.parent.joinpath( + "src", "pydolphinscheduler", "tasks" +) +path_example = Path(__file__).parent.parent.parent.joinpath("examples") +path_doc_tasks = Path(__file__).parent.parent.parent.joinpath("docs", "source", "tasks") + + +def get_all_examples() -> Generator[Path, Any, None]: + """Get all examples files path in examples directory.""" + return (ex for ex in path_example.iterdir() if ex.is_file()) + + +def get_tasks(ignore_name: set = None) -> Generator[Path, Any, None]: + """Get all tasks files path in src/pydolphinscheduler/tasks directory.""" + if not ignore_name: + ignore_name = set() + return ( + ex + for ex in path_code_tasks.iterdir() + if ex.is_file() and ex.name not in ignore_name + ) + + +def get_doc_tasks(ignore_name: set = None) -> Generator[Path, Any, None]: + """Get all tasks document path in docs/source/tasks directory.""" + if not ignore_name: + ignore_name = set() + return ( + ex + for ex in path_doc_tasks.iterdir() + if ex.is_file() and ex.name not in ignore_name + )