Browse Source

cherry-pick [chore] Separate Python API into another repository

#12779
3.1.3-release
Jay Chung 2 years ago committed by zhuangchong
parent
commit
a645cb1c2a
  1. 12
      .flake8
  2. 1
      .github/CODEOWNERS
  3. 4
      .github/actions/labeler/labeler.yml
  4. 205
      .github/workflows/py-ci.yml
  5. 1
      .github/workflows/unit-test.yml
  6. 15
      .gitignore
  7. 49
      README.md
  8. 7
      docs/docs/en/contribute/release/release-post.md
  9. 1
      docs/docs/en/contribute/release/release-prepare.md
  10. 32
      docs/docs/en/contribute/release/release.md
  11. 7
      docs/docs/zh/contribute/release/release-post.md
  12. 1
      docs/docs/zh/contribute/release/release-prepare.md
  13. 31
      docs/docs/zh/contribute/release/release.md
  14. 2
      dolphinscheduler-api/pom.xml
  15. 35
      dolphinscheduler-dist/pom.xml
  16. 34
      dolphinscheduler-dist/src/main/assembly/dolphinscheduler-python-api.xml
  17. 7
      dolphinscheduler-dist/src/main/assembly/dolphinscheduler-src.xml
  18. 34
      dolphinscheduler-python/pydolphinscheduler/.coveragerc
  19. 19
      dolphinscheduler-python/pydolphinscheduler/.isort.cfg
  20. 56
      dolphinscheduler-python/pydolphinscheduler/.pre-commit-config.yaml
  21. 228
      dolphinscheduler-python/pydolphinscheduler/LICENSE
  22. 5
      dolphinscheduler-python/pydolphinscheduler/NOTICE
  23. 35
      dolphinscheduler-python/pydolphinscheduler/RELEASE.md
  24. 44
      dolphinscheduler-python/pydolphinscheduler/docs/Makefile
  25. 54
      dolphinscheduler-python/pydolphinscheduler/docs/make.bat
  26. 0
      dolphinscheduler-python/pydolphinscheduler/docs/source/_static/.gitkeep
  27. 27
      dolphinscheduler-python/pydolphinscheduler/docs/source/_templates/versioning.html
  28. 46
      dolphinscheduler-python/pydolphinscheduler/docs/source/_templates/versions.html
  29. 47
      dolphinscheduler-python/pydolphinscheduler/docs/source/api.rst
  30. 36
      dolphinscheduler-python/pydolphinscheduler/docs/source/cli.rst
  31. 151
      dolphinscheduler-python/pydolphinscheduler/docs/source/concept.rst
  32. 121
      dolphinscheduler-python/pydolphinscheduler/docs/source/conf.py
  33. 218
      dolphinscheduler-python/pydolphinscheduler/docs/source/config.rst
  34. 30
      dolphinscheduler-python/pydolphinscheduler/docs/source/howto/index.rst
  35. 51
      dolphinscheduler-python/pydolphinscheduler/docs/source/howto/remote-submit.rst
  36. 171
      dolphinscheduler-python/pydolphinscheduler/docs/source/start.rst
  37. 40
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/condition.rst
  38. 46
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/datax.rst
  39. 47
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/dependent.rst
  40. 41
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/dvc.rst
  41. 40
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/flink.rst
  42. 33
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/func_wrap.rst
  43. 29
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/http.rst
  44. 48
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/index.rst
  45. 42
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/map_reduce.rst
  46. 42
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/mlflow.rst
  47. 42
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/openmldb.rst
  48. 29
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/procedure.rst
  49. 29
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/python.rst
  50. 42
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/pytorch.rst
  51. 46
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sagemaker.rst
  52. 41
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/shell.rst
  53. 41
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/spark.rst
  54. 35
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sql.rst
  55. 38
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sub_process.rst
  56. 42
      dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/switch.rst
  57. 319
      dolphinscheduler-python/pydolphinscheduler/docs/source/tutorial.rst
  58. 43
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Condition.yaml
  59. 33
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/DataX.yaml
  60. 76
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dependent.yaml
  61. 26
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dependent_External.yaml
  62. 46
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dvc.yaml
  63. 29
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Flink.yaml
  64. 37
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Http.yaml
  65. 29
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/MapReduce.yaml
  66. 40
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/MoreConfiguration.yaml
  67. 33
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/OpenMLDB.yaml
  68. 27
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Procedure.yaml
  69. 30
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Python.yaml
  70. 53
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Pytorch.yaml
  71. 28
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Sagemaker.yaml
  72. 40
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Shell.yaml
  73. 45
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Sql.yaml
  74. 27
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/SubProcess.yaml
  75. 39
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Switch.yaml
  76. 62
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_datax.json
  77. 18
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sagemaker_params.json
  78. 22
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sql.sql
  79. 26
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sub_workflow.yaml
  80. 69
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/mlflow.yaml
  81. 46
      dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/tutorial.yaml
  82. 21
      dolphinscheduler-python/pydolphinscheduler/pytest.ini
  83. 16
      dolphinscheduler-python/pydolphinscheduler/setup.cfg
  84. 22
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/__init__.py
  85. 18
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/cli/__init__.py
  86. 106
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/cli/commands.py
  87. 193
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/configuration.py
  88. 30
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/__init__.py
  89. 62
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/database.py
  90. 94
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/engine.py
  91. 73
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/resource.py
  92. 466
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/yaml_process_define.py
  93. 58
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/default_config.yaml
  94. 18
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/__init__.py
  95. 55
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/bulk_create_example.py
  96. 59
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_condition_example.py
  97. 95
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_datax_example.py
  98. 74
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_dependent_example.py
  99. 52
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_dvc_example.py
  100. 33
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_flink_example.py
  101. Some files were not shown because too many files have changed in this diff Show More

12
dolphinscheduler-python/pydolphinscheduler/.flake8 → .flake8

@ -19,15 +19,6 @@
max-line-length = 110
exclude =
.git,
__pycache__,
.pytest_cache,
*.egg-info,
docs/source/conf.py
old,
build,
dist,
htmlcov,
.tox,
dist,
ignore =
# It's clear and not need to add docstring
@ -35,6 +26,3 @@ ignore =
D105, # D105: Missing docstring in magic method
# Conflict to Black
W503 # W503: Line breaks before binary operators
per-file-ignores =
*/pydolphinscheduler/side/__init__.py:F401
*/pydolphinscheduler/tasks/__init__.py:F401

1
.github/CODEOWNERS

@ -38,7 +38,6 @@
/dolphinscheduler-task-plugin/ @caishunfeng @SbloodyS @zhuangchong
/dolphinscheduler-tools/ @caishunfeng @SbloodyS @zhongjiajie @EricGao888
/script/ @caishunfeng @SbloodyS @zhongjiajie @EricGao888
/dolphinscheduler-python/ @zhongjiajie
/dolphinscheduler-ui/ @songjianet @Amy0104
/docs/ @zhongjiajie @Tianqi-Dotes @EricGao888
/licenses/ @kezhenxu94 @zhongjiajie

4
.github/actions/labeler/labeler.yml

@ -15,9 +15,6 @@
# limitations under the License.
#
Python:
- any: ['dolphinscheduler-python/**/*']
backend:
- 'dolphinscheduler-alert/**/*'
- 'dolphinscheduler-api/**/*'
@ -40,7 +37,6 @@ backend:
document:
- 'docs/**/*'
- 'dolphinscheduler-python/pydolphinscheduler/docs/**/*'
CI&CD:
- any: ['.github/**/*']

205
.github/workflows/py-ci.yml

@ -1,205 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Python API
on:
push:
branches:
- dev
paths:
- 'dolphinscheduler-python/**'
pull_request:
concurrency:
group: py-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
defaults:
run:
working-directory: dolphinscheduler-python/pydolphinscheduler
# We have to update setuptools wheel to package with package_data, LICENSE, NOTICE
env:
DEPENDENCES: pip setuptools wheel tox
jobs:
paths-filter:
name: Python-Path-Filter
runs-on: ubuntu-latest
outputs:
not-docs: ${{ steps.filter.outputs.not-docs }}
py-change: ${{ steps.filter.outputs.py-change }}
steps:
- uses: actions/checkout@v2
- uses: dorny/paths-filter@b2feaf19c27470162a626bd6fa8438ae5b263721
id: filter
with:
filters: |
not-docs:
- '!(docs/**)'
py-change:
- 'dolphinscheduler-python/pydolphinscheduler/**'
lint:
name: Lint
if: ${{ (needs.paths-filter.outputs.py-change == 'true') || (github.event_name == 'push') }}
timeout-minutes: 15
needs: paths-filter
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.7
uses: actions/setup-python@v4
with:
python-version: 3.7
- name: Install Dependences
run: |
python -m pip install --upgrade ${{ env.DEPENDENCES }}
- name: Run All Lint Check
run: |
python -m tox -vv -e lint
pytest:
name: Pytest
timeout-minutes: 15
needs: lint
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
# YAML parse `3.10` to `3.1`, so we have to add quotes for `'3.10'`, see also:
# https://github.com/actions/setup-python/issues/160#issuecomment-724485470
python-version: [3.6, 3.7, 3.8, 3.9, '3.10', 3.11-dev]
os: [ubuntu-latest, macOS-latest, windows-latest]
# Skip because dependence [py4j](https://pypi.org/project/py4j/) not work on those environments
exclude:
- os: windows-latest
python-version: '3.10'
- os: windows-latest
python-version: 3.11-dev
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install Dependences
run: |
python -m pip install --upgrade ${{ env.DEPENDENCES }}
- name: Run All Tests
run: |
python -m tox -vv -e code-test
doc-build:
name: Docs Build Test
timeout-minutes: 15
needs: lint
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
env-list: [doc-build, doc-build-multi]
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.7
uses: actions/setup-python@v4
with:
python-version: 3.7
- name: Install Dependences
run: |
python -m pip install --upgrade ${{ env.DEPENDENCES }}
- name: Run Build Docs Tests ${{ matrix.env-list }}
run: |
python -m tox -vv -e ${{ matrix.env-list }}
local-ci:
name: Local CI
timeout-minutes: 15
needs:
- pytest
- doc-build
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.7
uses: actions/setup-python@v4
with:
python-version: 3.7
- name: Install Dependences
run: |
python -m pip install --upgrade ${{ env.DEPENDENCES }}
- name: Run Tests Build Docs
run: |
python -m tox -vv -e local-ci
integrate-test:
name: Integrate Test
if: ${{ (needs.paths-filter.outputs.not-docs == 'true') || (github.event_name == 'push') }}
runs-on: ubuntu-latest
needs: paths-filter
timeout-minutes: 30
steps:
- uses: actions/checkout@v2
with:
submodules: true
- name: Sanity Check
uses: ./.github/actions/sanity-check
with:
token: ${{ secrets.GITHUB_TOKEN }}
- name: Cache local Maven repository
uses: actions/cache@v3
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: ${{ runner.os }}-maven-
# Switch to project root directory to run mvnw command
- name: Build Image
working-directory: ./
run: |
./mvnw -B clean install \
-Dmaven.test.skip \
-Dmaven.javadoc.skip \
-Dcheckstyle.skip=true \
-Pdocker,release -Ddocker.tag=ci \
-pl dolphinscheduler-standalone-server -am
- name: Set up Python 3.7
uses: actions/setup-python@v4
with:
python-version: 3.7
- name: Install Dependences
run: |
python -m pip install --upgrade ${{ env.DEPENDENCES }}
- name: Run Integrate Tests
run: |
python -m tox -vv -e integrate-test
result:
name: Python
runs-on: ubuntu-latest
timeout-minutes: 30
needs: [ paths-filter, local-ci, integrate-test ]
if: always()
steps:
- name: Status
# We need change CWD to current directory to avoid global default working directory not exists
working-directory: ./
run: |
if [[ ${{ needs.paths-filter.outputs.not-docs }} == 'false' && ${{ github.event_name }} == 'pull_request' ]]; then
echo "Only document change, skip both python unit and integrate test!"
exit 0
fi
if [[ ${{ needs.paths-filter.outputs.py-change }} == 'false' && ${{ needs.integrate-test.result }} == 'success' && ${{ github.event_name }} == 'pull_request' ]]; then
echo "No python code change, and integrate test pass!"
exit 0
fi
if [[ ${{ needs.integrate-test.result }} != 'success' || ${{ needs.local-ci.result }} != 'success' ]]; then
echo "py-ci Failed!"
exit -1
fi

1
.github/workflows/unit-test.yml

@ -23,7 +23,6 @@ on:
paths-ignore:
- '**/*.md'
- 'dolphinscheduler-ui'
- 'dolphinscheduler-python/pydolphinscheduler'
branches:
- dev

15
.gitignore vendored

@ -50,18 +50,3 @@ dolphinscheduler-common/test
dolphinscheduler-worker/logs
dolphinscheduler-master/logs
dolphinscheduler-api/logs
# ------------------
# pydolphinscheduler
# ------------------
# Cache
__pycache__/
.tox/
# Build
build/
*egg-info/
# Test coverage
.coverage
htmlcov/

49
README.md

@ -17,30 +17,31 @@ Dolphin Scheduler Official Website
DolphinScheduler is a distributed and extensible workflow scheduler platform with powerful DAG visual interfaces, dedicated to solving complex job dependencies in the data pipeline and providing various types of jobs available `out of the box`.
Its main objectives are as follows:
- Highly Reliable,
DolphinScheduler adopts a decentralized multi-master and multi-worker architecture design, which naturally supports easy expansion and high availability (not restricted by a single point of bottleneck), and its performance increases linearly with the increase of machines
- High performance, supporting tens of millions of tasks every day
- Support multi-tenant.
- Cloud Native, DolphinScheduler supports multi-cloud/data center workflow management, also
supports Kubernetes, Docker deployment and custom task types, distributed
scheduling, with overall scheduling capability increased linearly with the
scale of the cluster
- Support various task types: Shell, MR, Spark, SQL (MySQL, PostgreSQL, hive, spark SQL), Python, Sub_Process, Procedure, etc.
- Support scheduling of workflows and dependencies, manual scheduling to pause/stop/recover task, support failure task retry/alarm, recover specified nodes from failure, kill task, etc.
- Associate the tasks according to the dependencies of the tasks in a DAG graph, which can visualize the running state of the task in real-time.
- WYSIWYG online editing tasks
- Support the priority of workflows & tasks, task failover, and task timeout alarm or failure.
- Support workflow global parameters and node customized parameter settings.
- Support online upload/download/management of resource files, etc. Support online file creation and editing.
- Support task log online viewing and scrolling and downloading, etc.
- Support the viewing of Master/Worker CPU load, memory, and CPU usage metrics.
- Support displaying workflow history in tree/Gantt chart, as well as statistical analysis on the task status & process status in each workflow.
- Support back-filling data.
- Support internationalization.
- More features waiting for partners to explore...
## What's in DolphinScheduler
The key features for DolphinScheduler are as follows:
- Easy to deploy, we provide 4 ways to deploy: Standalone deployment, Cluster deployment, Docker / Kubernetes deployment and Rainbond deployment
- Easy to use, there are four ways to create workflows:
- Visually, create tasks by dragging and dropping tasks
- [PyDolphinScheduler](https://dolphinscheduler.apache.org/python/dev/index.html), Creating workflows via Python API, aka workflow-as-code
- Yaml definition, mapping yaml into workflow(have to install PyDolphinScheduler currently)
- Open API, Creating workflows
- Highly Reliable,
DolphinScheduler uses a decentralized multi-master and multi-worker architecture, which naturally supports horizontal scaling and high availability
- High performance, its performance is N times faster than other orchestration platforms and it can support tens of millions of tasks per day
- Supports multi-tenancy
- Supports various task types: Shell, MR, Spark, SQL (MySQL, PostgreSQL, Hive, Spark SQL), Python, Procedure, Sub_Workflow,
Http, K8s, Jupyter, MLflow, SageMaker, DVC, Pytorch, Amazon EMR, etc
- Orchestrating workflows and dependencies, you can pause/stop/recover task any time, failed tasks can be set to automatically retry
- Visualizing the running state of the task in real-time and seeing the task runtime log
- What you see is what you get when you edit the task on the UI
- Backfill can be operated on the UI directly
- Perfect project, resource, data source-level permission control
- Displaying workflow history in tree/Gantt chart, as well as statistical analysis on the task status & process status in each workflow
- Supports internationalization
- Cloud Native, DolphinScheduler supports orchestrating multi-cloud/data center workflow, and
supports custom task type
- More features waiting for partners to explore
| Stability | Accessibility | Features | Scalability |
|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|

7
docs/docs/en/contribute/release/release-post.md

@ -1,7 +1,7 @@
# Release Post
We still have some publishing tasks to do after we send the announcement mail. Currently, we have to publish Docker images to
Docker Hub and also publish pydolphinscheduler to PyPI.
Docker Hub.
## Publish Docker Image
@ -20,11 +20,6 @@ We could reuse the main command the CI run and publish our Docker images to Dock
-Pdocker,release
```
## Publish pydolphinscheduler to PyPI
Python API need to release to PyPI for easier download and use, you can see more detail in [Python API release](https://github.com/apache/dolphinscheduler-sdk-python/blob/main/RELEASE.md)
to finish PyPI release.
## Get All Contributors
You might need all contributors in current release when you want to publish the release news or announcement, you could

1
docs/docs/en/contribute/release/release-prepare.md

@ -23,7 +23,6 @@ For example, to release `x.y.z`, the following updates are required:
- `deploy/kubernetes/dolphinscheduler`:
- `Chart.yaml`: `appVersion` needs to be updated to x.y.z (`version` is helm chart version,incremented and different from x.y.z)
- `values.yaml`: `image.tag` needs to be updated to x.y.z
- `dolphinscheduler-python/pydolphinscheduler/setup.py`: change `version` to x.y.z
- Version in the docs:
- Change the placeholder `<version>`(except `pom`) to the `x.y.z` in directory `docs`
- Add new history version

32
docs/docs/en/contribute/release/release.md

@ -10,8 +10,6 @@ all conditions are met, if any or them are missing, you should install them and
java -version
# Maven requests
mvn -version
# Python 3.6 above is requests, and you have to make keyword `python` work in your terminal and version match
python --version
```
## GPG Settings
@ -167,10 +165,10 @@ git push origin "${VERSION}"-release
```shell
mvn release:prepare -Prelease,python -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true -Dspotless.check.skip=true" -DautoVersionSubmodules=true -DdryRun=true -Dusername="${GH_USERNAME}"
mvn release:prepare -Prelease -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true -Dspotless.check.skip=true" -DautoVersionSubmodules=true -DdryRun=true -Dusername="${GH_USERNAME}"
```
* `-Prelease,python`: choose release and python profile, which will pack all the source codes, jar files and executable binary packages, and Python distribute package.
* `-Prelease`: choose release profile, which will pack all the source codes, jar files and executable binary packages.
* `-DautoVersionSubmodules=true`: lets you enter the version number only once, instead of once for each sub-module.
* `-DdryRun=true`: dry run which means not to generate or submit new version number and new tag.
@ -186,7 +184,7 @@ Then, prepare to execute the release.
```shell
mvn release:prepare -Prelease,python -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true -Dspotless.check.skip=true" -DautoVersionSubmodules=true -DpushChanges=false -Dusername="${GH_USERNAME}"
mvn release:prepare -Prelease -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true -Dspotless.check.skip=true" -DautoVersionSubmodules=true -DpushChanges=false -Dusername="${GH_USERNAME}"
```
It is basically the same as the previous rehearsal command, but without the `-DdryRun=true` parameter.
@ -218,8 +216,7 @@ git push origin --tags
### Deploy the Release
```shell
mvn release:perform -Prelease,python -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true -Dspotless.check.skip=true" -DautoVersionSubmodules=true -Dusername="${GH_USERNAME}"
mvn release:perform -Prelease -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true -Dspotless.check.skip=true" -DautoVersionSubmodules=true -Dusername="${GH_USERNAME}"
```
After that command is executed, the version to be released will be uploaded to Apache staging repository automatically.
@ -260,7 +257,6 @@ Create folder by version number.
```shell
mkdir -p ~/ds_svn/dev/dolphinscheduler/"${VERSION}"
mkdir -p ~/ds_svn/dev/dolphinscheduler/"${VERSION}"/python
cd ~/ds_svn/dev/dolphinscheduler/"${VERSION}"
```
@ -270,9 +266,6 @@ Add source code packages, binary packages and executable binary packages to SVN
# Source and binary tarball for main code
cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/*.tar.gz ~/ds_svn/dev/dolphinscheduler/"${VERSION}"
cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/*.tar.gz.asc ~/ds_svn/dev/dolphinscheduler/"${VERSION}"
# Source and binary tarball for Python API
cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/python/* ~/ds_svn/dev/dolphinscheduler/"${VERSION}"/python
```
### Generate sign files
@ -280,10 +273,6 @@ cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/python/* ~/ds_svn/dev/dolp
```shell
shasum -a 512 apache-dolphinscheduler-"${VERSION}"-src.tar.gz >> apache-dolphinscheduler-"${VERSION}"-src.tar.gz.sha512
shasum -b -a 512 apache-dolphinscheduler-"${VERSION}"-bin.tar.gz >> apache-dolphinscheduler-"${VERSION}"-bin.tar.gz.sha512
cd python
shasum -a 512 apache-dolphinscheduler-python-"${VERSION}".tar.gz >> apache-dolphinscheduler-python-"${VERSION}".tar.gz.sha512
shasum -b -a 512 apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl >> apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl.sha512
cd ../
```
### Commit to Apache SVN
@ -301,10 +290,6 @@ svn --username="${A_USERNAME}" commit -m "release ${VERSION}"
```shell
shasum -c apache-dolphinscheduler-"${VERSION}"-src.tar.gz.sha512
shasum -c apache-dolphinscheduler-"${VERSION}"-bin.tar.gz.sha512
cd python
shasum -c apache-dolphinscheduler-python-"${VERSION}".tar.gz.sha512
shasum -c apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl.sha512
cd ../
```
### Check gpg Signature
@ -338,10 +323,6 @@ Then, check the gpg signature.
```shell
gpg --verify apache-dolphinscheduler-"${VERSION}"-src.tar.gz.asc
gpg --verify apache-dolphinscheduler-"${VERSION}"-bin.tar.gz.asc
cd python
gpg --verify apache-dolphinscheduler-python-"${VERSION}".tar.gz.asc
gpg --verify apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl.asc
cd ../
```
> Note: You have to create gpg signature manually when you can not find your `asc` file, the command
@ -352,7 +333,7 @@ cd ../
#### Check source package
Decompress `apache-dolphinscheduler-<VERSION>-src.tar.gz` and `python/apache-dolphinscheduler-python-<VERSION>.tar.gz` then check the following items:
Decompress `apache-dolphinscheduler-<VERSION>-src.tar.gz` then check the following items:
* Check whether source tarball is oversized for including nonessential files
* `LICENSE` and `NOTICE` files exist
@ -365,8 +346,7 @@ Decompress `apache-dolphinscheduler-<VERSION>-src.tar.gz` and `python/apache-dol
#### Check binary packages
Decompress `apache-dolphinscheduler-<VERSION>-src.tar.gz` and `python/apache-dolphinscheduler-python-<VERSION>-bin.tar.gz`
to check the following items:
Decompress `apache-dolphinscheduler-<VERSION>-bin.tar.gz` to check the following items:
- `LICENSE` and `NOTICE` files exist
- Correct year in `NOTICE` file

7
docs/docs/zh/contribute/release/release-post.md

@ -1,6 +1,6 @@
# 发版后续
发送公告邮件后,我们还有一些发布任务要做,目前我们必须将 Docker 镜像发布到 Docker Hub 和 并且需要将 pydolphinscheduler 发布到 PyPI
发送公告邮件后,我们还有一些发布任务要做,目前我们必须将 Docker 镜像发布到 Docker Hub。
## 发布 Docker 镜像
@ -19,11 +19,6 @@
-Pdocker,release
```
## 发布 pydolphinscheduler 到 PyPI
需要将 Python API 发布到 PyPI,请参考 [Python API release](https://github.com/apache/dolphinscheduler-sdk-python/blob/main/RELEASE.md)
完成 PyPI 的发版
## 获取全部的贡献者
当您想要发布新版本的新闻或公告时,您可能需要当前版本的所有贡献者,您可以使用 git 命令 `git log --pretty="%an" <PREVIOUS-RELEASE-SHA>..<CURRENT-RELEASE-SHA> | sort | uniq`

1
docs/docs/zh/contribute/release/release-prepare.md

@ -23,7 +23,6 @@
- `deploy/kubernetes/dolphinscheduler`:
- `Chart.yaml`: `appVersion` 版本更新为 x.y.z (`version` 为 helm chart 版本, 增量更新但不要设置为 x.y.z)
- `values.yaml`: `image.tag` 版本更新为 x.y.z
- `dolphinscheduler-python/pydolphinscheduler/setup.py`: 修改其中的 `version` 为 x.y.z
- 修改文档(docs模块)中的版本号:
- 将 `docs` 文件夹下文件的占位符 `<version>` (除了 pom.xml 相关的) 修改成 `x.y.z`
- 新增历史版本

31
docs/docs/zh/contribute/release/release.md

@ -9,8 +9,6 @@
java -version
# 需要 Maven
mvn -version
# 需要 Python 3.6 及以上的版本,并且需要 `python` 关键字能在命令行中运行,且版本符合条件。
python --version
```
## GPG设置
@ -174,10 +172,10 @@ git push origin "${VERSION}"-release
```shell
# 运行发版校验
mvn release:prepare -Prelease,python -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true -Dspotless.check.skip=true" -DautoVersionSubmodules=true -DdryRun=true -Dusername="${GH_USERNAME}"
mvn release:prepare -Prelease -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true -Dspotless.check.skip=true" -DautoVersionSubmodules=true -DdryRun=true -Dusername="${GH_USERNAME}"
```
* `-Prelease,python`: 选择release和python的profile,这个profile会打包所有源码、jar文件以及可执行二进制包,以及Python的二进制包。
* `-Prelease`: 选择release的profile,这个profile会打包所有源码、jar文件以及可执行二进制包。
* `-DautoVersionSubmodules=true`: 作用是发布过程中版本号只需要输入一次,不必为每个子模块都输入一次。
* `-DdryRun=true`: 演练,即不产生版本号提交,不生成新的tag。
@ -192,7 +190,7 @@ mvn release:clean
然后准备执行发布。
```shell
mvn release:prepare -Prelease,python -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true -Dspotless.check.skip=true" -DautoVersionSubmodules=true -DpushChanges=false -Dusername="${GH_USERNAME}"
mvn release:prepare -Prelease -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true -Dspotless.check.skip=true" -DautoVersionSubmodules=true -DpushChanges=false -Dusername="${GH_USERNAME}"
```
和上一步演练的命令基本相同,去掉了 `-DdryRun=true` 参数。
@ -221,7 +219,7 @@ git push origin --tags
### 部署发布
```shell
mvn release:perform -Prelease,python -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true -Dspotless.check.skip=true" -DautoVersionSubmodules=true -Dusername="${GH_USERNAME}"
mvn release:perform -Prelease -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true -Dspotless.check.skip=true" -DautoVersionSubmodules=true -Dusername="${GH_USERNAME}"
```
执行完该命令后,待发布版本会自动上传到Apache的临时筹备仓库(staging repository)。你可以通过访问 [apache staging repositories](https://repository.apache.org/#stagingRepositories)
@ -260,7 +258,6 @@ gpg -a --export <YOUR-GPG-KEY-ID> >> KEYS
```shell
mkdir -p ~/ds_svn/dev/dolphinscheduler/"${VERSION}"
mkdir -p ~/ds_svn/dev/dolphinscheduler/"${VERSION}"/python
cd ~/ds_svn/dev/dolphinscheduler/"${VERSION}"
```
@ -270,9 +267,6 @@ cd ~/ds_svn/dev/dolphinscheduler/"${VERSION}"
# 主程序源码包和二进制包
cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/*.tar.gz ~/ds_svn/dev/dolphinscheduler/"${VERSION}"
cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/*.tar.gz.asc ~/ds_svn/dev/dolphinscheduler/"${VERSION}"
# Python API 源码和二进制包
cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/python/* ~/ds_svn/dev/dolphinscheduler/"${VERSION}"/python
```
### 生成文件签名
@ -280,10 +274,6 @@ cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/python/* ~/ds_svn/dev/dolp
```shell
shasum -a 512 apache-dolphinscheduler-"${VERSION}"-src.tar.gz >> apache-dolphinscheduler-"${VERSION}"-src.tar.gz.sha512
shasum -b -a 512 apache-dolphinscheduler-"${VERSION}"-bin.tar.gz >> apache-dolphinscheduler-"${VERSION}"-bin.tar.gz.sha512
cd python
shasum -a 512 apache-dolphinscheduler-python-"${VERSION}".tar.gz >> apache-dolphinscheduler-python-"${VERSION}".tar.gz.sha512
shasum -b -a 512 apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl >> apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl.sha512
cd ../
```
### 提交Apache SVN
@ -301,10 +291,6 @@ svn --username="${A_USERNAME}" commit -m "release ${VERSION}"
```shell
shasum -c apache-dolphinscheduler-"${VERSION}"-src.tar.gz.sha512
shasum -c apache-dolphinscheduler-"${VERSION}"-bin.tar.gz.sha512
cd python
shasum -c apache-dolphinscheduler-python-"${VERSION}".tar.gz.sha512
shasum -c apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl.sha512
cd ../
```
### 检查gpg签名
@ -337,10 +323,6 @@ Your decision? 5
```shell
gpg --verify apache-dolphinscheduler-"${VERSION}"-src.tar.gz.asc
gpg --verify apache-dolphinscheduler-"${VERSION}"-bin.tar.gz.asc
cd python
gpg --verify apache-dolphinscheduler-python-"${VERSION}".tar.gz.asc
gpg --verify apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl.asc
cd ../
```
> 注意:当你找不到你的 `asc` 文件时,你必须手动创建 gpg 签名,命令 `gpg --armor --detach-sign --digest-algo=SHA512 apache-dolphinscheduler-"${VERSION}"-bin.tar.gz`
@ -350,7 +332,7 @@ cd ../
#### 检查源码包的文件内容
解压缩`apache-dolphinscheduler-<VERSION>-src.tar.gz`以及Python文件夹下的`apache-dolphinscheduler-python-<VERSION>.tar.gz`,进行如下检查:
解压缩`apache-dolphinscheduler-<VERSION>-src.tar.gz`,进行如下检查:
- 检查源码包是否由于包含不必要文件,致使tarball过于庞大
- 存在`LICENSE`和`NOTICE`文件
@ -362,8 +344,7 @@ cd ../
#### 检查二进制包的文件内容
解压缩`apache-dolphinscheduler-<VERSION>-src.tar.gz`和`apache-dolphinscheduler-python-<VERSION>-bin.tar.gz`
进行如下检查:
解压缩`apache-dolphinscheduler-<VERSION>-bin.tar.gz`,进行如下检查:
- 存在`LICENSE`和`NOTICE`文件
- 所有文本文件开头都有ASF许可证

2
dolphinscheduler-api/pom.xml

@ -173,7 +173,7 @@
</exclusions>
</dependency>
<!-- Python -->
<!-- Python API's Gateway server -->
<dependency>
<groupId>net.sf.py4j</groupId>
<artifactId>py4j</artifactId>

35
dolphinscheduler-dist/pom.xml vendored

@ -72,11 +72,6 @@
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-tools</artifactId>
</dependency>
<dependency>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-python</artifactId>
</dependency>
</dependencies>
<build>
@ -125,35 +120,5 @@
</plugins>
</build>
</profile>
<profile>
<id>python</id>
<build>
<plugins>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<executions>
<execution>
<id>python</id>
<goals>
<goal>single</goal>
</goals>
<phase>package</phase>
<configuration>
<!-- Make final directory with simple name `python`, and without any addtion information -->
<finalName>python</finalName>
<appendAssemblyId>false</appendAssemblyId>
<descriptors>
<descriptor>src/main/assembly/dolphinscheduler-python-api.xml</descriptor>
</descriptors>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>

34
dolphinscheduler-dist/src/main/assembly/dolphinscheduler-python-api.xml vendored

@ -1,34 +0,0 @@
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<id>python-api</id>
<formats>
<format>dir</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<directory>${basedir}/../dolphinscheduler-python/pydolphinscheduler/dist</directory>
<outputDirectory>.</outputDirectory>
</fileSet>
</fileSets>
</assembly>

7
dolphinscheduler-dist/src/main/assembly/dolphinscheduler-src.xml vendored

@ -57,13 +57,6 @@
<exclude>**/dolphinscheduler-ui/node/**</exclude>
<exclude>**/dolphinscheduler-ui/node_modules/**</exclude>
<!-- python ignore -->
<exclude>**/dolphinscheduler-python/pydolphinscheduler/.pytest_cache/**</exclude>
<exclude>**/dolphinscheduler-python/pydolphinscheduler/build/**</exclude>
<exclude>**/dolphinscheduler-python/pydolphinscheduler/dist/**</exclude>
<exclude>**/dolphinscheduler-python/pydolphinscheduler/dist/**</exclude>
<exclude>**/dolphinscheduler-python/pydolphinscheduler/htmlcov/**</exclude>
<!-- eclipse ignore -->
<exclude>**/.settings/**</exclude>
<exclude>**/.project</exclude>

34
dolphinscheduler-python/pydolphinscheduler/.coveragerc

@ -1,34 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
[run]
command_line = -m pytest
omit =
# Ignore all test cases in tests/
tests/*
# Ignore examples directory
*/pydolphinscheduler/examples/*
# TODO: Temporarily ignore the java_gateway file, because we have not found a good way to test it.
*/pydolphinscheduler/java_gateway.py
[report]
# Don’t report files that are 100% covered
skip_covered = True
show_missing = True
precision = 2
# Report will fail when coverage under 90.00%
fail_under = 90

19
dolphinscheduler-python/pydolphinscheduler/.isort.cfg

@ -1,19 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
[settings]
profile=black

56
dolphinscheduler-python/pydolphinscheduler/.pre-commit-config.yaml

@ -1,56 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
default_stages: [commit, push]
default_language_version:
# force all python hooks to run python3
python: python3
repos:
- repo: https://github.com/pycqa/isort
rev: 5.10.1
hooks:
- id: isort
name: isort (python)
- repo: https://github.com/psf/black
rev: 22.1.0
hooks:
- id: black
- repo: https://github.com/pycqa/flake8
rev: 4.0.1
hooks:
- id: flake8
additional_dependencies: [
'flake8-docstrings>=1.6',
'flake8-black>=0.2',
]
# pre-commit runs from the repository root, so we have to give the full path to the configuration file
args: [
--config,
dolphinscheduler-python/pydolphinscheduler/.flake8
]
- repo: https://github.com/pycqa/autoflake
rev: v1.4
hooks:
- id: autoflake
args: [
--remove-all-unused-imports,
--ignore-init-module-imports,
--in-place
]

228
dolphinscheduler-python/pydolphinscheduler/LICENSE

@ -1,228 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright {yyyy} {name of copyright owner}
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
============================================================================
Apache DolphinScheduler Python API SUBCOMPONENTS:
The Apache DolphinScheduler Python API project contains subcomponents
with separate copyright notices and license terms. Your use of the source
code for these subcomponents is subject to the terms and conditions
of the following licenses.
========================================================================
BSD licenses
========================================================================
The following components are provided under a BSD license. See project link for details.
The text of each license is also included at licenses/LICENSE-[project].txt.
py4j v0.10 (https://github.com/py4j/py4j)
click v8.0 (https://github.com/pallets/click)
========================================================================
MIT licenses
========================================================================
The following components are provided under the MIT License. See project link for details.
The text of each license is also included at licenses/LICENSE-[project].txt.
ruamel.yaml v0.17 (https://sourceforge.net/projects/ruamel-yaml/)

5
dolphinscheduler-python/pydolphinscheduler/NOTICE

@ -1,5 +0,0 @@
Apache DolphinScheduler
Copyright 2017-2022 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).

35
dolphinscheduler-python/pydolphinscheduler/RELEASE.md

@ -1,35 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
# Release
**PyDolphinScheduler** official release is in [ASF Distribution Directory](https://downloads.apache.org/dolphinscheduler/),
and it should be released together with [apache-dolphinscheduler](https://github.com/apache/dolphinscheduler).
## To ASF Distribution Directory
You could release to [ASF Distribution Directory](https://downloads.apache.org/dolphinscheduler/) according to
[release guide](../../docs/docs/en/contribute/release/release-prepare.md) in DolphinScheduler
website.
## To PyPi
[PyPI](https://pypi.org), Python Package Index, is a repository of software for the Python programming language.
Users can install Python packages from it. Releasing to PyPI makes it easier for users to install and try PyDolphinScheduler.
There is an official guide to packaging projects from [PyPA](https://packaging.python.org/en/latest/tutorials/packaging-projects)

44
dolphinscheduler-python/pydolphinscheduler/docs/Makefile

@ -1,44 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
# Add the `-W` option (turn warnings into errors) to make sphinx-build behave strictly
SPHINXOPTS ?= -W
SPHINXBUILD ?= sphinx-build
SPHINXMULTIVERSION ?= sphinx-multiversion
SOURCEDIR = source
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
# Create multiple versions of the docs
multiversion:
@$(SPHINXMULTIVERSION) "$(SOURCEDIR)" "$(BUILDDIR)/html"

54
dolphinscheduler-python/pydolphinscheduler/docs/make.bat

@ -1,54 +0,0 @@
REM Licensed to the Apache Software Foundation (ASF) under one
REM or more contributor license agreements. See the NOTICE file
REM distributed with this work for additional information
REM regarding copyright ownership. The ASF licenses this file
REM to you under the Apache License, Version 2.0 (the
REM "License"); you may not use this file except in compliance
REM with the License. You may obtain a copy of the License at
REM
REM http://www.apache.org/licenses/LICENSE-2.0
REM
REM Unless required by applicable law or agreed to in writing,
REM software distributed under the License is distributed on an
REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
REM KIND, either express or implied. See the License for the
REM specific language governing permissions and limitations
REM under the License.
@ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
REM Add the `-W` option (turn warnings into errors) to make sphinx-build behave strictly
set SPHINXOPTS=-W
if "%1" == "" goto help
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.https://www.sphinx-doc.org/
exit /b 1
)
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd

0
dolphinscheduler-python/pydolphinscheduler/docs/source/_static/.gitkeep

27
dolphinscheduler-python/pydolphinscheduler/docs/source/_templates/versioning.html

@ -1,27 +0,0 @@
{#
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
#}
{% if versions %}
<h3>{{ _('Versions') }}</h3>
<ul>
{%- for item in versions %}
<li><a href="{{ item.url }}">{{ item.name }}</a></li>
{%- endfor %}
</ul>
{% endif %}

46
dolphinscheduler-python/pydolphinscheduler/docs/source/_templates/versions.html

@ -1,46 +0,0 @@
{#
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
#}
{%- if current_version %}
<div class="rst-versions" data-toggle="rst-versions" role="note" aria-label="versions">
<span class="rst-current-version" data-toggle="rst-current-version">
<span class="fa fa-book"> Other Versions</span>
v: {{ current_version.name }}
<span class="fa fa-caret-down"></span>
</span>
<div class="rst-other-versions">
{%- if versions.tags %}
<dl>
<dt>Tags</dt>
{%- for item in versions.tags %}
<dd><a href="{{ item.url }}">{{ item.name }}</a></dd>
{%- endfor %}
</dl>
{%- endif %}
{%- if versions.branches %}
<dl>
<dt>Branches</dt>
{%- for item in versions.branches %}
<dd><a href="{{ item.url }}">{{ item.name }}</a></dd>
{%- endfor %}
</dl>
{%- endif %}
</div>
</div>
{%- endif %}

47
dolphinscheduler-python/pydolphinscheduler/docs/source/api.rst

@ -1,47 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
API
===
Core
----
.. automodule:: pydolphinscheduler.core
:inherited-members:
Models
------
.. automodule:: pydolphinscheduler.models
:inherited-members:
Tasks
-----
.. automodule:: pydolphinscheduler.tasks
:inherited-members:
Constants
---------
.. automodule:: pydolphinscheduler.constants
Exceptions
----------
.. automodule:: pydolphinscheduler.exceptions

36
dolphinscheduler-python/pydolphinscheduler/docs/source/cli.rst

@ -1,36 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Command Line Interface
======================
*PyDolphinScheduler* provides a CLI (command line interface) to help users control it from the shell.
Prepare
-------
You have to :ref:`install PyDolphinScheduler <start:installing pydolphinscheduler>` before using
its CLI.
Usage
-----
Here is the basic usage of the *PyDolphinScheduler* command line
.. click:: pydolphinscheduler.cli.commands:cli
:prog: pydolphinscheduler
:nested: full

151
dolphinscheduler-python/pydolphinscheduler/docs/source/concept.rst

@ -1,151 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Concepts
========
In this section, you would know the core concepts of *PyDolphinScheduler*.
Process Definition
------------------
A process definition describes everything about a workflow except its `tasks`_ and `tasks dependence`_, including
its name, schedule interval, schedule start time and end time. You will learn more about scheduling in section `Schedule`_.
A process definition could be initialized with a normal assignment statement or with a context manager.
.. code-block:: python
# Initialization with assign statement
pd = ProcessDefinition(name="my first process definition")
# Or context manager
with ProcessDefinition(name="my first process definition") as pd:
pd.submit()
The process definition is the main object for communication between *PyDolphinScheduler* and the DolphinScheduler daemon.
After the process definition and its tasks are declared, you could use `submit` or `run` to notify the server of your definition.
If you just want to submit your definition and create the workflow without running it, you should use the method `submit`.
But if you want to run the workflow after you submit it, you could use the method `run`.
.. code-block:: python
# Just submit definition, without run it
pd.submit()
# Both submit and run definition
pd.run()
Schedule
~~~~~~~~
We use the parameter `schedule` to determine the schedule interval of the workflow. *PyDolphinScheduler* supports
crontab expressions with seven fields (seven asterisks), and the meaning of each position is shown below
.. code-block:: text
* * * * * * *
┬ ┬ ┬ ┬ ┬ ┬ ┬
│ │ │ │ │ │ │
│ │ │ │ │ │ └─── year
│ │ │ │ │ └───── day of week (0 - 7) (0 to 6 are Sunday to Saturday, or use names; 7 is Sunday, the same as 0)
│ │ │ │ └─────── month (1 - 12)
│ │ │ └───────── day of month (1 - 31)
│ │ └─────────── hour (0 - 23)
│ └───────────── min (0 - 59)
└─────────────── second (0 - 59)
Here we add some example crontab:
- `0 0 0 * * ? *`: The workflow executes every day at 00:00:00.
- `10 2 * * * ? *`: The workflow executes hourly, at two minutes and ten seconds past the hour.
- `10,11 20 0 1,2 * ? *`: The workflow executes on the first and second day of each month at 00:20:10 and 00:20:11.
Tenant
~~~~~~
Tenant is the user who runs the task command on the machine or virtual machine. It could be assigned by a simple string.
.. code-block:: python
#
pd = ProcessDefinition(name="process definition tenant", tenant="tenant_exists")
.. note::
Make sure the tenant exists on the target machine, otherwise an error will be raised when you try to run the command
Tasks
-----
A task is the minimum unit that runs an actual job, and it is a node of the DAG, aka directed acyclic graph. You could define
what you want to do in the task. It has some required parameters that make it unique and well defined.
Here we use :py:meth:`pydolphinscheduler.tasks.Shell` as an example. The parameters `name` and `command` are required and must be provided. Parameter
`name` sets the name of the task, and parameter `command` declares the command you wish to run in this task.
.. code-block:: python
# We named this task as "shell", and just run command `echo shell task`
shell_task = Shell(name="shell", command="echo shell task")
If you want to see all types of tasks, you could see :doc:`tasks/index`.
Tasks Dependence
~~~~~~~~~~~~~~~~
You could define many tasks in one single `Process Definition`_. If all those tasks run in parallel,
then you could leave them alone without adding any additional information. But if some tasks should
not run until their upstream tasks in the workflow are done, we should set task dependence for them. There are
two main ways to set task dependence, and both of them are easy. You could use the bitwise operators `>>` and `<<`, or the task methods
`set_downstream` and `set_upstream` to do it.
.. code-block:: python
# Set task1 as task2 upstream
task1 >> task2
# You could use attribute `set_downstream` too, is same as `task1 >> task2`
task1.set_downstream(task2)
# Set task1 as task2 downstream
task1 << task2
# It is same as attribute `set_upstream`
task1.set_upstream(task2)
# Besides, we could set dependence between a task and a sequence of tasks,
# here we set `task1` as upstream of both `task2` and `task3`. It is useful
# when several tasks have the same dependence.
task1 >> [task2, task3]
Task With Process Definition
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In most data orchestration cases, you should assign the attribute `process_definition` to a task instance to
decide the workflow of the task. You could set `process_definition` either by normal assignment or in context manager mode
.. code-block:: python
# Normal assignment, you have to explicitly declare and pass the `ProcessDefinition` instance to the task
pd = ProcessDefinition(name="my first process definition")
shell_task = Shell(name="shell", command="echo shell task", process_definition=pd)
# Context manager, the `ProcessDefinition` instance pd would be implicitly declared to the task
with ProcessDefinition(name="my first process definition") as pd:
shell_task = Shell(name="shell", command="echo shell task")
With both `Process Definition`_, `Tasks`_ and `Tasks Dependence`_, we could build a workflow with multiple tasks.

121
dolphinscheduler-python/pydolphinscheduler/docs/source/conf.py

@ -1,121 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
import os
import sys
from pathlib import Path
# For sphinx-multiversion, we need to build API docs of the corresponding package version, so the package
# source directory is derived from the SPHINX_MULTIVERSION_SOURCEDIR environment variable (falling back to
# the current directory when it is not set). Related issue:
# https://github.com/Holzhaus/sphinx-multiversion/issues/42
pkg_src_dir = (
    Path(os.environ.get("SPHINX_MULTIVERSION_SOURCEDIR", default="."))
    .joinpath("../../src")
    .resolve()
)
# Insert at the front of sys.path so this source directory is searched before the other entries.
sys.path.insert(0, str(pkg_src_dir))

# Uncomment the lines below to debug the source path
# print("=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=")
# print(pkg_src_dir)
# [print(p) for p in sys.path]
# print("=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=")

# -- Project information -----------------------------------------------------

project = "pydolphinscheduler"
copyright = "2022, apache"
author = "apache dolphinscheduler contributors"

# The full version, including alpha/beta/rc tags
release = "0.0.1"

# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    # Measures durations of Sphinx processing
    "sphinx.ext.duration",
    # Semi-automatic make docstrings to document
    "sphinx.ext.autodoc",
    "sphinx.ext.viewcode",
    "sphinx.ext.autosectionlabel",
    "sphinx_rtd_theme",
    # Documenting command line interface
    "sphinx_click.ext",
    # Add inline tabbed content
    "sphinx_inline_tabs",
    "sphinx_copybutton",
    "sphinx_multiversion",
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

# sphinx_multiversion configuration
html_sidebars = {
    "**": [
        "versioning.html",
    ],
}
# Match all existing tags of pydolphinscheduler except version 2.0.4 (not released in Apache DolphinScheduler).
# The dots inside the lookahead are escaped and the lookahead is anchored with `$`, so only the exact tag
# `2.0.4` is excluded; tags that merely start with a similar prefix (e.g. `2.0.40`) are still matched.
smv_tag_whitelist = r"^(?!2\.0\.4$)\d+\.\d+\.\d+$"
smv_branch_whitelist = "dev"
smv_remote_whitelist = r"^(origin|upstream)$"
smv_released_pattern = "^refs/tags/.*$"
smv_outputdir_format = "versions/{ref.name}"

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []

autodoc_default_options = {
    "members": True,
    "show-inheritance": True,
    "private-members": True,
    "undoc-members": True,
    "member-order": "groupwise",
}

# Prefix section labels with the document name, e.g. `start:installing pydolphinscheduler`
autosectionlabel_prefix_document = True

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = "sphinx_rtd_theme"

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]

218
dolphinscheduler-python/pydolphinscheduler/docs/source/config.rst

@ -1,218 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Configuration
=============
pydolphinscheduler has a built-in module setting necessary configuration to start and run your workflow code.
You could directly use them if you only want to run a quick start or for a simple job like POC. But if you
want to use pydolphinscheduler in depth, or even use it in production, you will probably need to modify and
change the built-in configuration.
We have two ways to modify the configuration:
- `Using Environment Variables`_: The more lightweight way to modify the configuration. It is useful in
containerization scenarios, like docker and k8s, or when you like to temporarily override configs in the
configuration file.
- `Using Configuration File`_: The more general way to modify the configuration. It is useful when you want
to persist and manage configuration files in one single file.
Using Environment Variables
---------------------------
You could change the configuration by adding or modifying the operating system's environment variables. It does
not matter which way you use, as long as you can successfully modify the environment variables. We use two common
ways, `Bash <by bash>`_ and `Python OS Module <by python os module>`_, as examples:
By Bash
^^^^^^^
Setting environment variables via `Bash` is the most straightforward and easiest way. We give some examples about
how to change them by Bash.
.. code-block:: bash
# Modify Java Gateway Address
export PYDS_JAVA_GATEWAY_ADDRESS="192.168.1.1"
# Modify Workflow Default User
export PYDS_WORKFLOW_USER="custom-user"
After executing the commands above, both ``PYDS_JAVA_GATEWAY_ADDRESS`` and ``PYDS_WORKFLOW_USER`` will be changed.
The next time you execute and submit your workflow, it will submit to host `192.168.1.1`, and with workflow's user
named `custom-user`.
By Python OS Module
^^^^^^^^^^^^^^^^^^^
pydolphinscheduler is a Python API for Apache DolphinScheduler, and you could modify or add system environment
variables via Python ``os`` module. In this example, we change variables as the same value as we change in
`Bash <by bash>`_. It will take effect the next time you run your workflow, and call workflow ``run`` or ``submit``
method next to ``os.environ`` statement.
.. code-block:: python
import os
# Modify Java Gateway Address
os.environ["PYDS_JAVA_GATEWAY_ADDRESS"] = "192.168.1.1"
# Modify Workflow Default User
os.environ["PYDS_WORKFLOW_USER"] = "custom-user"
All Configurations in Environment Variables
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
All environment variables as below, and you could modify their value via `Bash <by bash>`_ or `Python OS Module <by python os module>`_
+------------------+------------------------------------+--------------------------------------------------------------------------------------------------------------------+
| Variable Section | Variable Name | description |
+==================+====================================+====================================================================================================================+
| | ``PYDS_JAVA_GATEWAY_ADDRESS`` | Default Java gateway address, will use its value when it is set. |
+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+
| Java Gateway | ``PYDS_JAVA_GATEWAY_PORT`` | Default Java gateway port, will use its value when it is set. |
+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+
| | ``PYDS_JAVA_GATEWAY_AUTO_CONVERT`` | Default boolean Java gateway auto convert, will use its value when it is set. |
+------------------+------------------------------------+--------------------------------------------------------------------------------------------------------------------+
| | ``PYDS_USER_NAME`` | Default user name, will use when user's ``name`` when does not specify. |
+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+
| | ``PYDS_USER_PASSWORD`` | Default user password, will use when user's ``password`` when does not specify. |
+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+
| Default User | ``PYDS_USER_EMAIL`` | Default user email, will use when user's ``email`` when does not specify. |
+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+
| | ``PYDS_USER_PHONE`` | Default user phone, will use when user's ``phone`` when does not specify. |
+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+
| | ``PYDS_USER_STATE`` | Default user state, will use when user's ``state`` when does not specify. |
+------------------+------------------------------------+--------------------------------------------------------------------------------------------------------------------+
| | ``PYDS_WORKFLOW_PROJECT`` | Default workflow project name, will use its value when workflow does not specify the attribute ``project``. |
+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+
| | ``PYDS_WORKFLOW_TENANT`` | Default workflow tenant, will use its value when workflow does not specify the attribute ``tenant``. |
+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+
| Default Workflow | ``PYDS_WORKFLOW_USER`` | Default workflow user, will use its value when workflow does not specify the attribute ``user``. |
+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+
| | ``PYDS_WORKFLOW_QUEUE`` | Default workflow queue, will use its value when workflow does not specify the attribute ``queue``. |
+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+
| | ``PYDS_WORKFLOW_WORKER_GROUP`` | Default workflow worker group, will use its value when workflow does not specify the attribute ``worker_group``. |
+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+
| | ``PYDS_WORKFLOW_RELEASE_STATE`` | Default workflow release state, will use its value when workflow does not specify the attribute ``release_state``. |
+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+
| | ``PYDS_WORKFLOW_TIME_ZONE`` | Default workflow time zone, will use its value when workflow does not specify the attribute ``timezone``. |
+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+
| | ``PYDS_WORKFLOW_WARNING_TYPE`` | Default workflow warning type, will use its value when workflow does not specify the attribute ``warning_type``. |
+------------------+------------------------------------+--------------------------------------------------------------------------------------------------------------------+
.. note::
The scope of setting configuration via environment variable is in the workflow, and it will not change the
value of the configuration file. The :doc:`CLI <cli>` command ``config --get`` and ``config --set`` operate
the value of the configuration file, so the command ``config --get`` may return a different value from what
you set in the environment variable, and command ``config --set`` will never change your environment variable.
Using Configuration File
------------------------
If you want to persist and manage configuration in a file instead of environment variables, or maybe you
want to save your configuration file to a version control system, like Git or SVN, then changing the
configuration by file is the best choice.
Export Configuration File
^^^^^^^^^^^^^^^^^^^^^^^^^
pydolphinscheduler allows you to change the built-in configurations via CLI or editor you like. pydolphinscheduler
integrated built-in configurations in its package, but you could also export it locally by CLI
.. code-block:: bash
pydolphinscheduler config --init
And it will create a new YAML file in the path `~/pydolphinscheduler/config.yaml` by default. If you want to export
it to another path, you should set `PYDS_HOME` before you run command :code:`pydolphinscheduler config --init`.
.. code-block:: bash
export PYDS_HOME=<CUSTOM_PATH>
pydolphinscheduler config --init
After that, your configuration file will export into `<CUSTOM_PATH>/config.yaml` instead of the default path.
Change Configuration
^^^^^^^^^^^^^^^^^^^^
In section `export configuration file`_ you export the configuration file locally, and as a local file, you could
edit it with any editor you like. After you save your change in your editor, the latest configuration will work
when you run your workflow code.
You could also query or change the configuration via CLI :code:`config --get <config>` or :code:`config --set <config> <val>`.
Both `--get` and `--set` could be called one or more times in a single command. You could only set the leaf
node of the configuration, but you could get the parent configuration. There are simple examples below:
.. code-block:: bash
# Get single configuration in the leaf node,
# The output look like below:
# java_gateway.address = 127.0.0.1
pydolphinscheduler config --get java_gateway.address
# Get multiple configuration in the leaf node,
# The output look like below:
# java_gateway.address = 127.0.0.1
# java_gateway.port = 25333
pydolphinscheduler config --get java_gateway.address --get java_gateway.port
# Get parent configuration which contain multiple leaf nodes,
# The output look like below:
# java_gateway = ordereddict([('address', '127.0.0.1'), ('port', 25333), ('auto_convert', True)])
pydolphinscheduler config --get java_gateway
# Set single configuration,
# The output look like below:
# Set configuration done.
pydolphinscheduler config --set java_gateway.address 192.168.1.1
# Set multiple configuration
# The output look like below:
# Set configuration done.
pydolphinscheduler config --set java_gateway.address 192.168.1.1 --set java_gateway.port 25334
# Set configuration not in leaf node will fail
# The output look like below:
# Raise error.
pydolphinscheduler config --set java_gateway 192.168.1.1,25334,True
For more information about our CLI, you could see document :doc:`cli`.
All Configurations in File
^^^^^^^^^^^^^^^^^^^^^^^^^^
Here are all our configurations for pydolphinscheduler.
.. literalinclude:: ../../src/pydolphinscheduler/default_config.yaml
:language: yaml
:lines: 18-
Priority
--------
We have two ways to modify the configuration and there is a built-in config in pydolphinscheduler too. It is
very important to understand the priority of the configuration when you use them. The overview of configuration
priority is.
``Environment Variables > Configurations File > Built-in Configurations``
This means that your setting in environment variables or configurations file will overwrite the built-in one.
And you could temporarily modify configurations by setting environment variables without modifying the global
config in the configuration file.

30
dolphinscheduler-python/pydolphinscheduler/docs/source/howto/index.rst

@ -1,30 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
HOWTOs
======
pydolphinscheduler HOWTOs are documents that cover a single, specific topic, and attempt to cover it fairly
completely. This collection is an effort to foster documentation that is more detailed than the :doc:`../concept`
and :doc:`../tutorial`.
Currently, the HOWTOs are:
.. toctree::
:maxdepth: 2
remote-submit

51
dolphinscheduler-python/pydolphinscheduler/docs/source/howto/remote-submit.rst

@ -1,51 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Submit Your Code from Different machine
=======================================
Generally, we use pydolphinscheduler as a client to DolphinScheduler, and considering we may change our workflow
code frequently, the best practice is running :ref:`python gateway service <start:start python gateway service>`
on your server machine and submitting the workflow code from your development machine, like a laptop or PC. This behavior
is supported by pydolphinscheduler out of the box with one or two single command lines.
Export Configuration File
-------------------------
.. code-block:: bash
pydolphinscheduler config --init
You could find more details in :ref:`configuration exporting <config:export configuration file>`
Run API Server in Other Host
----------------------------
.. code-block:: bash
pydolphinscheduler config --set java_gateway.address <your-api-server-ip-or-hostname>
You could find more details in :ref:`configuration setting <config:change configuration>`
Run API Server in Other Port
----------------------------
.. code-block:: bash
pydolphinscheduler config --set java_gateway.port <your-python-gateway-service-port>
You could find more details in :ref:`configuration setting <config:change configuration>`

171
dolphinscheduler-python/pydolphinscheduler/docs/source/start.rst

@ -1,171 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Getting Started
===============
To get started with *PyDolphinScheduler* you must ensure Python and pip are
installed on your machine. If you're already set up, you can skip straight
to `Installing PyDolphinScheduler`_, otherwise please continue with
`Installing Python`_.
Installing Python
-----------------
How to install `python` and `pip` depends on what operating system
you're using. The python wiki provides up to date
`instructions for all platforms here`_. When you enter the website
and choose your operating system, you will be offered a choice of
Python versions to select. *PyDolphinScheduler* recommends using a version above
Python 3.6 and we highly recommend you install *Stable Releases* instead
of *Pre-releases*.
After you have downloaded and installed Python, you should open your terminal,
type and run :code:`python --version` to check whether the installation
is correct or not. If all is good, you could see the version in the console
without error (here is an example after Python 3.8.7 is installed)
.. code-block:: bash
python --version
Will see detail of Python version, such as *Python 3.8.7*
Installing PyDolphinScheduler
-----------------------------
After Python is installed on your machine following section
`installing Python`_, it is easy to install *PyDolphinScheduler* with pip.
.. code-block:: bash
python -m pip install apache-dolphinscheduler
The latest version of *PyDolphinScheduler* would be installed after you run above
command in your terminal. You could go and `start Python Gateway Service`_ to finish
the preparation, and then go to :doc:`tutorial` to get your hands dirty. But if you
want to install the unreleased version of *PyDolphinScheduler*, you could go and see
section `installing PyDolphinScheduler in dev branch`_ for more detail.
.. note::
Currently, we have released multiple pre-release packages on PyPI, you can see all released packages
including pre-releases in `release history <https://pypi.org/project/apache-dolphinscheduler/#history>`_.
You can pin the package version if you want to install a pre-release package, for example if
you want to install version `3.0.0-beta-2` package, you can run command
:code:`python -m pip install apache-dolphinscheduler==3.0.0b2`.
Installing PyDolphinScheduler In DEV Branch
-------------------------------------------
Because the project is under active development, some features have not been released yet.
If you want to try something unreleased you could install from the source code
which we host on GitHub
.. code-block:: bash
# Clone Apache DolphinScheduler repository
git clone git@github.com:apache/dolphinscheduler.git
# Install PyDolphinScheduler in develop mode
cd dolphinscheduler-python/pydolphinscheduler && python -m pip install -e .
After you have installed *PyDolphinScheduler*, please remember to `start Python Gateway Service`_,
which waits for the workflow definition requests from *PyDolphinScheduler*.
The above command will clone the whole dolphinscheduler source code to local. If you only want to install the latest pydolphinscheduler
package directly and do not care about other code (including Python gateway service code), you can execute the command
.. code-block:: bash
# Must escape the '&' character by adding '\'
pip install -e "git+https://github.com/apache/dolphinscheduler.git#egg=apache-dolphinscheduler&subdirectory=dolphinscheduler-python/pydolphinscheduler"
Start Python Gateway Service
----------------------------
Since **PyDolphinScheduler** is the Python API for `Apache DolphinScheduler`_, it
could define workflow and tasks structure, but could not run it unless you
`install Apache DolphinScheduler`_ and start its API server, which includes the
Python gateway service. We only list some key steps here and you could
go to `install Apache DolphinScheduler`_ for more detail
.. code-block:: bash
# Start DolphinScheduler api-server which includes the Python gateway service
./bin/dolphinscheduler-daemon.sh start api-server
To check whether the server is alive or not, you could run :code:`jps`. The
server is healthy if the keyword `ApiApplicationServer` appears in the console.
.. code-block:: bash
jps
# ....
# 201472 ApiApplicationServer
# ....
.. note::
Please make sure you have already enabled and started the Python gateway service along with `api-server`. The configuration is in
yaml config path `python-gateway.enabled : true` in api-server's configuration path in `api-server/conf/application.yaml`.
The default value is true and the Python gateway service starts when the API server is started.
Run an Example
--------------
Before running an example for pydolphinscheduler, you should get the example code from its source code. You could run
a single bash command to get it
.. code-block:: bash
wget https://raw.githubusercontent.com/apache/dolphinscheduler/dev/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial.py
or you could copy-paste the content from `tutorial source code`_. And then you could run the example in your
terminal
.. code-block:: bash
python tutorial.py
If you want to submit your workflow to a remote API server, which means that your workflow script runs on a different
machine from the API server, you should first change the pydolphinscheduler configuration and then submit the workflow script
.. code-block:: bash
pydolphinscheduler config --init
pydolphinscheduler config --set java_gateway.address <YOUR-API-SERVER-IP-OR-HOSTNAME>
python tutorial.py
.. note::
You could see more information in :doc:`config` about all the configurations pydolphinscheduler supports.
After that, you could go and see your DolphinScheduler web UI to find out a new workflow created by pydolphinscheduler,
and the path of web UI is `Project -> Workflow -> Workflow Definition`.
What's More
-----------
If you are not familiar with *PyDolphinScheduler*, you could go to :doc:`tutorial` and see how it works. But
if you already know the basic usage or concept of *PyDolphinScheduler*, you could go and play with all
:doc:`tasks/index` *PyDolphinScheduler* supports, or see our :doc:`howto/index` about useful cases.
.. _`instructions for all platforms here`: https://wiki.python.org/moin/BeginnersGuide/Download
.. _`Apache DolphinScheduler`: https://dolphinscheduler.apache.org
.. _`install Apache DolphinScheduler`: https://dolphinscheduler.apache.org/en-us/docs/latest/user_doc/guide/installation/standalone.html
.. _`tutorial source code`: https://raw.githubusercontent.com/apache/dolphinscheduler/dev/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial.py

40
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/condition.rst

@ -1,40 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Condition
=========
A condition task type's example and dive into information of **PyDolphinScheduler**.
Example
-------
.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_condition_example.py
:start-after: [start workflow_declare]
:end-before: [end workflow_declare]
Dive Into
---------
.. automodule:: pydolphinscheduler.tasks.condition
YAML file example
-----------------
.. literalinclude:: ../../../examples/yaml_define/Condition.yaml
:start-after: # under the License.
:language: yaml

46
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/datax.rst

@ -1,46 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Datax
=====
A DataX task type's example and dive into information of **PyDolphinScheduler**.
Example
-------
.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_datax_example.py
:start-after: [start workflow_declare]
:end-before: [end workflow_declare]
Dive Into
---------
.. automodule:: pydolphinscheduler.tasks.datax
YAML file example
-----------------
.. literalinclude:: ../../../examples/yaml_define/DataX.yaml
:start-after: # under the License.
:language: yaml
example_datax.json:
.. literalinclude:: ../../../examples/yaml_define/example_datax.json
:language: json

47
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/dependent.rst

@ -1,47 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Dependent
=========
A dependent task type's example and dive into information of **PyDolphinScheduler**.
Example
-------
.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_dependent_example.py
:start-after: [start workflow_declare]
:end-before: [end workflow_declare]
Dive Into
---------
.. automodule:: pydolphinscheduler.tasks.dependent
YAML file example
-----------------
.. literalinclude:: ../../../examples/yaml_define/Dependent.yaml
:start-after: # under the License.
:language: yaml
Dependent_External.yaml:
.. literalinclude:: ../../../examples/yaml_define/Dependent_External.yaml
:start-after: # under the License.
:language: yaml

41
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/dvc.rst

@ -1,41 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
DVC
===
A DVC task type's example and dive into information of **PyDolphinScheduler**.
Example
-------
.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_dvc_example.py
:start-after: [start workflow_declare]
:end-before: [end workflow_declare]
Dive Into
---------
.. automodule:: pydolphinscheduler.tasks.dvc
YAML file example
-----------------
.. literalinclude:: ../../../examples/yaml_define/Dvc.yaml
:start-after: # under the License.
:language: yaml

40
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/flink.rst

@ -1,40 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Flink
=====
A flink task type's example and dive into information of **PyDolphinScheduler**.
Example
-------
.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_flink_example.py
:start-after: [start workflow_declare]
:end-before: [end workflow_declare]
Dive Into
---------
.. automodule:: pydolphinscheduler.tasks.flink
YAML file example
-----------------
.. literalinclude:: ../../../examples/yaml_define/Flink.yaml
:start-after: # under the License.
:language: yaml

33
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/func_wrap.rst

@ -1,33 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Python Function Wrapper
=======================
A decorator that converts a Python function into a pydolphinscheduler task.
Example
-------
.. literalinclude:: ../../../src/pydolphinscheduler/examples/tutorial_decorator.py
:start-after: [start tutorial]
:end-before: [end tutorial]
Dive Into
---------
.. automodule:: pydolphinscheduler.tasks.func_wrap

29
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/http.rst

@ -1,29 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
HTTP
====
.. automodule:: pydolphinscheduler.tasks.http
YAML file example
-----------------
.. literalinclude:: ../../../examples/yaml_define/Http.yaml
:start-after: # under the License.
:language: yaml

48
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/index.rst

@ -1,48 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Tasks
=====
In this section
.. toctree::
:maxdepth: 1
func_wrap
shell
sql
python
http
switch
condition
dependent
spark
flink
map_reduce
procedure
datax
sub_process
sagemaker
mlflow
openmldb
pytorch
dvc

42
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/map_reduce.rst

@ -1,42 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Map Reduce
==========
A Map Reduce task type's example and dive into information of **PyDolphinScheduler**.
Example
-------
.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_map_reduce_example.py
:start-after: [start workflow_declare]
:end-before: [end workflow_declare]
Dive Into
---------
.. automodule:: pydolphinscheduler.tasks.map_reduce
YAML file example
-----------------
.. literalinclude:: ../../../examples/yaml_define/MapReduce.yaml
:start-after: # under the License.
:language: yaml

42
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/mlflow.rst

@ -1,42 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
MLflow
=========
An MLflow task type's example and dive into information of **PyDolphinScheduler**.
Example
-------
.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_mlflow_example.py
:start-after: [start workflow_declare]
:end-before: [end workflow_declare]
Dive Into
---------
.. automodule:: pydolphinscheduler.tasks.mlflow
YAML file example
-----------------
.. literalinclude:: ../../../examples/yaml_define/mlflow.yaml
:start-after: # under the License.
:language: yaml

42
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/openmldb.rst

@ -1,42 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
OpenMLDB
=========
An OpenMLDB task type's example and dive into information of **PyDolphinScheduler**.
Example
-------
.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_openmldb_example.py
:start-after: [start workflow_declare]
:end-before: [end workflow_declare]
Dive Into
---------
.. automodule:: pydolphinscheduler.tasks.openmldb
YAML file example
-----------------
.. literalinclude:: ../../../examples/yaml_define/OpenMLDB.yaml
:start-after: # under the License.
:language: yaml

29
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/procedure.rst

@ -1,29 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Procedure
=========
.. automodule:: pydolphinscheduler.tasks.procedure
YAML file example
-----------------
.. literalinclude:: ../../../examples/yaml_define/Procedure.yaml
:start-after: # under the License.
:language: yaml

29
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/python.rst

@ -1,29 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Python
======
.. automodule:: pydolphinscheduler.tasks.python
YAML file example
-----------------
.. literalinclude:: ../../../examples/yaml_define/Python.yaml
:start-after: # under the License.
:language: yaml

42
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/pytorch.rst

@ -1,42 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Pytorch
=======
A Pytorch task type's example and dive into information of **PyDolphinScheduler**.
Example
-------
.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_pytorch_example.py
:start-after: [start workflow_declare]
:end-before: [end workflow_declare]
Dive Into
---------
.. automodule:: pydolphinscheduler.tasks.pytorch
YAML file example
-----------------
.. literalinclude:: ../../../examples/yaml_define/Pytorch.yaml
:start-after: # under the License.
:language: yaml

46
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sagemaker.rst

@ -1,46 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
SageMaker
=========
A SageMaker task type's example and dive into information of **PyDolphinScheduler**.
Example
-------
.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_sagemaker_example.py
:start-after: [start workflow_declare]
:end-before: [end workflow_declare]
Dive Into
---------
.. automodule:: pydolphinscheduler.tasks.sagemaker
YAML file example
-----------------
.. literalinclude:: ../../../examples/yaml_define/Sagemaker.yaml
:start-after: # under the License.
:language: yaml
example_sagemaker_params.json:
.. literalinclude:: ../../../examples/yaml_define/example_sagemaker_params.json
:language: json

41
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/shell.rst

@ -1,41 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Shell
=====
A shell task type's example and dive into information of **PyDolphinScheduler**.
Example
-------
.. literalinclude:: ../../../src/pydolphinscheduler/examples/tutorial.py
:start-after: [start workflow_declare]
:end-before: [end task_relation_declare]
Dive Into
---------
.. automodule:: pydolphinscheduler.tasks.shell
YAML file example
-----------------
.. literalinclude:: ../../../examples/yaml_define/Shell.yaml
:start-after: # under the License.
:language: yaml

41
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/spark.rst

@ -1,41 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Spark
=====
A spark task type's example and dive into information of **PyDolphinScheduler**.
Example
-------
.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_spark_example.py
:start-after: [start workflow_declare]
:end-before: [end workflow_declare]
Dive Into
---------
.. automodule:: pydolphinscheduler.tasks.spark
YAML file example
-----------------
.. literalinclude:: ../../../examples/yaml_define/Spark.yaml
:start-after: # under the License.
:language: yaml

35
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sql.rst

@ -1,35 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
SQL
===
.. automodule:: pydolphinscheduler.tasks.sql
YAML file example
-----------------
.. literalinclude:: ../../../examples/yaml_define/Sql.yaml
:start-after: # under the License.
:language: yaml
example_sql.sql:
.. literalinclude:: ../../../examples/yaml_define/example_sql.sql
:start-after: */
:language: sql

38
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sub_process.rst

@ -1,38 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Sub Process
===========
.. automodule:: pydolphinscheduler.tasks.sub_process
YAML file example
-----------------
.. literalinclude:: ../../../examples/yaml_define/SubProcess.yaml
:start-after: # under the License.
:language: yaml
example_sub_workflow.yaml:
.. literalinclude:: ../../../examples/yaml_define/example_sub_workflow.yaml
:start-after: # under the License.
:language: yaml

42
dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/switch.rst

@ -1,42 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Switch
======
A switch task type's example and dive into information of **PyDolphinScheduler**.
Example
-------
.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_switch_example.py
:start-after: [start workflow_declare]
:end-before: [end workflow_declare]
Dive Into
---------
.. automodule:: pydolphinscheduler.tasks.switch
YAML file example
-----------------
.. literalinclude:: ../../../examples/yaml_define/Switch.yaml
:start-after: # under the License.
:language: yaml

319
dolphinscheduler-python/pydolphinscheduler/docs/source/tutorial.rst

@ -1,319 +0,0 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
Tutorial
========
This tutorial shows you the basic concept of *PyDolphinScheduler* and tells all
things you should know before you submit or run your first workflow. If you
still have not installed *PyDolphinScheduler* and started DolphinScheduler, you
could go and see :ref:`how to get started with PyDolphinScheduler <start:getting started>` first.
Overview of Tutorial
--------------------
Here is an overview of our tutorial. It looks a little complex, but do not
worry about that because we explain this example below in as much detail as possible.
There are three types of tutorials: traditional, task decorator, and YAML file.
- **Traditional Way**: More general, support many :doc:`built-in task types <tasks/index>`, it is convenient
when you build your workflow at the beginning.
- **Task Decorator**: A Python decorator that allows you to wrap your function into pydolphinscheduler's task. It is less
versatile than the traditional way because it only supports Python functions and does not support built-in tasks.
But it is helpful if your workflow is all built with Python or if you already have some Python
workflow code and want to migrate it to pydolphinscheduler.
- **YAML File**: We can use the pydolphinscheduler CLI to create a process definition from a YAML file: :code:`pydolphinscheduler yaml -f tutorial.yaml`.
We can find more YAML file examples in `examples/yaml_define <https://github.com/apache/dolphinscheduler/tree/dev/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define>`_
.. tab:: Tradition
.. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial.py
:dedent: 0
:start-after: [start tutorial]
:end-before: [end tutorial]
.. tab:: Task Decorator
.. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial_decorator.py
:dedent: 0
:start-after: [start tutorial]
:end-before: [end tutorial]
.. tab:: YAML File
.. literalinclude:: ../../examples/yaml_define/tutorial.yaml
:start-after: # under the License.
:language: yaml
Import Necessary Module
-----------------------
First of all, we should import the necessary module which we would use later just like other Python packages.
.. tab:: Tradition
.. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial.py
:dedent: 0
:start-after: [start package_import]
:end-before: [end package_import]
In tradition tutorial we import :class:`pydolphinscheduler.core.process_definition.ProcessDefinition` and
:class:`pydolphinscheduler.tasks.shell.Shell`.
If you want to use other task types, you could click and :doc:`see all tasks we support <tasks/index>`.
.. tab:: Task Decorator
.. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial_decorator.py
:dedent: 0
:start-after: [start package_import]
:end-before: [end package_import]
In task decorator tutorial we import :class:`pydolphinscheduler.core.process_definition.ProcessDefinition` and
:func:`pydolphinscheduler.tasks.func_wrap.task`.
Process Definition Declaration
------------------------------
We should instantiate :class:`pydolphinscheduler.core.process_definition.ProcessDefinition` object after we
import them from `import necessary module`_. Here we declare basic arguments for process definition(aka, workflow).
We define the name of :code:`ProcessDefinition`, using `Python context manager`_ and it is **the only required argument**
for `ProcessDefinition`. Besides, we also declare three more arguments: :code:`schedule` and :code:`start_time`,
which set the workflow schedule interval and schedule start time, and :code:`tenant`, which defines which tenant
will be running this task in the DolphinScheduler worker. See :ref:`section tenant <concept:tenant>` in
*PyDolphinScheduler* :doc:`concept` for more information.
.. tab:: Tradition
.. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial.py
:dedent: 0
:start-after: [start workflow_declare]
:end-before: [end workflow_declare]
.. tab:: Task Decorator
.. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial_decorator.py
:dedent: 0
:start-after: [start workflow_declare]
:end-before: [end workflow_declare]
.. tab:: YAML File
.. literalinclude:: ../../examples/yaml_define/tutorial.yaml
:start-after: # under the License.
:end-before: # Define the tasks under the workflow
:language: yaml
We could find more detail about :code:`ProcessDefinition` in :ref:`concept about process definition <concept:process definition>`
if you are interested in it. For all arguments of object process definition, you could find in the
:class:`pydolphinscheduler.core.process_definition` API documentation.
Task Declaration
----------------
.. tab:: Tradition
We declare four tasks to show how to create tasks, and all of them are simple tasks of
:class:`pydolphinscheduler.tasks.shell` which run the `echo` command in the terminal. Besides the argument
`command` with :code:`echo` command, we also need to set the argument `name` for each task
*(not only shell task, `name` is required for each type of task)*.
.. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial.py
:dedent: 0
:start-after: [start task_declare]
:end-before: [end task_declare]
Besides shell task, *PyDolphinScheduler* supports multiple tasks and you could find in :doc:`tasks/index`.
.. tab:: Task Decorator
We declare four tasks to show how to create tasks, and all of them are created by the task decorator
:func:`pydolphinscheduler.tasks.func_wrap.task`. All we have to do is add a decorator named
:code:`@task` to existing Python functions, and then use them inside :class:`pydolphinscheduler.core.process_definition`.
.. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial_decorator.py
:dedent: 0
:start-after: [start task_declare]
:end-before: [end task_declare]
It makes our workflow more Pythonic, but be careful: using task decorator mode means we only use
Python functions as tasks and, in most cases, cannot use the :doc:`built-in tasks <tasks/index>`.
.. tab:: YAML File
.. literalinclude:: ../../examples/yaml_define/tutorial.yaml
:start-after: # Define the tasks under the workflow
:language: yaml
Setting Task Dependence
-----------------------
After we declare both process definition and task, we have four tasks that are independent and will be running
in parallel. If you want to start one task only after some other task is finished, you have to set dependence on those
tasks.
Setting task dependence is quite easy with the task's attributes :code:`set_downstream` and :code:`set_upstream` or with the
bitwise operators :code:`>>` and :code:`<<`.
In this tutorial, task `task_parent` is the leading task of the whole workflow, then task `task_child_one` and
task `task_child_two` are its downstream tasks. Task `task_union` will not run unless both task `task_child_one`
and task `task_child_two` are done, because both tasks are upstream of `task_union`.
.. tab:: Tradition
.. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial.py
:dedent: 0
:start-after: [start task_relation_declare]
:end-before: [end task_relation_declare]
.. tab:: Task Decorator
.. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial_decorator.py
:dedent: 0
:start-after: [start task_relation_declare]
:end-before: [end task_relation_declare]
.. tab:: YAML File
We can use :code:`deps:[]` to set task dependence
.. literalinclude:: ../../examples/yaml_define/tutorial.yaml
:start-after: # Define the tasks under the workflow
:language: yaml
.. note::
We could set task dependence in batch mode if they have the same downstream or upstream by declaring those
tasks as task groups. In the tutorial, we declare task `task_child_one` and `task_child_two` as a task group named
`task_group`, then set `task_group` as downstream of task `task_parent`. See
:ref:`concept:Tasks Dependence` for more detail about how to set task dependence.
Submit Or Run Workflow
----------------------
After that, we finish our workflow definition, with four tasks and task dependence, but all these things are
local; we should let the DolphinScheduler daemon know the definition of the workflow. So the last thing we
have to do is submit the workflow to the DolphinScheduler daemon.
Fortunately, we have a convenient method to submit workflow via `ProcessDefinition` attribute :code:`run` which
will create workflow definition as well as workflow schedule.
.. tab:: Tradition
.. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial.py
:dedent: 0
:start-after: [start submit_or_run]
:end-before: [end submit_or_run]
.. tab:: Task Decorator
.. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial_decorator.py
:dedent: 0
:start-after: [start submit_or_run]
:end-before: [end submit_or_run]
.. tab:: YAML File
pydolphinscheduler YAML CLI always submits the workflow. We can also run the workflow if we set :code:`run: true`.
.. code-block:: yaml
# Define the workflow
workflow:
name: "tutorial"
run: true
At last, we could execute this workflow code in your terminal like other Python scripts, running
:code:`python tutorial.py` to trigger and execute it.
.. note::
If you have not started your DolphinScheduler API server, you can find how to start it in
:ref:`start:start Python gateway service`. Besides attribute :code:`run`, we have attribute
:code:`submit` for object `ProcessDefinition` which just submits workflow to the daemon but does not set
the workflow schedule information. For more detail, you could see :ref:`concept:process definition`.
DAG Graph After Tutorial Run
----------------------------
After we run the tutorial code, you can log in to the DolphinScheduler web UI, go and see the
`DolphinScheduler project page`_. There is a new process definition created by *PyDolphinScheduler* and it is
named "tutorial" or "tutorial_decorator". The task graph of the workflow looks like below:
.. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial.py
:language: text
:lines: 24-28
Create Process Using YAML File
------------------------------
We can use pydolphinscheduler CLI to create process using YAML file
.. code-block:: bash
pydolphinscheduler yaml -f Shell.yaml
We can use the following four special grammars to define workflows more flexibly.
- :code:`$FILE{"file_name"}`: Read the file (:code:`file_name`) contents and replace them to that location.
- :code:`$WORKFLOW{"other_workflow.yaml"}`: Refer to another process defined using YAML file (:code:`other_workflow.yaml`) and replace the process name in this location.
- :code:`$ENV{env_name}`: Read the environment variable (:code:`env_name`) and replace it to that location.
- :code:`${CONFIG.key_name}`: Read the configuration value of key (:code:`key_name`) and replace it to that location.
In addition, when loading the file path use :code:`$FILE{"file_name"}` or :code:`$WORKFLOW{"other_workflow.yaml"}`, pydolphinscheduler will search in the path of the YAML file if the file does not exist.
For example, our file directory structure is as follows:
.. code-block:: bash
.
└── yaml_define
├── Condition.yaml
├── DataX.yaml
├── Dependent_External.yaml
├── Dependent.yaml
├── example_datax.json
├── example_sql.sql
├── example_sub_workflow.yaml
├── Flink.yaml
├── Http.yaml
├── MapReduce.yaml
├── MoreConfiguration.yaml
├── Procedure.yaml
├── Python.yaml
├── Shell.yaml
├── Spark.yaml
├── Sql.yaml
├── SubProcess.yaml
└── Switch.yaml
After we run
.. code-block:: bash
pydolphinscheduler yaml -f yaml_define/SubProcess.yaml
the :code:`$WORKFLOW{"example_sub_workflow.yaml"}` will be set to :code:`$WORKFLOW{"yaml_define/example_sub_workflow.yaml"}`, because :code:`./example_sub_workflow.yaml` does not exist and :code:`yaml_define/example_sub_workflow.yaml` does.
Furthermore, this feature supports recursion all the way down.
.. _`DolphinScheduler project page`: https://dolphinscheduler.apache.org/en-us/docs/latest/user_doc/guide/project.html
.. _`Python context manager`: https://docs.python.org/3/library/stdtypes.html#context-manager-types

43
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Condition.yaml

@ -1,43 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Define the workflow
workflow:
name: "Condition"
# Define the tasks under the workflow
tasks:
- { "task_type": "Shell", "name": "pre_task_1", "command": "echo pre_task_1" }
- { "task_type": "Shell", "name": "pre_task_2", "command": "echo pre_task_2" }
- { "task_type": "Shell", "name": "pre_task_3", "command": "echo pre_task_3" }
- { "task_type": "Shell", "name": "success_branch", "command": "echo success_branch" }
- { "task_type": "Shell", "name": "fail_branch", "command": "echo fail_branch" }
- name: condition
task_type: Condition
success_task: success_branch
failed_task: fail_branch
op: AND
groups:
- op: AND
groups:
- task: pre_task_1
flag: true
- task: pre_task_2
flag: true
- task: pre_task_3
flag: false

33
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/DataX.yaml

@ -1,33 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Define the workflow
workflow:
name: "DataX"
# Define the tasks under the workflow
tasks:
- name: task
task_type: DataX
datasource_name: db
datatarget_name: db
sql: show tables;
target_table: table_test
- name: task_custon_config
task_type: CustomDataX
json: $FILE{"example_datax.json"}

76
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dependent.yaml

@ -1,76 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
workflow:
name: "Dependent"
# Define the tasks under the workflow
tasks:
- name: dependent
task_type: Dependent
denpendence:
op: and
groups:
- op: or
groups:
- project_name: pydolphin
process_definition_name: task_dependent_external
dependent_task_name: task_1
- project_name: pydolphin
process_definition_name: task_dependent_external
dependent_task_name: task_2
- op: and
groups:
- project_name: pydolphin
process_definition_name: task_dependent_external
dependent_task_name: task_1
dependent_date: LAST_WEDNESDAY
- project_name: pydolphin
process_definition_name: task_dependent_external
dependent_task_name: task_2
dependent_date: last24Hours
- name: dependent_var
task_type: Dependent
denpendence:
op: and
groups:
- op: or
# we can use ${CONFIG.WORKFLOW_PROJECT} to set the value to configuration.WORKFLOW_PROJECT
# we can use $WORKFLOW{"Dependent_External.yaml"} to create or update a workflow from dependent_external.yaml and set the value to that workflow name
groups:
- project_name: ${CONFIG.WORKFLOW_PROJECT}
process_definition_name: $WORKFLOW{"Dependent_External.yaml"}
dependent_task_name: task_1
- project_name: ${CONFIG.WORKFLOW_PROJECT}
process_definition_name: $WORKFLOW{"Dependent_External.yaml"}
dependent_task_name: task_2
- op: and
groups:
- project_name: ${CONFIG.WORKFLOW_PROJECT}
process_definition_name: $WORKFLOW{"Dependent_External.yaml"}
dependent_task_name: task_1
dependent_date: LAST_WEDNESDAY
- project_name: ${CONFIG.WORKFLOW_PROJECT}
process_definition_name: $WORKFLOW{"Dependent_External.yaml"}
dependent_task_name: task_2
dependent_date: last24Hours

26
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dependent_External.yaml

@ -1,26 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Define the workflow
workflow:
name: "task_dependent_external"
# Define the tasks under the workflow
tasks:
- { "task_type": "Shell", "name": "task_1", "command": "echo task 1" }
- { "task_type": "Shell", "name": "task_2", "command": "echo task 2" }
- { "task_type": "Shell", "name": "task_3", "command": "echo task 3" }

46
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dvc.yaml

@ -1,46 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Define variable `repository`
repository: &repository "git@github.com:<YOUR-NAME-OR-ORG>/dvc-data-repository-example.git"
# Define the workflow
workflow:
name: "DVC"
release_state: "offline"
# Define the tasks under the process
tasks:
- name: init_dvc
task_type: DVCInit
repository: *repository
store_url: ~/dvc_data
- name: upload_data
task_type: DVCUpload
repository: *repository
data_path_in_dvc_repository: "iris"
data_path_in_worker: ~/source/iris
version: v1
message: upload iris data v1
- name: download_data
task_type: DVCDownload
repository: *repository
data_path_in_dvc_repository: "iris"
data_path_in_worker: ~/target/iris
version: v1

29
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Flink.yaml

@ -1,29 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Define the workflow
workflow:
name: "Flink"
# Define the tasks under the workflow
tasks:
- name: task
task_type: Flink
main_class: org.apache.flink.streaming.examples.wordcount.WordCount
main_package: test_java.jar
program_type: JAVA
deploy_mode: local

37
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Http.yaml

@ -1,37 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Define the workflow
workflow:
name: "Http"
# Define the tasks under the workflow
tasks:
- name: task
task_type: Http
url: "https://httpbin.org/get"
http_method: "GET"
http_params:
- { "prop": "a", "httpParametersType": "PARAMETER", "value": "1" }
- { "prop": "b", "httpParametersType": "PARAMETER", "value": "2" }
- {
"prop": "Content-Type",
"httpParametersType": "header",
"value": "test",
}
http_check_condition: "STATUS_CODE_CUSTOM"
condition: "404"

29
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/MapReduce.yaml

@ -1,29 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Define the workflow
workflow:
name: "MapReduce"
# Define the tasks under the workflow
tasks:
- name: task
task_type: MR
main_class: wordcount
main_package: test_java.jar
program_type: SCALA
main_args: /dolphinscheduler/tenant_exists/resources/file.txt /output/ds

40
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/MoreConfiguration.yaml

@ -1,40 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Define the workflow
workflow:
name: "MoreConfiguration"
param:
n: 1
# Define the tasks under the workflow
tasks:
- name: shell_0
task_type: Shell
description: "yaml define task"
flag: "YES"
command: |
echo "$ENV{HOME}"
echo "${n}"
task_priority: "HIGH"
delay_time: 20
fail_retry_times: 30
fail_retry_interval: 5
timeout_flag: "CLOSE"
timeout: 60
local_params:
- { "prop": "n", "direct": "IN", "type": "VARCHAR", "value": "${n}" }

33
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/OpenMLDB.yaml

@ -1,33 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Define the workflow
workflow:
name: "OpenMLDB"
# Define the tasks under the workflow
tasks:
- name: OpenMLDB
task_type: OpenMLDB
zookeeper: "127.0.0.1:2181"
zookeeper_path: "/openmldb"
execute_mode: "online"
sql: |
USE demo_db;
set @@job_timeout=200000;
LOAD DATA INFILE 'file:///tmp/train_sample.csv'
INTO TABLE talkingdata OPTIONS(mode='overwrite');

27
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Procedure.yaml

@ -1,27 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Define the workflow
workflow:
name: "Procedure"
# Define the tasks under the workflow
tasks:
- name: task
task_type: Procedure
datasource_name: db
method: show tables;

30
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Python.yaml

@ -1,30 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Define the workflow
workflow:
name: "Python"
# Define the tasks under the workflow
tasks:
- name: python
task_type: Python
definition: |
import os
print(os)
print("1")
print("2")

53
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Pytorch.yaml

@ -1,53 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Define the workflow
workflow:
name: "Pytorch"
# Define the tasks under the workflow
tasks:
# run project with existing environment
- name: task_existing_env
task_type: pytorch
script: main.py
script_params: --dry-run --no-cuda
project_path: https://github.com/pytorch/examples#mnist
python_command: /home/anaconda3/envs/pytorch/bin/python3
# run project with creating conda environment
- name: task_conda_env
task_type: pytorch
script: main.py
script_params: --dry-run --no-cuda
project_path: https://github.com/pytorch/examples#mnist
is_create_environment: True
python_env_tool: conda
requirements: requirements.txt
conda_python_version: 3.7
# run project with creating virtualenv environment
- name: task_virtualenv_env
task_type: pytorch
script: main.py
script_params: --dry-run --no-cuda
project_path: https://github.com/pytorch/examples#mnist
is_create_environment: True
python_env_tool: virtualenv
requirements: requirements.txt

28
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Sagemaker.yaml

@ -1,28 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Define the workflow
workflow:
name: "Sagemaker"
release_state: "offline"
# Define the tasks under the process
tasks:
- name: sagemaker
task_type: Sagemaker
sagemaker_request_json: $FILE{"example_sagemaker_params.json"}

40
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Shell.yaml

@ -1,40 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Define the workflow
workflow:
name: "Shell"
release_state: "offline"
run: true
# Define the tasks under the process
tasks:
- name: task_parent
task_type: Shell
command: |
echo hello pydolphinscheduler
echo run task parent
- name: task_child_one
task_type: Shell
deps: [task_parent]
command: echo "child one"
- name: task_child_two
task_type: Shell
deps: [task_parent]
command: echo "child two"

45
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Sql.yaml

@ -1,45 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Define the workflow
workflow:
name: "Sql"
# Define the tasks under the workflow
tasks:
- name: task_base
task_type: Sql
datasource_name: "db"
sql: show tables;
- name: task_multi_line
task_type: Sql
datasource_name: "db"
sql: |
show tables;
select id from version where id=1;
- name: task_file
task_type: Sql
datasource_name: "db"
sql: $FILE{"example_sql.sql"}
# Or you can define the task "task_base" in one line
- { "task_type": "Sql", "name": "task_base_one_line", "datasource_name": "db", "sql": "select id from version where id=1;"}
# Or you can define the task "task_file" in one line
- { "task_type": "Sql", "name": "task_file_one_line", "datasource_name": "db", "sql": '$FILE{"example_sql.sql"}'}

27
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/SubProcess.yaml

@ -1,27 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Define the workflow
workflow:
name: "SubWorkflow"
tasks:
- name: example_workflow
task_type: SubProcess
process_definition_name: $WORKFLOW{"example_sub_workflow.yaml"}
- { "task_type": "Shell", "deps": [example_workflow], "name": "task_3", "command": "echo task 3" }

39
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Switch.yaml

@ -1,39 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Define the workflow
workflow:
name: "Switch"
param:
var: 1
# Define the tasks under the workflow
tasks:
- name: switch_child_1
task_type: Shell
command: echo switch_child_1
- name: switch_child_2
task_type: Shell
command: echo switch_child_2
- name: switch
task_type: Switch
condition:
- task: switch_child_1
condition: "${var} > 1"
- task: switch_child_2

62
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_datax.json

@ -1,62 +0,0 @@
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "usr",
"password": "pwd",
"column": [
"id",
"name",
"code",
"description"
],
"splitPk": "id",
"connection": [
{
"table": [
"source_table"
],
"jdbcUrl": [
"jdbc:mysql://127.0.0.1:3306/source_db"
]
}
]
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"writeMode": "insert",
"username": "usr",
"password": "pwd",
"column": [
"id",
"name"
],
"connection": [
{
"jdbcUrl": "jdbc:mysql://127.0.0.1:3306/target_db",
"table": [
"target_table"
]
}
]
}
}
}
],
"setting": {
"errorLimit": {
"percentage": 0,
"record": 0
},
"speed": {
"channel": 1,
"record": 1000
}
}
}
}

18
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sagemaker_params.json

@ -1,18 +0,0 @@
{
"ParallelismConfiguration":{
"MaxParallelExecutionSteps":1
},
"PipelineExecutionDescription":"run pipeline using ds",
"PipelineExecutionDisplayName":"ds-sagemaker-pipeline",
"PipelineName":"DsSagemakerPipeline",
"PipelineParameters":[
{
"Name":"InputData",
"Value": "s3://sagemaker/dataset/dataset.csv"
},
{
"Name":"InferenceData",
"Value": "s3://sagemaker/dataset/inference.csv"
}
]
}

22
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sql.sql

@ -1,22 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
select id from version where id=1;
select id from version where id=2;
select id from version where id=3;
select id from version where id=4;
select id from version where id=5;

26
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sub_workflow.yaml

@ -1,26 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Define the workflow
workflow:
name: "example_workflow_for_sub_workflow"
# Define the tasks under the workflow
tasks:
- { "task_type": "Shell", "name": "task_1", "command": "echo task 1" }
- { "task_type": "Shell", "deps": [task_1], "name": "task_2", "command": "echo task 2" }
- { "task_type": "Shell", "deps": [task_2], "name": "task_3", "command": "echo task 3" }

69
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/mlflow.yaml

@ -1,69 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Define variable `mlflow_tracking_uri`
mlflow_tracking_uri: &mlflow_tracking_uri "http://127.0.0.1:5000"
# Define the workflow
workflow:
name: "MLflow"
# Define the tasks under the workflow
tasks:
- name: train_xgboost_native
task_type: MLFlowProjectsCustom
repository: https://github.com/mlflow/mlflow#examples/xgboost/xgboost_native
mlflow_tracking_uri: *mlflow_tracking_uri
parameters: -P learning_rate=0.2 -P colsample_bytree=0.8 -P subsample=0.9
experiment_name: xgboost
- name: train_automl
task_type: MLFlowProjectsAutoML
mlflow_tracking_uri: *mlflow_tracking_uri
parameters: time_budget=30;estimator_list=['lgbm']
experiment_name: automl_iris
model_name: iris_A
automl_tool: flaml
data_path: /data/examples/iris
- name: deploy_docker
task_type: MLflowModels
deps: [train_automl]
model_uri: models:/iris_A/Production
mlflow_tracking_uri: *mlflow_tracking_uri
deploy_mode: DOCKER
port: 7002
- name: train_basic_algorithm
task_type: MLFlowProjectsBasicAlgorithm
mlflow_tracking_uri: *mlflow_tracking_uri
parameters: n_estimators=200;learning_rate=0.2
experiment_name: basic_algorithm_iris
model_name: iris_B
algorithm: lightgbm
data_path: /data/examples/iris
search_params: max_depth=[5, 10];n_estimators=[100, 200]
- name: deploy_mlflow
deps: [train_basic_algorithm]
task_type: MLflowModels
model_uri: models:/iris_B/Production
mlflow_tracking_uri: *mlflow_tracking_uri
deploy_mode: MLFLOW
port: 7001

46
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/tutorial.yaml

@ -1,46 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Define the workflow
workflow:
name: "tutorial"
schedule: "0 0 0 * * ? *"
start_time: "2021-01-01"
tenant: "tenant_exists"
release_state: "offline"
run: true
# Define the tasks under the workflow
tasks:
- name: task_parent
task_type: Shell
command: echo hello pydolphinscheduler
- name: task_child_one
task_type: Shell
deps: [task_parent]
command: echo "child one"
- name: task_child_two
task_type: Shell
deps: [task_parent]
command: echo "child two"
- name: task_union
task_type: Shell
deps: [task_child_one, task_child_two]
command: echo "union"

21
dolphinscheduler-python/pydolphinscheduler/pytest.ini

@ -1,21 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[pytest]
# Add paths here to exclude them from pytest's scan
norecursedirs =
tests/testing
# Integration tests run separately and do not count toward coverage; run them with `tox -e integrate-test`
tests/integration

16
dolphinscheduler-python/pydolphinscheduler/setup.cfg

@ -1,16 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

22
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/__init__.py

@ -1,22 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Init root of pydolphinscheduler."""
from pkg_resources import get_distribution
__version__ = get_distribution("apache-dolphinscheduler").version

18
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/cli/__init__.py

@ -1,18 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Command line interface of pydolphinscheduler."""

106
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/cli/commands.py

@ -1,106 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Commands of the pydolphinscheduler command line interface."""
import click
from click import echo
import pydolphinscheduler
from pydolphinscheduler.configuration import (
get_single_config,
init_config_file,
set_single_config,
)
from pydolphinscheduler.core.yaml_process_define import create_process_definition
version_option_val = ["major", "minor", "micro"]
# Root click group; the commands defined below register on it via ``@cli.command()``.
@click.group()
def cli():
    """Apache DolphinScheduler Python API's command line interface."""
@cli.command()
@click.option(
    "--part",
    "-p",
    required=False,
    type=click.Choice(version_option_val, case_sensitive=False),
    multiple=False,
    help="The part of version your want to get.",
)
def version(part: str) -> None:
    """Show current version of pydolphinscheduler."""
    full_version = pydolphinscheduler.__version__
    # Without ``--part`` the complete version string is printed.
    if not part:
        echo(f"{full_version}")
        return
    # The position of ``part`` in ``version_option_val`` selects the matching
    # dot-separated component of the version string.
    position = version_option_val.index(part)
    echo(f"{full_version.split('.')[position]}")
@cli.command()
@click.option(
    "--init",
    "-i",
    is_flag=True,
    help="Initialize and create configuration file to `PYDS_HOME`.",
)
@click.option(
    "--set",
    "-s",
    "setter",
    multiple=True,
    type=click.Tuple([str, str]),
    # The trailing space matters: adjacent literals are concatenated verbatim.
    help="Set specific setting to config file. "
    "Use multiple ``--set <KEY> <VAL>`` options to set multiple configs",
)
@click.option(
    "--get",
    "-g",
    "getter",
    multiple=True,
    type=str,
    # The trailing space matters: adjacent literals are concatenated verbatim.
    help="Get specific setting from config file. "
    "Use multiple ``--get <KEY>`` options to get multiple configs",
)
def config(getter, setter, init) -> None:
    """Manage the configuration for pydolphinscheduler."""
    # The three modes are mutually exclusive and checked in this order:
    # ``--init`` wins over ``--get``, which wins over ``--set``.
    if init:
        init_config_file()
    elif getter:
        click.echo("The configuration query as below:\n")
        configs_kv = [f"{key} = {get_single_config(key)}" for key in getter]
        click.echo("\n".join(configs_kv))
    elif setter:
        # Each ``--set`` occurrence arrives as a ``(key, value)`` pair.
        for key, val in setter:
            set_single_config(key, val)
        click.echo("Set configuration done.")
# Command that forwards the required ``--yaml_file`` / ``-f`` path to
# ``create_process_definition``; the option is declared with
# ``click.Path(exists=True)``.
@cli.command()
@click.option(
    "--yaml_file",
    "-f",
    required=True,
    help="YAML file path",
    type=click.Path(exists=True),
)
def yaml(yaml_file) -> None:
    """Create process definition using YAML file."""
    create_process_definition(yaml_file)

193
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/configuration.py

@ -1,193 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Configuration module for pydolphinscheduler."""
import os
from pathlib import Path
from typing import Any
from pydolphinscheduler.exceptions import PyDSConfException
from pydolphinscheduler.utils import file
from pydolphinscheduler.utils.yaml_parser import YamlParser
BUILD_IN_CONFIG_PATH = Path(__file__).resolve().parent.joinpath("default_config.yaml")
def config_path() -> Path:
    """Return the location of the pydolphinscheduler configuration file.

    The directory comes from the ``PYDS_HOME`` environment variable and falls back to
    ``~/pydolphinscheduler``; the file itself is always named ``config.yaml``. A leading
    ``~`` is expanded to the user's home directory.
    """
    home_dir = Path(os.environ.get("PYDS_HOME", "~/pydolphinscheduler"))
    return (home_dir / "config.yaml").expanduser()
def get_configs() -> YamlParser:
    """Load every configuration setting into a ``YamlParser``.

    The user's configuration file (see :func:`config_path`) is read when it exists;
    otherwise the built-in ``default_config.yaml`` shipped next to this module is used.
    """
    custom_path = config_path()
    source = str(custom_path) if custom_path.exists() else BUILD_IN_CONFIG_PATH
    with open(source, mode="r") as handle:
        content = handle.read()
    return YamlParser(content)
def init_config_file() -> None:
    """Initialize the configuration file with the default configs.

    The current settings returned by :func:`get_configs` are written to the path given
    by :func:`config_path`.

    :raises PyDSConfException: If the configuration file already exists, so that an
        existing configuration is never overwritten by accident.
    """
    target = config_path()
    if target.exists():
        # Interpolate the path into the message here: exceptions, unlike logging
        # calls, do not apply ``%s`` arguments themselves.
        raise PyDSConfException(
            "Initialize configuration false to avoid overwrite configure by accident, file already exists "
            f"in {target}, if you wan to overwrite the exists configure please remove the exists file manually."
        )
    file.write(content=str(get_configs()), to_path=str(target))
def get_single_config(key: str) -> Any:
    """Get a single config value from the configuration file.

    Nested keys are addressed with the ``.`` delimiter.
    For example, given the yaml config below:

    .. code-block:: yaml

        one:
          two1:
            three: value1
          two2: value2

    you could get ``value1`` and ``value2`` by nested path

    .. code-block:: python

        value1 = get_single_config("one.two1.three")
        value2 = get_single_config("one.two2")

    :param key: The config key whose value is wanted.
    :raises PyDSConfException: If ``key`` is not present in the configuration.
    """
    config = get_configs()
    if key not in config:
        # Interpolate the key into the message here: exceptions, unlike logging
        # calls, do not apply ``%s`` arguments themselves.
        raise PyDSConfException(
            f"Configuration path {key} do not exists. Can not get configuration."
        )
    return config[key]
def set_single_config(key: str, value: Any) -> None:
    """Change a single config value and write the result to the configuration file.

    For example, given the yaml config below:

    .. code-block:: yaml

        one:
          two1:
            three: value1
          two2: value2

    you could change ``value1`` to ``value3``, and ``value2`` to ``value4``, by nested path

    .. code-block:: python

        set_single_config("one.two1.three", "value3")
        set_single_config("one.two2", "value4")

    :param key: The config key to change.
    :param value: The new value to set.
    :raises PyDSConfException: If ``key`` is not present in the configuration.
    """
    config = get_configs()
    if key not in config:
        # Interpolate the key into the message here: exceptions, unlike logging
        # calls, do not apply ``%s`` arguments themselves.
        raise PyDSConfException(
            f"Configuration path {key} do not exists. Can not set configuration."
        )
    config[key] = value
    # Persist the whole updated configuration to the user's configuration path.
    file.write(content=str(config), to_path=str(config_path()), overwrite=True)
def get_int(val: Any) -> int:
    """Convert ``val`` to an integer with the built-in ``int`` constructor."""
    converted = int(val)
    return converted
def get_bool(val: Any) -> bool:
    """Convert ``val`` to a boolean.

    Strings are true only when they equal ``"true"`` or ``"t"`` (case-insensitive),
    integers only when they equal ``1``; anything else uses its regular truthiness.
    """
    truthy_words = ("true", "t")
    if isinstance(val, str):
        return val.lower() in truthy_words
    return val == 1 if isinstance(val, int) else bool(val)
# Start Common Configuration Settings

# The configuration is read once here and exposed as module-level constants, so
# that several settings can be looked up without re-reading the configuration.
# Every constant prefers its ``PYDS_*`` environment variable and falls back to
# the value found in the loaded configuration.
configs: YamlParser = get_configs()

# Java Gateway Settings
JAVA_GATEWAY_ADDRESS = os.environ.get(
    "PYDS_JAVA_GATEWAY_ADDRESS", configs.get("java_gateway.address")
)
# Passed through ``get_int`` so the port is always an ``int``.
JAVA_GATEWAY_PORT = get_int(
    os.environ.get("PYDS_JAVA_GATEWAY_PORT", configs.get("java_gateway.port"))
)
# Passed through ``get_bool`` so the flag is always a ``bool``.
JAVA_GATEWAY_AUTO_CONVERT = get_bool(
    os.environ.get(
        "PYDS_JAVA_GATEWAY_AUTO_CONVERT", configs.get("java_gateway.auto_convert")
    )
)

# User Settings
USER_NAME = os.environ.get("PYDS_USER_NAME", configs.get("default.user.name"))
USER_PASSWORD = os.environ.get(
    "PYDS_USER_PASSWORD", configs.get("default.user.password")
)
USER_EMAIL = os.environ.get("PYDS_USER_EMAIL", configs.get("default.user.email"))
# The phone number is always kept as a string.
USER_PHONE = str(os.environ.get("PYDS_USER_PHONE", configs.get("default.user.phone")))
USER_STATE = get_int(
    os.environ.get("PYDS_USER_STATE", configs.get("default.user.state"))
)

# Workflow Settings
WORKFLOW_PROJECT = os.environ.get(
    "PYDS_WORKFLOW_PROJECT", configs.get("default.workflow.project")
)
WORKFLOW_TENANT = os.environ.get(
    "PYDS_WORKFLOW_TENANT", configs.get("default.workflow.tenant")
)
WORKFLOW_USER = os.environ.get(
    "PYDS_WORKFLOW_USER", configs.get("default.workflow.user")
)
WORKFLOW_QUEUE = os.environ.get(
    "PYDS_WORKFLOW_QUEUE", configs.get("default.workflow.queue")
)
WORKFLOW_RELEASE_STATE = os.environ.get(
    "PYDS_WORKFLOW_RELEASE_STATE", configs.get("default.workflow.release_state")
)
WORKFLOW_WORKER_GROUP = os.environ.get(
    "PYDS_WORKFLOW_WORKER_GROUP", configs.get("default.workflow.worker_group")
)
WORKFLOW_TIME_ZONE = os.environ.get(
    "PYDS_WORKFLOW_TIME_ZONE", configs.get("default.workflow.time_zone")
)
WORKFLOW_WARNING_TYPE = os.environ.get(
    "PYDS_WORKFLOW_WARNING_TYPE", configs.get("default.workflow.warning_type")
)

# End Common Configuration Setting

30
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/__init__.py

@ -1,30 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Init pydolphinscheduler.core package."""
from pydolphinscheduler.core.database import Database
from pydolphinscheduler.core.engine import Engine
from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.core.task import Task
__all__ = [
"Database",
"Engine",
"ProcessDefinition",
"Task",
]

62
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/database.py

@ -1,62 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Module database."""
from typing import Dict
from py4j.protocol import Py4JJavaError
from pydolphinscheduler.exceptions import PyDSParamException
from pydolphinscheduler.java_gateway import JavaGate
class Database(dict):
    """Dictionary describing a datasource, filled in from the java gateway.

    On construction the datasource named ``database_name`` is looked up through the
    java gateway, and its type and id are stored in this dict under the caller-supplied
    ``type_key`` and ``database_key``.
    """

    def __init__(self, database_name: str, type_key, database_key, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Cache for the gateway response; filled by the first lookup.
        self._database = {}
        self.database_name = database_name
        self[type_key] = self.database_type
        self[database_key] = self.database_id

    @property
    def database_type(self) -> str:
        """Return the ``type`` entry of the info from :func:`get_database_info`."""
        info = self.get_database_info(self.database_name)
        return info.get("type")

    @property
    def database_id(self) -> str:
        """Return the ``id`` entry of the info from :func:`get_database_info`."""
        info = self.get_database_info(self.database_name)
        return info.get("id")

    def get_database_info(self, name) -> Dict:
        """Return the datasource info for ``name``, asking the java gateway only once.

        A non-empty cached result is returned as is. A ``Py4JJavaError`` raised by the
        gateway call is re-raised as ``PyDSParamException``, which for now simply
        terminates the process.
        """
        if not self._database:
            try:
                self._database = JavaGate().get_datasource_info(name)
            except Py4JJavaError as ex:
                raise PyDSParamException(str(ex.java_exception))
        return self._database

94
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/engine.py

@ -1,94 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Module engine."""
from typing import Dict, Optional
from py4j.protocol import Py4JJavaError
from pydolphinscheduler.core.task import Task
from pydolphinscheduler.exceptions import PyDSParamException
from pydolphinscheduler.java_gateway import JavaGate
class ProgramType(str):
    """Names of the program types an engine can run.

    Currently limited to `JAVA`, `SCALA` and `PYTHON`.
    """

    JAVA = "JAVA"
    SCALA = "SCALA"
    PYTHON = "PYTHON"
class Engine(Task):
    """Base task for engine-style tasks in dolphinscheduler.

    Parent class of the spark, flink and mr tasks. It contributes the shared
    ``programType``, ``mainClass`` and ``mainJar`` task parameters so that the
    children can reuse them.
    """

    def __init__(
        self,
        name: str,
        task_type: str,
        main_class: str,
        main_package: str,
        program_type: Optional[ProgramType] = ProgramType.SCALA,
        *args,
        **kwargs
    ):
        super().__init__(name, task_type, *args, **kwargs)
        self.main_class = main_class
        self.main_package = main_package
        self.program_type = program_type
        # Cache for the gateway response; filled by the first lookup.
        self._resource = {}

    def get_resource_info(self, program_type, main_package):
        """Return the resource info for the main package, asking the java gateway only once.

        A non-empty cached result is returned as is. A ``Py4JJavaError`` raised by the
        gateway call is re-raised as ``PyDSParamException``, which for now simply
        terminates the process.
        """
        if not self._resource:
            try:
                self._resource = JavaGate().get_resources_file_info(
                    program_type, main_package
                )
            except Py4JJavaError as ex:
                raise PyDSParamException(str(ex.java_exception))
        return self._resource

    def get_jar_id(self) -> int:
        """Return the ``id`` entry of the info from :func:`get_resource_info`."""
        info = self.get_resource_info(self.program_type, self.main_package)
        return info.get("id")

    @property
    def task_params(self, camel_attr: bool = True, custom_attr: set = None) -> Dict:
        """Extend ``Task.task_params`` with the engine-specific parameters.

        Engine children carry a few special task parameters that would be odd to
        expose as plain python properties, so they are merged into the parent's
        parameters here.
        """
        params = super().task_params
        params.update(
            {
                "programType": self.program_type,
                "mainClass": self.main_class,
                "mainJar": {
                    "id": self.get_jar_id(),
                },
            }
        )
        return params

73
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/resource.py

@ -1,73 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Module resource."""
from typing import Optional
from pydolphinscheduler.exceptions import PyDSParamException
from pydolphinscheduler.java_gateway import JavaGate
from pydolphinscheduler.models import Base
class Resource(Base):
    """Resource object, describing a resource to create, update or query.

    :param name: The full name of the resource, including path and suffix.
    :param content: The content of the resource.
    :param description: The description of the resource.
    :param user_name: The user name of the resource.
    """

    _DEFINE_ATTR = {"name", "content", "description", "user_name"}

    def __init__(
        self,
        name: str,
        content: Optional[str] = None,
        description: Optional[str] = None,
        user_name: Optional[str] = None,
    ):
        super().__init__(name, description)
        self.content = content
        self.user_name = user_name
        self._resource_code = None

    def get_info_from_database(self):
        """Query the java gateway for this resource's info.

        :raises PyDSParamException: If ``user_name`` is not set.
        """
        if self.user_name:
            return JavaGate().query_resources_file_info(self.user_name, self.name)
        raise PyDSParamException(
            "`user_name` is required when querying resources from python gate."
        )

    def get_id_from_database(self):
        """Return the id of this resource as reported by the java gateway."""
        info = self.get_info_from_database()
        return info.getId()

    def create_or_update_resource(self):
        """Create or update this resource through the java gateway.

        :raises PyDSParamException: If ``content`` or ``user_name`` is not set.
        """
        if not (self.content and self.user_name):
            raise PyDSParamException(
                "`user_name` and `content` are required when create or update resource from python gate."
            )
        gateway = JavaGate()
        gateway.create_or_update_resource(
            self.user_name,
            self.name,
            self.content,
            self.description,
        )

466
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/yaml_process_define.py

@ -1,466 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Parse YAML file to create process."""
import logging
import os
import re
from pathlib import Path
from typing import Any, Dict
from pydolphinscheduler import configuration, tasks
from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.core.task import Task
from pydolphinscheduler.exceptions import PyDSTaskNoFoundException
from pydolphinscheduler.utils.yaml_parser import YamlParser
logger = logging.getLogger(__file__)
KEY_PROCESS = "workflow"
KEY_TASK = "tasks"
KEY_TASK_TYPE = "task_type"
KEY_DEPS = "deps"
KEY_OP = "op"
TASK_SPECIAL_KEYS = [KEY_TASK_TYPE, KEY_DEPS]
class ParseTool:
    """Enhanced parsing tools.

    Each ``parse_string_param_if_*`` helper receives a string parameter and returns it
    with one kind of marker resolved. A string that does not contain a well-formed
    marker of that kind is returned unchanged, so ordinary values that merely look
    similar (for example ``echo $ENVIRONMENT``) are not rejected.
    """

    # Marker patterns, compiled once when the class is created.
    _FILE_PATTERN = re.compile(r"\$FILE\{\"(.*?)\"\}")
    _ENV_PATTERN = re.compile(r"\$ENV\{(.*?)\}")
    _CONFIG_PATTERN = re.compile(r"\$\{CONFIG\.(.*?)\}")

    @staticmethod
    def parse_string_param_if_file(string_param: str, **kwargs):
        """Use $FILE{"data_path"} to load file from "data_path".

        :param string_param: The raw parameter value.
        :param kwargs: May carry ``base_folder`` (default ``"."``), the folder against
            which a path that does not exist as given is resolved.
        :return: The file content when ``string_param`` starts with a well-formed
            ``$FILE{"..."}`` marker, otherwise ``string_param`` unchanged.
        """
        if not string_param.startswith("$FILE"):
            return string_param
        match = ParseTool._FILE_PATTERN.search(string_param)
        if match is None:
            # Starts with "$FILE" but is not a marker, e.g. "$FILENAME".
            return string_param
        base_folder = kwargs.get("base_folder", ".")
        path = ParseTool.get_possible_path(match.group(1), base_folder)
        with open(path, "r") as read_file:
            return read_file.read()

    @staticmethod
    def parse_string_param_if_env(string_param: str, **kwargs):
        """Use $ENV{env_name} to load environment variable "env_name".

        Every ``$ENV{...}`` marker in the string is substituted. A variable that is
        not set is replaced by ``$env_name``.

        :param string_param: The raw parameter value.
        :return: ``string_param`` with all ``$ENV{...}`` markers substituted.
        """
        if "$ENV" not in string_param:
            return string_param

        def _lookup(match):
            key = match.group(1)
            return os.environ.get(key, "$%s" % key)

        # A callable replacement keeps backslashes in environment values literal.
        return ParseTool._ENV_PATTERN.sub(_lookup, string_param)

    @staticmethod
    def parse_string_param_if_config(string_param: str, **kwargs):
        """Use ${CONFIG.var_name} to load variable "var_name" from configuration.

        The whole parameter is replaced by the configuration value, which is the
        attribute ``var_name`` of the ``configuration`` module when it exists and the
        result of ``configuration.get_single_config(var_name)`` otherwise.

        :param string_param: The raw parameter value.
        :return: The configuration value when a well-formed marker is present,
            otherwise ``string_param`` unchanged.
        """
        if "${CONFIG" not in string_param:
            return string_param
        match = ParseTool._CONFIG_PATTERN.search(string_param)
        if match is None:
            # Contains "${CONFIG" but no "${CONFIG.<name>}" marker.
            return string_param
        key = match.group(1)
        if hasattr(configuration, key):
            return getattr(configuration, key)
        return configuration.get_single_config(key)

    @staticmethod
    def get_possible_path(file_path, base_folder):
        """Get file possible path.

        Return ``base_folder`` joined with ``file_path`` when ``file_path`` itself does
        not exist but the joined path does; otherwise return ``file_path`` as given.
        """
        possible_path = file_path
        if not Path(file_path).exists():
            new_path = Path(base_folder).joinpath(file_path)
            if new_path.exists():
                possible_path = new_path
                logger.info("%s not exists, convert to %s", file_path, possible_path)
        return possible_path
def get_task_cls(task_type) -> Task:
    """Look up a task class exported by ``tasks`` from its type name, ignoring case.

    :param task_type: task type name as written in the YAML file.
    :return: the attribute of ``tasks`` whose exported name matches ``task_type``.
    :raises PyDSTaskNoFoundException: when no name in ``tasks.__all__`` matches.
    """
    # candidates are restricted to the names published in tasks.__all__
    exported_by_cap = {}
    for exported_name in tasks.__all__:
        exported_by_cap[exported_name.capitalize()] = exported_name

    wanted = task_type.capitalize()
    if wanted in exported_by_cap:
        return getattr(tasks, exported_by_cap[wanted])

    raise PyDSTaskNoFoundException("cant not find task %s" % task_type)
class YamlProcess(YamlParser):
    """Yaml parser for create process.

    The YAML document is expected to provide a ``workflow`` mapping (keyword
    arguments of ``ProcessDefinition`` plus an optional ``run`` flag) and a
    ``tasks`` list with one mapping per task (``task_type``, optional ``deps``
    and the keyword arguments of the task class).

    :param yaml_file: yaml file path.

    examples1 ::

        parser = YamlProcess(yaml_file=...)
        parser.create_process_definition()

    examples2 ::

        YamlProcess(yaml_file=...).create_process_definition()
    """

    # Rules applied, in this order, to every string value found in the YAML data.
    _parse_rules = [
        ParseTool.parse_string_param_if_file,
        ParseTool.parse_string_param_if_env,
        ParseTool.parse_string_param_if_config,
    ]

    def __init__(self, yaml_file: str):
        with open(yaml_file, "r") as f:
            content = f.read()

        # Must be set before prepare_refer_process, which resolves referenced
        # YAML files against this folder.
        self._base_folder = Path(yaml_file).parent
        content = self.prepare_refer_process(content)
        super().__init__(content)

    def create_process_definition(self):
        """Create process main function.

        Builds the ``ProcessDefinition`` and all its tasks, wires the ``deps``
        between tasks, submits the workflow and, when the ``run`` flag of the
        workflow is set, runs it.

        :return: name of the created workflow.
        """
        # get process parameters with key "workflow"
        process_params = self[KEY_PROCESS]

        # pop "run" parameter, used at the end
        is_run = process_params.pop("run", False)

        # use YamlProcess._parse_rules to parse special value of yaml file
        process_params = self.parse_params(process_params)

        process_name = process_params["name"]
        logger.info(f"Create Process: {process_name}")
        with ProcessDefinition(**process_params) as pd:

            # save dependencies between tasks
            dependencies = {}

            # save name and task mapping
            name2task = {}

            # get task datas with key "tasks"
            for task_data in self[KEY_TASK]:
                task = self.parse_task(task_data, name2task)

                deps = task_data.get(KEY_DEPS, [])
                if deps:
                    dependencies[task.name] = deps
                name2task[task.name] = task

            # build dependencies between task
            for downstream_task_name, deps in dependencies.items():
                downstream_task = name2task[downstream_task_name]
                for upstream_task_name in deps:
                    upstream_task = name2task[upstream_task_name]
                    upstream_task >> downstream_task

            pd.submit()
            # if set is_run, run the process after submit
            if is_run:
                logger.info(f"run workflow: {pd}")
                pd.run()

        return process_name

    def parse_params(self, params: Any):
        """Recursively resolves the parameter values.

        The function operates params only when it encounters a string; other types continue recursively.
        Lists and dicts are updated in place and returned.

        :param params: a string, list, dict or any other value from the YAML data.
        :return: the resolved value.
        """
        if isinstance(params, str):
            for parse_rule in self._parse_rules:
                params_ = params
                params = parse_rule(params, base_folder=self._base_folder)
                if params_ != params:
                    logger.info(f"parse {params_} -> {params}")
                # A rule may replace the string with a non-string value (e.g. a
                # configuration value); the remaining string rules do not apply then.
                if not isinstance(params, str):
                    break

        elif isinstance(params, list):
            for index, item in enumerate(params):
                params[index] = self.parse_params(item)

        elif isinstance(params, dict):
            # only existing keys are reassigned, so the dict size is stable while iterating
            for key, value in params.items():
                params[key] = self.parse_params(value)

        return params

    @classmethod
    def parse(cls, yaml_file: str):
        """Create the workflow described by ``yaml_file``.

        :param yaml_file: yaml file path.
        :return: name of the created workflow.
        """
        process_name = cls(yaml_file).create_process_definition()
        return process_name

    def prepare_refer_process(self, content):
        """Allow YAML files to reference process derived from other YAML files.

        Each ``$WORKFLOW{"path"}`` token is resolved by creating the workflow
        described by the YAML file at "path" and replacing the token with the
        name of that workflow.

        :param content: raw text of the YAML file.
        :return: ``content`` with every ``$WORKFLOW{"..."}`` token replaced.
        """
        process_paths = re.findall(r"\$WORKFLOW\{\"(.*?)\"\}", content)
        # The first replacement already covers every occurrence of a path, so a
        # path referenced several times is only parsed (and submitted) once.
        for process_path in dict.fromkeys(process_paths):
            logger.info(
                f"find special token {process_path}, load process form {process_path}"
            )
            possible_path = ParseTool.get_possible_path(process_path, self._base_folder)
            process_name = YamlProcess.parse(possible_path)
            content = content.replace('$WORKFLOW{"%s"}' % process_path, process_name)

        return content

    def parse_task(self, task_data: dict, name2task: Dict[str, Task]):
        """Parse various types of tasks.

        :param task_data: dict.
                {
                    "task_type": "Shell",
                    "name": "shell_task",
                    "command": "echo hello"
                }

        :param name2task: Dict[str, Task]), mapping of task_name and task
        :return: the created task.

        Some task type have special parse func:
            if task type is Switch, use parse_switch;
            if task type is Condition, use parse_condition;
            if task type is Dependent, use parse_dependent;
            other, we pass all task_params as input to task class, like "task_cls(**task_params)".
        """
        task_type = task_data[KEY_TASK_TYPE]
        # get params without special key
        task_params = {k: v for k, v in task_data.items() if k not in TASK_SPECIAL_KEYS}

        task_cls = get_task_cls(task_type)

        # use YamlProcess._parse_rules to parse special value of yaml file
        task_params = self.parse_params(task_params)

        if task_cls == tasks.Switch:
            task = self.parse_switch(task_params, name2task)

        elif task_cls == tasks.Condition:
            task = self.parse_condition(task_params, name2task)

        elif task_cls == tasks.Dependent:
            task = self.parse_dependent(task_params, name2task)

        else:
            task = task_cls(**task_params)

        # explicit format string: the task type itself must not be used as one
        logger.info("%s %s", task_type, task)
        return task

    @staticmethod
    def _get_op_cls(op, and_cls, or_cls):
        """Return ``and_cls`` or ``or_cls`` for the operator name ``op`` ("and"/"or", any case)."""
        op_lower = op.lower()
        if op_lower == "and":
            return and_cls
        if op_lower == "or":
            return or_cls
        raise Exception("OP must be in And or Or, but get: %s" % op)

    def parse_switch(self, task_params, name2task):
        """Parse Switch Task.

        This is an example Yaml fragment of task_params

        name: switch
        condition:
          - task: switch_child_1
            condition: "${var} > 1"
          - task: switch_child_2

        An entry without "condition" becomes the default branch.

        :param task_params: parameters of the switch task.
        :param name2task: mapping of task name and already created task.
        :return: the created Switch task.
        """
        from pydolphinscheduler.tasks.switch import (
            Branch,
            Default,
            Switch,
            SwitchCondition,
        )

        condition_datas = task_params["condition"]
        conditions = []
        for condition_data in condition_datas:
            assert "task" in condition_data, "task must be in %s" % condition_data
            task_name = condition_data["task"]
            condition_string = condition_data.get("condition", None)

            # if condition_string is None, for example: {"task": "switch_child_2"}, set it to Default branch
            if condition_string is None:
                conditions.append(Default(task=name2task.get(task_name)))

            # if condition_string is not None, for example:
            # {"task": "switch_child_2", "condition": "${var} > 1"} set it to Branch
            else:
                conditions.append(
                    Branch(condition_string, task=name2task.get(task_name))
                )

        switch = Switch(
            name=task_params["name"], condition=SwitchCondition(*conditions)
        )
        return switch

    def parse_condition(self, task_params, name2task):
        """Parse Condition Task.

        This is an example Yaml fragment of task_params

        name: condition
        success_task: success_branch
        failed_task: fail_branch
        op: and
        groups:
          - op: and
            groups:
              - task: pre_task_1
                flag: true
              - task: pre_task_2
                flag: true
              - task: pre_task_3
                flag: false
          - op: and
            groups:
              - task: pre_task_1
                flag: false
              - task: pre_task_2
                flag: true
              - task: pre_task_3
                flag: true

        :param task_params: parameters of the condition task.
        :param name2task: mapping of task name and already created task.
        :return: the created Condition task.
        """
        from pydolphinscheduler.tasks.condition import (
            FAILURE,
            SUCCESS,
            And,
            Condition,
            Or,
        )

        second_cond_ops = []
        for first_group in task_params["groups"]:
            second_op = first_group[KEY_OP]
            task_ops = []
            for condition_data in first_group["groups"]:
                assert "task" in condition_data, "task must be in %s" % condition_data
                assert "flag" in condition_data, "flag must be in %s" % condition_data
                task_name = condition_data["task"]
                flag = condition_data["flag"]
                task = name2task[task_name]

                # for example: task = pre_task_1, flag = true
                if flag:
                    task_ops.append(SUCCESS(task))
                else:
                    task_ops.append(FAILURE(task))

            second_cond_ops.append(self._get_op_cls(second_op, And, Or)(*task_ops))

        first_op = task_params[KEY_OP]
        cond_operator = self._get_op_cls(first_op, And, Or)(*second_cond_ops)

        condition = Condition(
            name=task_params["name"],
            condition=cond_operator,
            success_task=name2task[task_params["success_task"]],
            failed_task=name2task[task_params["failed_task"]],
        )
        return condition

    def parse_dependent(self, task_params, name2task):
        """Parse Dependent Task.

        This is an example Yaml fragment of task_params

        name: dependent
        op: and
        groups:
          - op: or
            groups:
              - project_name: pydolphin
                process_definition_name: task_dependent_external
                dependent_task_name: task_1
              - project_name: pydolphin
                process_definition_name: task_dependent_external
                dependent_task_name: task_2
          - op: and
            groups:
              - project_name: pydolphin
                process_definition_name: task_dependent_external
                dependent_task_name: task_1
                dependent_date: LAST_WEDNESDAY
              - project_name: pydolphin
                process_definition_name: task_dependent_external
                dependent_task_name: task_2
                dependent_date: last24Hours

        :param task_params: parameters of the dependent task.
        :param name2task: mapping of task name and already created task (not used here).
        :return: the created Dependent task.
        """
        from pydolphinscheduler.tasks.dependent import (
            And,
            Dependent,
            DependentDate,
            DependentItem,
            Or,
        )

        def process_dependent_date(dependent_date):
            """Parse dependent date (Compatible with key and value of DependentDate)."""
            dependent_date_upper = dependent_date.upper()
            if hasattr(DependentDate, dependent_date_upper):
                dependent_date = getattr(DependentDate, dependent_date_upper)
            return dependent_date

        def create_dependent_item(source_items):
            """Parse dependent item.

            project_name: pydolphin
            process_definition_name: task_dependent_external
            dependent_task_name: task_1
            dependent_date: LAST_WEDNESDAY
            """
            project_name = source_items["project_name"]
            process_definition_name = source_items["process_definition_name"]
            dependent_task_name = source_items["dependent_task_name"]
            dependent_date = source_items.get("dependent_date", DependentDate.TODAY)
            dependent_item = DependentItem(
                project_name=project_name,
                process_definition_name=process_definition_name,
                dependent_task_name=dependent_task_name,
                dependent_date=process_dependent_date(dependent_date),
            )

            return dependent_item

        second_dependences = []
        for first_group in task_params["groups"]:
            second_op = first_group[KEY_OP]
            dependence_items = []
            for source_items in first_group["groups"]:
                dependence_items.append(create_dependent_item(source_items))

            second_dependences.append(
                self._get_op_cls(second_op, And, Or)(*dependence_items)
            )

        first_op = task_params[KEY_OP]
        dependence = self._get_op_cls(first_op, And, Or)(*second_dependences)

        task = Dependent(
            name=task_params["name"],
            dependence=dependence,
        )
        return task
def create_process_definition(yaml_file):
    """Entry point for the CLI: create the workflow described by ``yaml_file``.

    The workflow name produced by the parser is discarded; nothing is returned.
    """
    _ = YamlProcess.parse(yaml_file)

58
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/default_config.yaml

@ -1,58 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Setting about Java gateway server
java_gateway:
# The address on which the Python gateway server starts. Set its value to `0.0.0.0` if your Python API runs on a
# different host than the Python gateway server. It could also be set to a specific address like `127.0.0.1` or `localhost`
address: 127.0.0.1
# The port of Python gateway server start. Define which port you could connect to Python gateway server from
# Python API models.
port: 25333
# Whether to automatically convert Python objects to Java objects. Default value is ``True``. There is some
# performance loss when it is set to ``True``, but for now pydolphinscheduler does not handle the conversion
# between Java and Python itself; this is marked as a TODO item for the future.
auto_convert: true
# Setting about dolphinscheduler default value, will use the value set below if property do not set, which
# including ``user``, ``workflow``
default:
# Default value for dolphinscheduler's user object
user:
name: userPythonGateway
password: userPythonGateway
email: userPythonGateway@dolphinscheduler.com
tenant: tenant_pydolphin
phone: 11111111111
state: 1
# Default value for dolphinscheduler's workflow object
workflow:
project: project-pydolphin
tenant: tenant_pydolphin
user: userPythonGateway
queue: queuePythonGateway
worker_group: default
# Release state of workflow, default value is ``online`` which mean setting workflow online when it submits
# to Java gateway, if you want to set workflow offline set its value to ``offline``
release_state: online
time_zone: Asia/Shanghai
# Warning type of the workflow, default value is ``NONE`` mean do not warn user in any cases of workflow state,
# change to ``FAILURE`` if you want to warn users when workflow failed. All available enum value are
# ``NONE``, ``SUCCESS``, ``FAILURE``, ``ALL``
warning_type: NONE

18
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/__init__.py

@ -1,18 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Init examples package which provides users with pydolphinscheduler examples."""

55
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/bulk_create_example.py

@ -1,55 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
This example show you how to create workflows in batch mode.
After this example runs, it will have created 10 workflows named `workflow:<workflow_num>`, each with 5 tasks
named `task:<task_num>-workflow:<workflow_num>`. Both numbers start at 0, so the tasks of the first workflow are
task:0-workflow:0 -> task:1-workflow:0 -> task:2-workflow:0 -> task:3-workflow:0 -> task:4-workflow:0
Each workflow is linear since we set `IS_CHAIN=True`, you could change task to parallel by set it to `False`.
"""
from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.tasks.shell import Shell
NUM_WORKFLOWS = 10
NUM_TASKS = 5
# Make sure your tenant exists in your operator system
TENANT = "exists_tenant"
# Chain switch, default True:
#   True  - every task depends on the task created just before it,
#           giving `pre_task -> current_task -> next_task`
#   False - all tasks of a workflow are independent of each other
IS_CHAIN = True

for workflow_index in range(NUM_WORKFLOWS):
    workflow_name = f"workflow:{workflow_index}"

    with ProcessDefinition(name=workflow_name, tenant=TENANT) as pd:
        for task_index in range(NUM_TASKS):
            task_name = f"task:{task_index}-{workflow_name}"
            task = Shell(name=task_name, command=f"echo This is task {task_name}")

            # the first task of a workflow has no predecessor to link to
            if not IS_CHAIN or task_index == 0:
                continue

            pre_task_name = f"task:{task_index - 1}-{workflow_name}"
            pd.get_one_task_by_name(pre_task_name) >> task

        # Only the workflow and task definitions are submitted; no schedule is set
        # and the workflow is not run.
        pd.submit()

59
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_condition_example.py

@ -1,59 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# [start workflow_declare]
r"""
A example workflow for task condition.
This example will create six tasks in a single workflow: five shell tasks and one condition task. The
condition task has three upstream tasks and two downstream tasks; their dependence is set automatically by the
condition task from the parameters `condition`, `success_task` and `failed_task`. The graph of this workflow
looks like:

    pre_task_1 ->                    -> success_branch
                  \                 /
    pre_task_2 ->  -> conditions ->
                  /                 \
    pre_task_3 ->                    -> fail_branch
.
"""
from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.tasks.condition import FAILURE, SUCCESS, And, Condition
from pydolphinscheduler.tasks.shell import Shell
with ProcessDefinition(name="task_condition_example", tenant="tenant_exists") as pd:
    # the three tasks whose final states are evaluated by the condition
    pre_task_1, pre_task_2, pre_task_3 = [
        Shell(name=f"pre_task_{number}", command=f"echo pre_task_{number}")
        for number in (1, 2, 3)
    ]

    # pre_task_1 and pre_task_2 are checked with SUCCESS, pre_task_3 with FAILURE
    expect_success = SUCCESS(pre_task_1, pre_task_2)
    expect_failure = FAILURE(pre_task_3)
    cond_operator = And(And(expect_success, expect_failure))

    # the two branches handed to the condition task
    success_branch = Shell(name="success_branch", command="echo success_branch")
    fail_branch = Shell(name="fail_branch", command="echo fail_branch")

    condition = Condition(
        name="condition",
        condition=cond_operator,
        success_task=success_branch,
        failed_task=fail_branch,
    )
    pd.submit()
# [end workflow_declare]

95
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_datax_example.py

@ -1,95 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# [start workflow_declare]
"""
A example workflow for task datax.
This example will create a workflow named `task_datax_example` with two tasks, `task_datax` and
`task_custom_datax`.
You can create the data sources `first_mysql` and `second_mysql` through the UI.
Both tasks synchronize data from the source database to the target database with DataX.
"""
from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.tasks.datax import CustomDataX, DataX
# datax json template
# datax json template, assembled from its reader, writer and setting parts
_MYSQL_READER = {
    "name": "mysqlreader",
    "parameter": {
        "username": "usr",
        "password": "pwd",
        "column": ["id", "name", "code", "description"],
        "splitPk": "id",
        "connection": [
            {
                "table": ["source_table"],
                "jdbcUrl": ["jdbc:mysql://127.0.0.1:3306/source_db"],
            }
        ],
    },
}
_MYSQL_WRITER = {
    "name": "mysqlwriter",
    "parameter": {
        "writeMode": "insert",
        "username": "usr",
        "password": "pwd",
        "column": ["id", "name"],
        "connection": [
            {
                "jdbcUrl": "jdbc:mysql://127.0.0.1:3306/target_db",
                "table": ["target_table"],
            }
        ],
    },
}
_JOB_SETTING = {
    "errorLimit": {"percentage": 0, "record": 0},
    "speed": {"channel": 1, "record": 1000},
}
JSON_TEMPLATE = {
    "job": {
        "content": [{"reader": _MYSQL_READER, "writer": _MYSQL_WRITER}],
        "setting": _JOB_SETTING,
    }
}

with ProcessDefinition(name="task_datax_example", tenant="tenant_exists") as pd:
    # This task reads with the given `sql` from data source `first_mysql` and
    # writes into `target_table` of data source `second_mysql`.
    # Data sources named `first_mysql` and `second_mysql` have to exist
    # in your environment.
    task1 = DataX(
        name="task_datax",
        datasource_name="first_mysql",
        datatarget_name="second_mysql",
        sql="select id, name, code, description from source_table",
        target_table="target_table",
    )

    # The same kind of job can be described with a custom datax json template
    # instead of the parameters used by task1.
    custom_job = str(JSON_TEMPLATE)
    task2 = CustomDataX(name="task_custom_datax", json=custom_job)
    pd.run()
# [end workflow_declare]

74
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_dependent_example.py

@ -1,74 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# [start workflow_declare]
r"""
A example workflow for task dependent.
This example will create two workflows named `task_dependent_example` and `task_dependent_external`.
`task_dependent_example` is the workflow that defines and submits the dependent task, while
`task_dependent_external` defines the outside workflow and the tasks it depends on.
After this script is submitted, we get the workflows below:
task_dependent_external:
task_1
task_2
task_3
task_dependent_example:
task_dependent (this task depends on task_dependent_external.task_1 and task_dependent_external.task_2).
"""
from pydolphinscheduler import configuration
from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.tasks.dependent import And, Dependent, DependentItem, Or
from pydolphinscheduler.tasks.shell import Shell
# The external workflow: three shell tasks other workflows can depend on.
with ProcessDefinition(name="task_dependent_external", tenant="tenant_exists") as pd:
    task_1 = Shell(name="task_1", command="echo task 1")
    task_2 = Shell(name="task_2", command="echo task 2")
    task_3 = Shell(name="task_3", command="echo task 3")
    pd.submit()

# The workflow holding the dependent task.
with ProcessDefinition(name="task_dependent_example", tenant="tenant_exists") as pd:
    # one item per task of `task_dependent_external` that is depended on
    depends_on_task_1 = DependentItem(
        project_name=configuration.WORKFLOW_PROJECT,
        process_definition_name="task_dependent_external",
        dependent_task_name="task_1",
    )
    depends_on_task_2 = DependentItem(
        project_name=configuration.WORKFLOW_PROJECT,
        process_definition_name="task_dependent_external",
        dependent_task_name="task_2",
    )

    # the two items are combined with Or, wrapped in a single And group
    either_task = Or(depends_on_task_1, depends_on_task_2)
    task = Dependent(name="task_dependent", dependence=And(either_task))
    pd.submit()
# [end workflow_declare]

52
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_dvc_example.py

@ -1,52 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# [start workflow_declare]
"""A example workflow for task dvc."""
from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.tasks import DVCDownload, DVCInit, DVCUpload
repository = "git@github.com:<YOUR-NAME-OR-ORG>/dvc-data-repository-example.git"

with ProcessDefinition(name="task_dvc_example", tenant="tenant_exists") as pd:
    init_task = DVCInit(name="init_dvc", repository=repository, store_url="~/dvc_data")

    # arguments that are the same for the upload and the download task
    iris_v1 = {
        "repository": repository,
        "data_path_in_dvc_repository": "iris",
        "version": "v1",
    }
    upload_task = DVCUpload(
        name="upload_data",
        data_path_in_worker="~/source/iris",
        message="upload iris data v1",
        **iris_v1,
    )
    download_task = DVCDownload(
        name="download_data",
        data_path_in_worker="~/target/iris",
        **iris_v1,
    )

    # init first, then upload, then download
    init_task >> upload_task >> download_task
    pd.run()
# [end workflow_declare]

33
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_flink_example.py

@ -1,33 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# [start workflow_declare]
"""A example workflow for task flink."""
from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.tasks.flink import DeployMode, Flink, ProgramType
with ProcessDefinition(name="task_flink_example", tenant="tenant_exists") as pd:
    # keyword arguments of the single Flink task of this workflow
    flink_options = {
        "name": "task_flink",
        "main_class": "org.apache.flink.streaming.examples.wordcount.WordCount",
        "main_package": "WordCount.jar",
        "program_type": ProgramType.JAVA,
        "deploy_mode": DeployMode.LOCAL,
    }
    task = Flink(**flink_options)
    pd.run()
# [end workflow_declare]

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save