diff --git a/dolphinscheduler-python/pydolphinscheduler/.flake8 b/.flake8 similarity index 81% rename from dolphinscheduler-python/pydolphinscheduler/.flake8 rename to .flake8 index 120b42fb68..f6829fc382 100644 --- a/dolphinscheduler-python/pydolphinscheduler/.flake8 +++ b/.flake8 @@ -19,15 +19,6 @@ max-line-length = 110 exclude = .git, - __pycache__, - .pytest_cache, - *.egg-info, - docs/source/conf.py - old, - build, - dist, - htmlcov, - .tox, dist, ignore = # It's clear and not need to add docstring @@ -35,6 +26,3 @@ ignore = D105, # D105: Missing docstring in magic method # Conflict to Black W503 # W503: Line breaks before binary operators -per-file-ignores = - */pydolphinscheduler/side/__init__.py:F401 - */pydolphinscheduler/tasks/__init__.py:F401 diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index b58a97f831..eceda6a97a 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -38,7 +38,6 @@ /dolphinscheduler-task-plugin/ @caishunfeng @SbloodyS @zhuangchong /dolphinscheduler-tools/ @caishunfeng @SbloodyS @zhongjiajie @EricGao888 /script/ @caishunfeng @SbloodyS @zhongjiajie @EricGao888 -/dolphinscheduler-python/ @zhongjiajie /dolphinscheduler-ui/ @songjianet @Amy0104 /docs/ @zhongjiajie @Tianqi-Dotes @EricGao888 /licenses/ @kezhenxu94 @zhongjiajie diff --git a/.github/actions/labeler/labeler.yml b/.github/actions/labeler/labeler.yml index 4bb724fed2..fbfcb098fe 100644 --- a/.github/actions/labeler/labeler.yml +++ b/.github/actions/labeler/labeler.yml @@ -15,9 +15,6 @@ # limitations under the License. 
# -Python: - - any: ['dolphinscheduler-python/**/*'] - backend: - 'dolphinscheduler-alert/**/*' - 'dolphinscheduler-api/**/*' @@ -40,7 +37,6 @@ backend: document: - 'docs/**/*' - - 'dolphinscheduler-python/pydolphinscheduler/docs/**/*' CI&CD: - any: ['.github/**/*'] diff --git a/.github/workflows/py-ci.yml b/.github/workflows/py-ci.yml deleted file mode 100644 index 7e0333efd8..0000000000 --- a/.github/workflows/py-ci.yml +++ /dev/null @@ -1,205 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -name: Python API - -on: - push: - branches: - - dev - paths: - - 'dolphinscheduler-python/**' - pull_request: - -concurrency: - group: py-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -defaults: - run: - working-directory: dolphinscheduler-python/pydolphinscheduler - -# We have to update setuptools wheel to package with package_data, LICENSE, NOTICE -env: - DEPENDENCES: pip setuptools wheel tox - -jobs: - paths-filter: - name: Python-Path-Filter - runs-on: ubuntu-latest - outputs: - not-docs: ${{ steps.filter.outputs.not-docs }} - py-change: ${{ steps.filter.outputs.py-change }} - steps: - - uses: actions/checkout@v2 - - uses: dorny/paths-filter@b2feaf19c27470162a626bd6fa8438ae5b263721 - id: filter - with: - filters: | - not-docs: - - '!(docs/**)' - py-change: - - 'dolphinscheduler-python/pydolphinscheduler/**' - lint: - name: Lint - if: ${{ (needs.paths-filter.outputs.py-change == 'true') || (github.event_name == 'push') }} - timeout-minutes: 15 - needs: paths-filter - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.7 - uses: actions/setup-python@v4 - with: - python-version: 3.7 - - name: Install Dependences - run: | - python -m pip install --upgrade ${{ env.DEPENDENCES }} - - name: Run All Lint Check - run: | - python -m tox -vv -e lint - pytest: - name: Pytest - timeout-minutes: 15 - needs: lint - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - # YAML parse `3.10` to `3.1`, so we have to add quotes for `'3.10'`, see also: - # https://github.com/actions/setup-python/issues/160#issuecomment-724485470 - python-version: [3.6, 3.7, 3.8, 3.9, '3.10', 3.11-dev] - os: [ubuntu-latest, macOS-latest, windows-latest] - # Skip because dependence [py4j](https://pypi.org/project/py4j/) not work on those environments - exclude: - - os: windows-latest - python-version: '3.10' - - os: windows-latest - python-version: 3.11-dev - steps: - - uses: actions/checkout@v2 - - name: Set up 
Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install Dependences - run: | - python -m pip install --upgrade ${{ env.DEPENDENCES }} - - name: Run All Tests - run: | - python -m tox -vv -e code-test - doc-build: - name: Docs Build Test - timeout-minutes: 15 - needs: lint - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - env-list: [doc-build, doc-build-multi] - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.7 - uses: actions/setup-python@v4 - with: - python-version: 3.7 - - name: Install Dependences - run: | - python -m pip install --upgrade ${{ env.DEPENDENCES }} - - name: Run Build Docs Tests ${{ matrix.env-list }} - run: | - python -m tox -vv -e ${{ matrix.env-list }} - local-ci: - name: Local CI - timeout-minutes: 15 - needs: - - pytest - - doc-build - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.7 - uses: actions/setup-python@v4 - with: - python-version: 3.7 - - name: Install Dependences - run: | - python -m pip install --upgrade ${{ env.DEPENDENCES }} - - name: Run Tests Build Docs - run: | - python -m tox -vv -e local-ci - integrate-test: - name: Integrate Test - if: ${{ (needs.paths-filter.outputs.not-docs == 'true') || (github.event_name == 'push') }} - runs-on: ubuntu-latest - needs: paths-filter - timeout-minutes: 30 - steps: - - uses: actions/checkout@v2 - with: - submodules: true - - name: Sanity Check - uses: ./.github/actions/sanity-check - with: - token: ${{ secrets.GITHUB_TOKEN }} - - name: Cache local Maven repository - uses: actions/cache@v3 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: ${{ runner.os }}-maven- - # Switch to project root directory to run mvnw command - - name: Build Image - working-directory: ./ - run: | - ./mvnw -B clean install \ - -Dmaven.test.skip \ - -Dmaven.javadoc.skip \ - -Dcheckstyle.skip=true \ - 
-Pdocker,release -Ddocker.tag=ci \ - -pl dolphinscheduler-standalone-server -am - - name: Set up Python 3.7 - uses: actions/setup-python@v4 - with: - python-version: 3.7 - - name: Install Dependences - run: | - python -m pip install --upgrade ${{ env.DEPENDENCES }} - - name: Run Integrate Tests - run: | - python -m tox -vv -e integrate-test - result: - name: Python - runs-on: ubuntu-latest - timeout-minutes: 30 - needs: [ paths-filter, local-ci, integrate-test ] - if: always() - steps: - - name: Status - # We need change CWD to current directory to avoid global default working directory not exists - working-directory: ./ - run: | - if [[ ${{ needs.paths-filter.outputs.not-docs }} == 'false' && ${{ github.event_name }} == 'pull_request' ]]; then - echo "Only document change, skip both python unit and integrate test!" - exit 0 - fi - if [[ ${{ needs.paths-filter.outputs.py-change }} == 'false' && ${{ needs.integrate-test.result }} == 'success' && ${{ github.event_name }} == 'pull_request' ]]; then - echo "No python code change, and integrate test pass!" - exit 0 - fi - if [[ ${{ needs.integrate-test.result }} != 'success' || ${{ needs.local-ci.result }} != 'success' ]]; then - echo "py-ci Failed!" 
- exit -1 - fi diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml index 441672839b..6acfa1fc4b 100644 --- a/.github/workflows/unit-test.yml +++ b/.github/workflows/unit-test.yml @@ -23,7 +23,6 @@ on: paths-ignore: - '**/*.md' - 'dolphinscheduler-ui' - - 'dolphinscheduler-python/pydolphinscheduler' branches: - dev diff --git a/.gitignore b/.gitignore index 1082e4b155..e5eccc1308 100644 --- a/.gitignore +++ b/.gitignore @@ -50,18 +50,3 @@ dolphinscheduler-common/test dolphinscheduler-worker/logs dolphinscheduler-master/logs dolphinscheduler-api/logs - -# ------------------ -# pydolphinscheduler -# ------------------ -# Cache -__pycache__/ -.tox/ - -# Build -build/ -*egg-info/ - -# Test coverage -.coverage -htmlcov/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6b36749392..e51d15a16e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -41,11 +41,6 @@ repos: 'flake8-docstrings>=1.6', 'flake8-black>=0.2', ] - # pre-commit run in the root, so we have to point out the full path of configuration - args: [ - --config, - dolphinscheduler-python/pydolphinscheduler/.flake8 - ] - repo: https://github.com/pycqa/autoflake rev: v1.4 hooks: diff --git a/README.md b/README.md index 8c49415866..0b25dda917 100644 --- a/README.md +++ b/README.md @@ -19,10 +19,11 @@ Apache DolphinScheduler is the modern data workflow orchestration platform with The key features for DolphinScheduler are as follows: - Easy to deploy, we provide 4 ways to deploy, such as Standalone deployment,Cluster deployment,Docker / Kubernetes deployment and Rainbond deployment -- Easy to use, there are 3 ways to create workflows: +- Easy to use, there are four ways to create workflows: - Visually, create tasks by dragging and dropping tasks - - Creating workflows by PyDolphinScheduler(Python way) - - Creating workflows through Open API + - [PyDolphinScheduler](https://dolphinscheduler.apache.org/python/dev/index.html), Creating workflows via Python 
API, aka workflow-as-code + - YAML definition, mapping YAML into workflows (PyDolphinScheduler currently has to be installed) + - Open API, creating workflows through API calls - Highly Reliable, DolphinScheduler uses a decentralized multi-master and multi-worker architecture, which naturally supports horizontal scaling and high availability diff --git a/docs/docs/en/contribute/release/release-post.md b/docs/docs/en/contribute/release/release-post.md index 8d24b3a80f..20a8e43008 100644 --- a/docs/docs/en/contribute/release/release-post.md +++ b/docs/docs/en/contribute/release/release-post.md @@ -1,7 +1,7 @@ # Release Post We still have some publish task to do after we send the announcement mail, currently we have to publish Docker images to -Docker Hub and also publish pydolphinscheduler to PyPI. +Docker Hub. ## Publish Docker Image @@ -20,11 +20,6 @@ We could reuse the main command the CI run and publish our Docker images to Dock -Pdocker,release ``` -## Publish pydolphinscheduler to PyPI - -Python API need to release to PyPI for easier download and use, you can see more detail in [Python API release](https://github.com/apache/dolphinscheduler/blob/dev/dolphinscheduler-python/pydolphinscheduler/RELEASE.md#to-pypi) -to finish PyPI release. 
- ## Get All Contributors You might need all contributors in current release when you want to publish the release news or announcement, you could diff --git a/docs/docs/en/contribute/release/release-prepare.md b/docs/docs/en/contribute/release/release-prepare.md index e7fb41c5a1..30bcaae52f 100644 --- a/docs/docs/en/contribute/release/release-prepare.md +++ b/docs/docs/en/contribute/release/release-prepare.md @@ -23,7 +23,6 @@ For example, to release `x.y.z`, the following updates are required: - `deploy/kubernetes/dolphinscheduler`: - `Chart.yaml`: `appVersion` needs to be updated to x.y.z (`version` is helm chart version,incremented and different from x.y.z) - `values.yaml`: `image.tag` needs to be updated to x.y.z - - `dolphinscheduler-python/pydolphinscheduler/setup.py`: change `version` to x.y.z - Version in the docs: - Change the placeholder ``(except `pom`) to the `x.y.z` in directory `docs` - Add new history version diff --git a/docs/docs/en/contribute/release/release.md b/docs/docs/en/contribute/release/release.md index ff2b9fe1e3..dffb5fb4fc 100644 --- a/docs/docs/en/contribute/release/release.md +++ b/docs/docs/en/contribute/release/release.md @@ -10,8 +10,6 @@ all conditions are met, if any or them are missing, you should install them and java -version # Maven requests mvn -version -# Python 3.6 above is requests, and you have to make keyword `python` work in your terminal and version match -python --version ``` ## GPG Settings @@ -166,13 +164,10 @@ git push origin "${VERSION}"-release ### Pre-Release Check ```shell -# make gpg command could be run in maven correct -export GPG_TTY=$(tty) - -mvn release:prepare -Prelease,python -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -DdryRun=true -Dusername="${GH_USERNAME}" +mvn release:prepare -Prelease -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -DdryRun=true 
-Dusername="${GH_USERNAME}" ``` -* `-Prelease,python`: choose release and python profile, which will pack all the source codes, jar files and executable binary packages, and Python distribute package. +* `-Prelease`: choose release profile, which will pack all the source codes, jar files and executable binary packages. * `-DautoVersionSubmodules=true`: it can make the version number is inputted only once and not for each sub-module. * `-DdryRun=true`: dry run which means not to generate or submit new version number and new tag. @@ -187,7 +182,7 @@ mvn release:clean Then, prepare to execute the release. ```shell -mvn release:prepare -Prelease,python -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -DpushChanges=false -Dusername="${GH_USERNAME}" +mvn release:prepare -Prelease -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -DpushChanges=false -Dusername="${GH_USERNAME}" ``` It is basically the same as the previous rehearsal command, but deleting `-DdryRun=true` parameter. @@ -219,7 +214,7 @@ git push origin --tags ### Deploy the Release ```shell -mvn release:perform -Prelease,python -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -Dusername="${GH_USERNAME}" +mvn release:perform -Prelease -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -Dusername="${GH_USERNAME}" ``` After that command is executed, the version to be released will be uploaded to Apache staging repository automatically. @@ -267,7 +262,6 @@ Create folder by version number. 
```shell mkdir -p ~/ds_svn/dev/dolphinscheduler/"${VERSION}" -mkdir -p ~/ds_svn/dev/dolphinscheduler/"${VERSION}"/python cd ~/ds_svn/dev/dolphinscheduler/"${VERSION}" ``` @@ -277,9 +271,6 @@ Add source code packages, binary packages and executable binary packages to SVN # Source and binary tarball for main code cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/*.tar.gz ~/ds_svn/dev/dolphinscheduler/"${VERSION}" cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/*.tar.gz.asc ~/ds_svn/dev/dolphinscheduler/"${VERSION}" - -# Source and binary tarball for Python API -cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/python/* ~/ds_svn/dev/dolphinscheduler/"${VERSION}"/python ``` ### Generate sign files @@ -287,10 +278,6 @@ cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/python/* ~/ds_svn/dev/dolp ```shell shasum -a 512 apache-dolphinscheduler-"${VERSION}"-src.tar.gz >> apache-dolphinscheduler-"${VERSION}"-src.tar.gz.sha512 shasum -b -a 512 apache-dolphinscheduler-"${VERSION}"-bin.tar.gz >> apache-dolphinscheduler-"${VERSION}"-bin.tar.gz.sha512 -cd python -shasum -a 512 apache-dolphinscheduler-python-"${VERSION}".tar.gz >> apache-dolphinscheduler-python-"${VERSION}".tar.gz.sha512 -shasum -b -a 512 apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl >> apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl.sha512 -cd ../ ``` ### Commit to Apache SVN @@ -308,10 +295,6 @@ svn --username="${A_USERNAME}" commit -m "release ${VERSION}" ```shell shasum -c apache-dolphinscheduler-"${VERSION}"-src.tar.gz.sha512 shasum -c apache-dolphinscheduler-"${VERSION}"-bin.tar.gz.sha512 -cd python -shasum -c apache-dolphinscheduler-python-"${VERSION}".tar.gz.sha512 -shasum -c apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl.sha512 -cd ../ ``` ### Check gpg Signature @@ -345,10 +328,6 @@ Then, check the gpg signature. 
```shell gpg --verify apache-dolphinscheduler-"${VERSION}"-src.tar.gz.asc gpg --verify apache-dolphinscheduler-"${VERSION}"-bin.tar.gz.asc -cd python -gpg --verify apache-dolphinscheduler-python-"${VERSION}".tar.gz.asc -gpg --verify apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl.asc -cd ../ ``` > Note: You have to create gpg signature manually when you can not find your `asc` file, the command @@ -359,7 +338,7 @@ cd ../ #### Check source package -Decompress `apache-dolphinscheduler--src.tar.gz` and `python/apache-dolphinscheduler-python-.tar.gz` then check the following items: +Decompress `apache-dolphinscheduler--src.tar.gz` then check the following items: * Check whether source tarball is oversized for including nonessential files * `LICENSE` and `NOTICE` files exist @@ -372,8 +351,7 @@ Decompress `apache-dolphinscheduler--src.tar.gz` and `python/apache-dol #### Check binary packages -Decompress `apache-dolphinscheduler--src.tar.gz` and `python/apache-dolphinscheduler-python--bin.tar.gz` -to check the following items: +Decompress `apache-dolphinscheduler--bin.tar.gz` to check the following items: - `LICENSE` and `NOTICE` files exist - Correct year in `NOTICE` file diff --git a/docs/docs/zh/contribute/release/release-post.md b/docs/docs/zh/contribute/release/release-post.md index 783503f659..fe1f7e323f 100644 --- a/docs/docs/zh/contribute/release/release-post.md +++ b/docs/docs/zh/contribute/release/release-post.md @@ -1,6 +1,6 @@ # 发版后续 -发送公告邮件后,我们还有一些发布任务要做,目前我们必须将 Docker 镜像发布到 Docker Hub 和 并且需要将 pydolphinscheduler 发布到 PyPI。 +发送公告邮件后,我们还有一些发布任务要做,目前我们必须将 Docker 镜像发布到 Docker Hub。 ## 发布 Docker 镜像 @@ -19,11 +19,6 @@ -Pdocker,release ``` -## 发布 pydolphinscheduler 到 PyPI - -需要将 Python API 发布到 PyPI,请参考 [Python API release](https://github.com/apache/dolphinscheduler/blob/dev/dolphinscheduler-python/pydolphinscheduler/RELEASE.md#to-pypi) -完成 PyPI 的发版 - ## 获取全部的贡献者 当您想要发布新版本的新闻或公告时,您可能需要当前版本的所有贡献者,您可以在 `tools/release` 中使用命令 `python release.py contributor` 
自动生成贡献者 Github id。 diff --git a/docs/docs/zh/contribute/release/release-prepare.md b/docs/docs/zh/contribute/release/release-prepare.md index 9fd8d9dfed..85eea69e6b 100644 --- a/docs/docs/zh/contribute/release/release-prepare.md +++ b/docs/docs/zh/contribute/release/release-prepare.md @@ -23,7 +23,6 @@ - `deploy/kubernetes/dolphinscheduler`: - `Chart.yaml`: `appVersion` 版本更新为 x.y.z (`version` 为 helm chart 版本, 增量更新但不要设置为 x.y.z) - `values.yaml`: `image.tag` 版本更新为 x.y.z - - `dolphinscheduler-python/pydolphinscheduler/setup.py`: 修改其中的 `version` 为 x.y.z - 修改文档(docs模块)中的版本号: - 将 `docs` 文件夹下文件的占位符 `` (除了 pom.xml 相关的) 修改成 `x.y.z` - 新增历史版本 diff --git a/docs/docs/zh/contribute/release/release.md b/docs/docs/zh/contribute/release/release.md index 5b00867b77..f8137ef78a 100644 --- a/docs/docs/zh/contribute/release/release.md +++ b/docs/docs/zh/contribute/release/release.md @@ -9,8 +9,6 @@ java -version # 需要 Maven mvn -version -# 需要 Python 3.6 及以上的版本,并且需要 `python` 关键字能在命令行中运行,且版本符合条件。 -python --version ``` ## GPG设置 @@ -172,14 +170,11 @@ git push origin ${RELEASE.VERSION}-release ### 发布预校验 ```shell -# 保证 python profile 的 gpg 可以正常运行 -export GPG_TTY=$(tty) - # 运行发版校验 -mvn release:prepare -Prelease,python -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -DdryRun=true -Dusername="${GH_USERNAME}" +mvn release:prepare -Prelease -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -DdryRun=true -Dusername="${GH_USERNAME}" ``` -* `-Prelease,python`: 选择release和python的profile,这个profile会打包所有源码、jar文件以及可执行二进制包,以及Python的二进制包。 +* `-Prelease`: 选择release的profile,这个profile会打包所有源码、jar文件以及可执行二进制包。 * `-DautoVersionSubmodules=true`: 作用是发布过程中版本号只需要输入一次,不必为每个子模块都输入一次。 * `-DdryRun=true`: 演练,即不产生版本号提交,不生成新的tag。 @@ -194,7 +189,7 @@ mvn release:clean 然后准备执行发布。 ```shell -mvn release:prepare -Prelease,python -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true 
-Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -DpushChanges=false -Dusername="${GH_USERNAME}" +mvn release:prepare -Prelease -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -DpushChanges=false -Dusername="${GH_USERNAME}" ``` 和上一步演练的命令基本相同,去掉了 `-DdryRun=true` 参数。 @@ -223,7 +218,7 @@ git push origin --tags ### 部署发布 ```shell -mvn release:perform -Prelease,python -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -Dusername="${GH_USERNAME}" +mvn release:perform -Prelease -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -Dusername="${GH_USERNAME}" ``` 执行完该命令后,待发布版本会自动上传到Apache的临时筹备仓库(staging repository)。你可以通过访问 [apache staging repositories](https://repository.apache.org/#stagingRepositories) @@ -270,7 +265,6 @@ svn --username="${A_USERNAME}" commit -m "new key add" ```shell mkdir -p ~/ds_svn/dev/dolphinscheduler/"${VERSION}" -mkdir -p ~/ds_svn/dev/dolphinscheduler/"${VERSION}"/python cd ~/ds_svn/dev/dolphinscheduler/"${VERSION}" ``` @@ -280,9 +274,6 @@ cd ~/ds_svn/dev/dolphinscheduler/"${VERSION}" # 主程序源码包和二进制包 cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/*.tar.gz ~/ds_svn/dev/dolphinscheduler/"${VERSION}" cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/*.tar.gz.asc ~/ds_svn/dev/dolphinscheduler/"${VERSION}" - -# Python API 源码和二进制包 -cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/python/* ~/ds_svn/dev/dolphinscheduler/"${VERSION}"/python ``` ### 生成文件签名 @@ -290,10 +281,6 @@ cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/python/* ~/ds_svn/dev/dolp ```shell shasum -a 512 apache-dolphinscheduler-"${VERSION}"-src.tar.gz >> apache-dolphinscheduler-"${VERSION}"-src.tar.gz.sha512 shasum -b -a 512 apache-dolphinscheduler-"${VERSION}"-bin.tar.gz >> apache-dolphinscheduler-"${VERSION}"-bin.tar.gz.sha512 -cd python -shasum -a 512 
apache-dolphinscheduler-python-"${VERSION}".tar.gz >> apache-dolphinscheduler-python-"${VERSION}".tar.gz.sha512 -shasum -b -a 512 apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl >> apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl.sha512 -cd ../ ``` ### 提交Apache SVN @@ -311,10 +298,6 @@ svn --username="${A_USERNAME}" commit -m "release ${VERSION}" ```shell shasum -c apache-dolphinscheduler-"${VERSION}"-src.tar.gz.sha512 shasum -c apache-dolphinscheduler-"${VERSION}"-bin.tar.gz.sha512 -cd python -shasum -c apache-dolphinscheduler-python-"${VERSION}".tar.gz.sha512 -shasum -c apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl.sha512 -cd ../ ``` ### 检查gpg签名 @@ -347,10 +330,6 @@ Your decision? 5 ```shell gpg --verify apache-dolphinscheduler-"${VERSION}"-src.tar.gz.asc gpg --verify apache-dolphinscheduler-"${VERSION}"-bin.tar.gz.asc -cd python -gpg --verify apache-dolphinscheduler-python-"${VERSION}".tar.gz.asc -gpg --verify apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl.asc -cd ../ ``` > 注意:当你找不到你的 `asc` 文件时,你必须手动创建 gpg 签名,命令 @@ -361,7 +340,7 @@ cd ../ #### 检查源码包的文件内容 -解压缩`apache-dolphinscheduler--src.tar.gz`以及Python文件夹下的`apache-dolphinscheduler-python-.tar.gz`,进行如下检查: +解压缩`apache-dolphinscheduler--src.tar.gz`,进行如下检查: - 检查源码包是否包含由于包含不必要文件,致使tarball过于庞大 - 存在`LICENSE`和`NOTICE`文件 @@ -373,8 +352,7 @@ cd ../ #### 检查二进制包的文件内容 -解压缩`apache-dolphinscheduler--src.tar.gz`和`apache-dolphinscheduler-python--bin.tar.gz` -进行如下检查: +解压缩`apache-dolphinscheduler--src.tar.gz`进行如下检查: - 存在`LICENSE`和`NOTICE`文件 - 所有文本文件开头都有ASF许可证 diff --git a/dolphinscheduler-api/pom.xml b/dolphinscheduler-api/pom.xml index cc34dcba1e..facd4e4e69 100644 --- a/dolphinscheduler-api/pom.xml +++ b/dolphinscheduler-api/pom.xml @@ -163,7 +163,7 @@ - + net.sf.py4j py4j diff --git a/dolphinscheduler-dist/pom.xml b/dolphinscheduler-dist/pom.xml index b202cdd281..ee4c85589e 100644 --- a/dolphinscheduler-dist/pom.xml +++ b/dolphinscheduler-dist/pom.xml @@ -73,11 +73,6 @@ 
org.apache.dolphinscheduler dolphinscheduler-tools - - - org.apache.dolphinscheduler - dolphinscheduler-python - @@ -126,35 +121,5 @@ - - - python - - - - maven-assembly-plugin - - - - python - - single - - package - - - python - false - - src/main/assembly/dolphinscheduler-python-api.xml - - - - - - - - - diff --git a/dolphinscheduler-dist/src/main/assembly/dolphinscheduler-python-api.xml b/dolphinscheduler-dist/src/main/assembly/dolphinscheduler-python-api.xml deleted file mode 100644 index cd37acee62..0000000000 --- a/dolphinscheduler-dist/src/main/assembly/dolphinscheduler-python-api.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - - python-api - - dir - - false - - - - ${basedir}/../dolphinscheduler-python/pydolphinscheduler/dist - . - - - diff --git a/dolphinscheduler-dist/src/main/assembly/dolphinscheduler-src.xml b/dolphinscheduler-dist/src/main/assembly/dolphinscheduler-src.xml index 05d54871c7..3ccc60ef0a 100644 --- a/dolphinscheduler-dist/src/main/assembly/dolphinscheduler-src.xml +++ b/dolphinscheduler-dist/src/main/assembly/dolphinscheduler-src.xml @@ -57,13 +57,6 @@ **/dolphinscheduler-ui/node/** **/dolphinscheduler-ui/node_modules/** - - **/dolphinscheduler-python/pydolphinscheduler/.pytest_cache/** - **/dolphinscheduler-python/pydolphinscheduler/build/** - **/dolphinscheduler-python/pydolphinscheduler/dist/** - **/dolphinscheduler-python/pydolphinscheduler/dist/** - **/dolphinscheduler-python/pydolphinscheduler/htmlcov/** - **/.settings/** **/.project diff --git a/dolphinscheduler-python/pom.xml b/dolphinscheduler-python/pom.xml deleted file mode 100644 index a3133a52e7..0000000000 --- a/dolphinscheduler-python/pom.xml +++ /dev/null @@ -1,165 +0,0 @@ - - - - 4.0.0 - - org.apache.dolphinscheduler - dolphinscheduler - dev-SNAPSHOT - - dolphinscheduler-python - jar - ${project.artifactId} - - - - release - - false - - - - python - - - - org.codehaus.mojo - exec-maven-plugin - - - python-api-prepare - - exec - - prepare-package - - python - 
${project.basedir}/pydolphinscheduler - - -m - pip - install - --upgrade - pip - .[build] - - - - - python-api-clean - - exec - - prepare-package - - python - ${project.basedir}/pydolphinscheduler - - setup.py - pre_clean - - - - - python-api-build - - exec - - prepare-package - - python - ${project.basedir}/pydolphinscheduler - - -m - build - - - - - - python-pkg-rename-tar - - exec - - prepare-package - - bash - ${project.basedir}/pydolphinscheduler - - -c - mv dist/apache-dolphinscheduler-*.tar.gz dist/apache-dolphinscheduler-python-${project.version}.tar.gz - - - - - python-pkg-rename-whl - - exec - - prepare-package - - bash - ${project.basedir}/pydolphinscheduler - - -c - mv dist/apache_dolphinscheduler-*py3-none-any.whl dist/apache_dolphinscheduler-python-${project.version}-py3-none-any.whl - - - - - sign-source - - exec - - prepare-package - - ${python.sign.skip} - bash - ${project.basedir}/pydolphinscheduler - - -c - - gpg --armor --detach-sign --digest-algo=SHA512 dist/*.tar.gz - - - - - sign-wheel - - exec - - prepare-package - - ${python.sign.skip} - bash - ${project.basedir}/pydolphinscheduler - - -c - - gpg --armor --detach-sign --digest-algo=SHA512 dist/*.whl - - - - - - - - - - diff --git a/dolphinscheduler-python/pydolphinscheduler/.coveragerc b/dolphinscheduler-python/pydolphinscheduler/.coveragerc deleted file mode 100644 index 16205094c2..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/.coveragerc +++ /dev/null @@ -1,34 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[run] -command_line = -m pytest -omit = - # Ignore all test cases in tests/ - tests/* - # Ignore examples directory - */pydolphinscheduler/examples/* - # TODO. Temporary ignore java_gateway file, because we could not find good way to test it. - */pydolphinscheduler/java_gateway.py - -[report] -# Don’t report files that are 100% covered -skip_covered = True -show_missing = True -precision = 2 -# Report will fail when coverage under 90.00% -fail_under = 90 diff --git a/dolphinscheduler-python/pydolphinscheduler/.isort.cfg b/dolphinscheduler-python/pydolphinscheduler/.isort.cfg deleted file mode 100644 index 70fa2e05bd..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/.isort.cfg +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -[settings] -profile=black diff --git a/dolphinscheduler-python/pydolphinscheduler/DEVELOP.md b/dolphinscheduler-python/pydolphinscheduler/DEVELOP.md deleted file mode 100644 index eac4b3678a..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/DEVELOP.md +++ /dev/null @@ -1,265 +0,0 @@ - - -# Develop - -pydolphinscheduler is python API for Apache DolphinScheduler, it just defines what workflow look like instead of -store or execute it. We here use [py4j][py4j] to dynamically access Java Virtual Machine. - -## Setup Develop Environment - -**PyDolphinScheduler** use GitHub to hold all source code, you should clone the code before you do same change. - -```shell -git clone git@github.com:apache/dolphinscheduler.git -``` - -Now, we should install all dependence to make sure we could run test or check code style locally - -```shell -cd dolphinscheduler/dolphinscheduler-python/pydolphinscheduler -python -m pip install -e '.[dev]' -``` - -Next, we have to open pydolphinscheduler project in you editor. We recommend you use [pycharm][pycharm] -instead of [IntelliJ IDEA][idea] to open it. And you could just open directory -`dolphinscheduler-python/pydolphinscheduler` instead of `dolphinscheduler-python`. - -## Brief Concept - -Apache DolphinScheduler is design to define workflow by UI, and pydolphinscheduler try to define it by code. When -define by code, user usually do not care user, tenant, or queue exists or not. All user care about is created -a new workflow by the code his/her definition. So we have some **side object** in `pydolphinscheduler/side` -directory, their only check object exists or not, and create them if not exists. - -### Process Definition - -pydolphinscheduler workflow object name, process definition is also same name as Java object(maybe would be change to -other word for more simple). - -### Tasks - -pydolphinscheduler tasks object, we use tasks to define exact job we want DolphinScheduler do for us. 
For now, -we only support `shell` task to execute shell task. [This link][all-task] list all tasks support in DolphinScheduler -and would be implemented in the further. - -## Test Your Code - -Linting and tests is very important for open source project, so we pay more attention to it. We have continuous -integration service run by GitHub Action to test whether the patch is good or not, which you could jump to -section [With GitHub Action](#with-github-action) see more detail. - -And to make more convenience to local tests, we also have the way to run your [test automated with tox](#automated-testing-with-tox) -locally(*run all tests except integrate test with need docker environment*). It is helpful when your try to find out the -detail when continuous integration in GitHub Action failed, or you have a great patch and want to test local first. - -Besides [automated testing with tox](#automated-testing-with-tox) locally, we also have a [manual way](#manually) -run tests. And it is scattered commands to reproduce each step of the integration test we told about. - -* Remote - * [With GitHub Action](#with-github-action) -* Local - * [Automated Testing With tox](#automated-testing-with-tox)(including all but integrate test) - * [Manually](#manually)(with integrate test) - -### With GitHub Action - -GitHub Action test in various environment for pydolphinscheduler, including different python version in -`3.6|3.7|3.8|3.9` and operating system `linux|macOS|windows`. It will trigger and run automatically when you -submit pull requests to `apache/dolphinscheduler`. - -### Automated Testing With tox - -[tox](https://tox.wiki) is a package aims to automate and standardize testing in Python, both our continuous -integration and local test use it to run actual task. 
To use it, you should install it first - -```shell -python -m pip install --upgrade tox -``` - -After installation, you could run a single command to run all the tests, it is almost like the test in GitHub Action -but without so many different environments. - -```shell -tox -e local-ci -``` - -It will take a while when you run it the first time, because it has to install dependencies and do some preparation, -and the next time you run it will be faster. - -If section `lint` fails when you run command `tox -e local-ci`, you could try to run command `tox -e auto-lint` -which we provide to fix as many lint errors as possible. When it finishes, you could run command `tox -e local-ci` to see -whether the linter passes or not; you have to fix it by yourself if the linter still fails. - -### Manually - -#### Code Style - -We use [isort][isort] to automatically keep Python imports sorted alphabetically, and use [Black][black] as the code -formatter and [Flake8][flake8] as the pep8 checker. If you use [pycharm][pycharm] or [IntelliJ IDEA][idea], -maybe you could follow [Black-integration][black-editor] to configure them in your environment. - -Our Python API CI would automatically run the code style checker and unit tests when you submit a pull request in -GitHub, you could also run the static check locally. - -We recommend [pre-commit](https://pre-commit.com/) to do the checks mentioned above before you develop locally. -You should install `pre-commit` by running - -```shell -python -m pip install pre-commit -``` - -in your development environment and then run `pre-commit install` to set up the git hooks scripts. After finishing -the above steps, each time you run `git commit` or `git push` would run pre-commit check to make basic check before -you create pull requests in GitHub. - -```shell -# We recommend you run isort and Black before Flake8, because Black could auto fix some code style issue -# but Flake8 just hint when code style not match pep8 - -# Run Isort -python -m isort . - -# Run Black -python -m black . 
- -# Run Flake8 -python -m flake8 -``` - -#### Testing - -## Build Document - -We use [sphinx][sphinx] to build docs. Dolphinscheduler Python API CI would automatically build docs when you submit pull request in -GitHub. You may locally ensure docs could be built successfully in case the failure blocks CI, you can build by tox or manual. - -### Build Document Automatically with tox - -We integrated document build process into tox, you can build the latest document and all document(including history documents) via -single command - -```shell -# Build the latest document in dev branch -tox -e doc-build -# Build all documents, which including the latest and all history documents -tox -e doc-build-multi -``` - -### Build Document Manually - -To build docs locally, install sphinx and related python modules first via: - -```shell -python -m pip install '.[doc]' -``` - -Then go to document directory and execute the build command - -```shell -cd pydolphinscheduler/docs/ -make clean && make html -``` - -> NOTE: We support build multiple versions of documents with [sphinx-multiversion](https://holzhaus.github.io/sphinx-multiversion/master/index.html), -> you can build with command `git fetch --tags && make clean && make multiversion` - -## Testing - -pydolphinscheduler using [pytest][pytest] to test our codebase. GitHub Action will run our test when you create -pull request or commit to dev branch, with python version `3.6|3.7|3.8|3.9` and operating system `linux|macOS|windows`. - -pydolphinscheduler using [pytest][pytest] to run all tests in directory `tests`. You could run tests by the commands - -```shell -python -m pytest --cov=pydolphinscheduler --cov-config=.coveragerc tests/ -``` - -Besides run tests, it will also check the unit test [coverage][coverage] threshold, for now when test cover less than 90% -will fail the coverage, as well as our GitHub Action. - -The command above will check test coverage automatically, and you could also test the coverage by command. 
- -```shell -python -m coverage run && python -m coverage report -``` - -It would not only run unit test but also show each file coverage which cover rate less than 100%, and `TOTAL` -line show you total coverage of you code. If your CI failed with coverage you could go and find some reason by -this command output. - -### Integrate Test - -Integrate Test can not run when you execute command `tox -e local-ci` because it needs external environment -including [Docker](https://docs.docker.com/get-docker/) and specific image build by [maven](https://maven.apache.org/install.html). -Here we would show you the step to run integrate test in directory `dolphinscheduler-python/pydolphinscheduler/tests/integration`. -There are two ways to run integrate tests. - -#### Method 1: Launch Docker Container Locally - -```shell -# Go to project root directory and build Docker image -cd ../../ - -# Build Docker image -./mvnw -B clean install \ - -Dmaven.test.skip \ - -Dmaven.javadoc.skip \ - -Dmaven.checkstyle.skip \ - -Pdocker,release -Ddocker.tag=ci \ - -pl dolphinscheduler-standalone-server -am - -# Go to pydolphinscheduler root directory and run integrate tests -tox -e integrate-test -``` - -#### Method 2: Start Standalone Server in IntelliJ IDEA - -```shell -# Start the standalone server in IDEA - -# Go to pydolphinscheduler root directory and run integrate tests -tox -e local-integrate-test -``` - -## Add LICENSE When New Dependencies Adding - -When you add a new package in pydolphinscheduler, you should also add the package's LICENSE to directory -`dolphinscheduler-dist/release-docs/licenses/python-api-licenses`, and also add a short description to -`dolphinscheduler-dist/release-docs/LICENSE`. - -## Update `UPDATING.md` when public class, method or interface is be changed - -When you change public class, method or interface, you should change the [UPDATING.md](./UPDATING.md) to notice -users who may use it in other way. 
- -## Reference - -[py4j]: https://www.py4j.org/index.html -[pycharm]: https://www.jetbrains.com/pycharm -[idea]: https://www.jetbrains.com/idea/ -[all-task]: https://dolphinscheduler.apache.org/en-us/docs/dev/user_doc/guide/task/shell.html -[pytest]: https://docs.pytest.org/en/latest/ -[black]: https://black.readthedocs.io/en/stable/index.html -[flake8]: https://flake8.pycqa.org/en/latest/index.html -[black-editor]: https://black.readthedocs.io/en/stable/integrations/editors.html#pycharm-intellij-idea -[coverage]: https://coverage.readthedocs.io/en/stable/ -[isort]: https://pycqa.github.io/isort/index.html -[sphinx]: https://www.sphinx-doc.org/en/master - diff --git a/dolphinscheduler-python/pydolphinscheduler/LICENSE b/dolphinscheduler-python/pydolphinscheduler/LICENSE deleted file mode 100644 index a7359fad35..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/LICENSE +++ /dev/null @@ -1,228 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - -============================================================================ -Apache DolphinScheduler Python API SUBCOMPONENTS: - -The Apache DolphinScheduler Python API project contains subcomponents -with separate copyright notices and license terms. Your use of the source -code for the these subcomponents is subject to the terms and conditions -of the following licenses. - -======================================================================== -BSD licenses -======================================================================== - -The following components are provided under a BSD license. See project link for details. -The text of each license is also included at licenses/LICENSE-[project].txt. - - py4j v0.10 (https://github.com/py4j/py4j) - click v8.0 (https://github.com/pallets/click) - -======================================================================== -MIT licenses -======================================================================== - -The following components are provided under the MIT License. See project link for details. -The text of each license is also included at licenses/LICENSE-[project].txt. 
- - ruamel.yaml v0.17 (https://sourceforge.net/projects/ruamel-yaml/) diff --git a/dolphinscheduler-python/pydolphinscheduler/NOTICE b/dolphinscheduler-python/pydolphinscheduler/NOTICE deleted file mode 100644 index 61acdab5d8..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/NOTICE +++ /dev/null @@ -1,5 +0,0 @@ -Apache DolphinScheduler -Copyright 2017-2022 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). diff --git a/dolphinscheduler-python/pydolphinscheduler/README.md b/dolphinscheduler-python/pydolphinscheduler/README.md deleted file mode 100644 index 7fc73d6a29..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/README.md +++ /dev/null @@ -1,90 +0,0 @@ - - -# pydolphinscheduler - -[![PyPi Version](https://img.shields.io/pypi/v/apache-dolphinscheduler.svg?style=flat-square&logo=PyPi)](https://pypi.org/project/apache-dolphinscheduler/) -[![PyPi Python Versions](https://img.shields.io/pypi/pyversions/apache-dolphinscheduler.svg?style=flat-square&logo=python)](https://pypi.org/project/apache-dolphinscheduler/) -[![PyPi License](https://img.shields.io/pypi/l/apache-dolphinscheduler.svg?style=flat-square)](https://pypi.org/project/apache-dolphinscheduler/) -[![PyPi Status](https://img.shields.io/pypi/status/apache-dolphinscheduler.svg?style=flat-square)](https://pypi.org/project/apache-dolphinscheduler/) -[![PyPi Downloads](https://img.shields.io/pypi/dm/apache-dolphinscheduler?style=flat-square)](https://pypi.org/project/apache-dolphinscheduler/) - -[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg?style=flat-square)](https://github.com/psf/black) -[![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat-square&labelColor=ef8336)](https://pycqa.github.io/isort) -[![GitHub 
Build](https://github.com/apache/dolphinscheduler/actions/workflows/py-ci.yml/badge.svg?branch=dev)](https://github.com/apache/dolphinscheduler/actions?query=workflow%3A%22Python+API%22) - -**PyDolphinScheduler** is python API for Apache DolphinScheduler, which allow you definition -your workflow by python code, aka workflow-as-codes. - -## Quick Start - -### Installation - -```shell -# Install -python -m pip install apache-dolphinscheduler - -# Verify installation is successful, it will show the version of apache-dolphinscheduler, here we use 0.1.0 as example -pydolphinscheduler version -# 0.1.0 -``` - -> NOTE: package apache-dolphinscheduler not work on above Python version 3.10(including itself) in Window operating system -> due to dependence [py4j](https://pypi.org/project/py4j/) not work on those environments. - -Here we show you how to install and run a simple example of pydolphinscheduler - -### Start Server And Run Example - -Before you run an example, you have to start backend server. You could follow -[development setup](../../docs/docs/en/contribute/development-environment-setup.md) -section "DolphinScheduler Standalone Quick Start" to set up developer environment. You have to start backend -and frontend server in this step, which mean that you could view DolphinScheduler UI in your browser with URL -http://localhost:12345/dolphinscheduler - -After backend server is being start, all requests from `pydolphinscheduler` would be sent to backend server. -And for now we could run a simple example by: - - - -```shell -# Please make sure your terminal could -curl https://raw.githubusercontent.com/apache/dolphinscheduler/dev/dolphinscheduler-python/pydolphinscheduler/examples/tutorial.py -o ./tutorial.py -python ./tutorial.py -``` - -> **_NOTICE:_** Since Apache DolphinScheduler's tenant is requests while running command, you might need to change -> tenant value in `example/tutorial.py`. 
For now the value is `tenant_exists`, please change it to a username that exists -> in your environment. - -After the command executes, you could see a new project with a single process definition named *tutorial* in the -[UI-project list](https://dolphinscheduler.apache.org/en-us/docs/latest/user_doc/guide/project/project-list.html). - -## Develop - -So far, we have finished the quick start by running an example of pydolphinscheduler. If you want to inspect or join -pydolphinscheduler development, you could take a look at [develop](./DEVELOP.md) - -## Release - -If you are interested in how to release **PyDolphinScheduler**, you could take a look at [release](./RELEASE.md) - -## What's more - -For more detailed information, please see the **PyDolphinScheduler** latest (unreleased) [document](https://dolphinscheduler.apache.org/python/dev/index.html) diff --git a/dolphinscheduler-python/pydolphinscheduler/RELEASE.md b/dolphinscheduler-python/pydolphinscheduler/RELEASE.md deleted file mode 100644 index e00ef05beb..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/RELEASE.md +++ /dev/null @@ -1,35 +0,0 @@ - - -# Release - -**PyDolphinScheduler** official release is in [ASF Distribution Directory](https://downloads.apache.org/dolphinscheduler/), -and it should be released together with [apache-dolphinscheduler](https://github.com/apache/dolphinscheduler). - -## To ASF Distribution Directory - -You could release to [ASF Distribution Directory](https://downloads.apache.org/dolphinscheduler/) according to -[release guide](../../docs/docs/en/contribute/release/release-prepare.md) in DolphinScheduler -website. - -## To PyPi - -[PyPI](https://pypi.org), Python Package Index, is a repository of software for the Python programming language. -Users can install Python packages from it. 
Releasing to PyPI makes it easier for users to install and try PyDolphinScheduler. -There is an official way to package project from [PyPA](https://packaging.python.org/en/latest/tutorials/packaging-projects) diff --git a/dolphinscheduler-python/pydolphinscheduler/UPDATING.md b/dolphinscheduler-python/pydolphinscheduler/UPDATING.md deleted file mode 100644 index b298c3b1ad..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/UPDATING.md +++ /dev/null @@ -1,40 +0,0 @@ - - -# UPDATING - -UPDATING tries to document non-backward-compatible updates and notify users of the detailed changes in pydolphinscheduler. -It started after version 2.0.5 was released. - -## dev - -* Remove parameter ``task_location`` in process definition and Java Gateway service ([#11681](https://github.com/apache/dolphinscheduler/pull/11681)) -* Remove the spark version of spark task ([#11860](https://github.com/apache/dolphinscheduler/pull/11860)). - -## 3.0.0 - -* Integrate Python gateway server into Dolphinscheduler API server, and you could start Python gateway service by command - `./bin/dolphinscheduler-daemon.sh start api-server` instead of independent command - `./bin/dolphinscheduler-daemon.sh start python-gateway-server`. -* Remove parameter `queue` from class `ProcessDefinition` to avoid confusing users when it is changed but does not take effect. -* Change `yaml_parser.py` method `to_string` to magic method `__str__` to make it more pythonic. -* Use package ``ruamel.yaml`` instead of ``pyyaml`` to write YAML files with comments. -* Change the variable for where to keep pydolphinscheduler configuration from ``PYDOLPHINSCHEDULER_HOME`` to - ``PYDS_HOME``, which is consistent with other environment variable names. 
- diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/Makefile b/dolphinscheduler-python/pydolphinscheduler/docs/Makefile deleted file mode 100644 index ff2c4ebb44..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/Makefile +++ /dev/null @@ -1,44 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. - -# Add opts `turn warnings into errors` strict sphinx-build behavior -SPHINXOPTS ?= -W -SPHINXBUILD ?= sphinx-build -SPHINXMULTIVERSION ?= sphinx-multiversion -SOURCEDIR = source -BUILDDIR = build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
-%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -# Create multiple version of docs -multiversion: - @$(SPHINXMULTIVERSION) "$(SOURCEDIR)" "$(BUILDDIR)/html" diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/make.bat b/dolphinscheduler-python/pydolphinscheduler/docs/make.bat deleted file mode 100644 index feac4c92c0..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/make.bat +++ /dev/null @@ -1,54 +0,0 @@ -REM Licensed to the Apache Software Foundation (ASF) under one -REM or more contributor license agreements. See the NOTICE file -REM distributed with this work for additional information -REM regarding copyright ownership. The ASF licenses this file -REM to you under the Apache License, Version 2.0 (the -REM "License"); you may not use this file except in compliance -REM with the License. You may obtain a copy of the License at -REM -REM http://www.apache.org/licenses/LICENSE-2.0 -REM -REM Unless required by applicable law or agreed to in writing, -REM software distributed under the License is distributed on an -REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -REM KIND, either express or implied. See the License for the -REM specific language governing permissions and limitations -REM under the License. - -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=source -set BUILDDIR=build -REM Add opts `turn warnings into errors` strict sphinx-build behavior -set SPHINXOPTS=-W - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. 
- echo.If you don't have Sphinx installed, grab it from - echo.https://www.sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/_static/.gitkeep b/dolphinscheduler-python/pydolphinscheduler/docs/source/_static/.gitkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/_templates/versioning.html b/dolphinscheduler-python/pydolphinscheduler/docs/source/_templates/versioning.html deleted file mode 100644 index 47136c45cf..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/_templates/versioning.html +++ /dev/null @@ -1,27 +0,0 @@ -{# - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. -#} - -{% if versions %} -

{{ _('Versions') }}

- -{% endif %} diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/_templates/versions.html b/dolphinscheduler-python/pydolphinscheduler/docs/source/_templates/versions.html deleted file mode 100644 index 51b7271e9c..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/_templates/versions.html +++ /dev/null @@ -1,46 +0,0 @@ -{# - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. -#} - -{%- if current_version %} -
- - Other Versions - v: {{ current_version.name }} - - -
- {%- if versions.tags %} -
-
Tags
- {%- for item in versions.tags %} -
{{ item.name }}
- {%- endfor %} -
- {%- endif %} - {%- if versions.branches %} -
-
Branches
- {%- for item in versions.branches %} -
{{ item.name }}
- {%- endfor %} -
- {%- endif %} -
-
-{%- endif %} diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/api.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/api.rst deleted file mode 100644 index b170b6f870..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/api.rst +++ /dev/null @@ -1,47 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -API -=== - -Core ----- - -.. automodule:: pydolphinscheduler.core - :inherited-members: - -Models ------- - -.. automodule:: pydolphinscheduler.models - :inherited-members: - -Tasks ------ - -.. automodule:: pydolphinscheduler.tasks - :inherited-members: - -Constants ---------- - -.. automodule:: pydolphinscheduler.constants - -Exceptions ----------- - -.. automodule:: pydolphinscheduler.exceptions diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/cli.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/cli.rst deleted file mode 100644 index 60e8231abf..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/cli.rst +++ /dev/null @@ -1,36 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. 
See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Command Line Interface -====================== - -*PyDolphinScheduler* provides a CLI (command line interface) to help users control it from the shell. - -Prepare -------- - -You have to :ref:`install PyDolphinScheduler ` first before using -its CLI - -Usage ------ - -Here is the basic usage of the command line of *PyDolphinScheduler* - -.. click:: pydolphinscheduler.cli.commands:cli - :prog: pydolphinscheduler - :nested: full diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/concept.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/concept.rst deleted file mode 100644 index 9a9527df1d..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/concept.rst +++ /dev/null @@ -1,151 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -..
Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Concepts -======== - -In this section, you would know the core concepts of *PyDolphinScheduler*. - -Process Definition ------------------- - -Process definition describe the whole things except `tasks`_ and `tasks dependence`_, which including -name, schedule interval, schedule start time and end time. You would know scheduler - -Process definition could be initialized in normal assign statement or in context manger. - -.. code-block:: python - - # Initialization with assign statement - pd = ProcessDefinition(name="my first process definition") - - # Or context manger - with ProcessDefinition(name="my first process definition") as pd: - pd.submit() - -Process definition is the main object communicate between *PyDolphinScheduler* and DolphinScheduler daemon. -After process definition and task is be declared, you could use `submit` and `run` notify server your definition. - -If you just want to submit your definition and create workflow, without run it, you should use attribute `submit`. -But if you want to run the workflow after you submit it, you could use attribute `run`. - -.. code-block:: python - - # Just submit definition, without run it - pd.submit() - - # Both submit and run definition - pd.run() - -Schedule -~~~~~~~~ - -We use parameter `schedule` determine the schedule interval of workflow, *PyDolphinScheduler* support seven -asterisks expression, and each of the meaning of position as below - -.. 
code-block:: text - - * * * * * * * - ┬ ┬ ┬ ┬ ┬ ┬ ┬ - │ │ │ │ │ │ │ - │ │ │ │ │ │ └─── year - │ │ │ │ │ └───── day of week (0 - 7) (0 to 6 are Sunday to Saturday, or use names; 7 is Sunday, the same as 0) - │ │ │ │ └─────── month (1 - 12) - │ │ │ └───────── day of month (1 - 31) - │ │ └─────────── hour (0 - 23) - │ └───────────── min (0 - 59) - └─────────────── second (0 - 59) - -Here we add some example crontab: - -- `0 0 0 * * ? *`: Workflow execute every day at 00:00:00. -- `10 2 * * * ? *`: Workflow execute hourly day at ten pass two. -- `10,11 20 0 1,2 * ? *`: Workflow execute first and second day of month at 00:20:10 and 00:20:11. - -Tenant -~~~~~~ - -Tenant is the user who run task command in machine or in virtual machine. it could be assign by simple string. - -.. code-block:: python - - # - pd = ProcessDefinition(name="process definition tenant", tenant="tenant_exists") - -.. note:: - - Make should tenant exists in target machine, otherwise it will raise an error when you try to run command - -Tasks ------ - -Task is the minimum unit running actual job, and it is nodes of DAG, aka directed acyclic graph. You could define -what you want to in the task. It have some required parameter to make uniqueness and definition. - -Here we use :py:meth:`pydolphinscheduler.tasks.Shell` as example, parameter `name` and `command` is required and must be provider. Parameter -`name` set name to the task, and parameter `command` declare the command you wish to run in this task. - -.. code-block:: python - - # We named this task as "shell", and just run command `echo shell task` - shell_task = Shell(name="shell", command="echo shell task") - -If you want to see all type of tasks, you could see :doc:`tasks/index`. - -Tasks Dependence -~~~~~~~~~~~~~~~~ - -You could define many tasks in on single `Process Definition`_. If all those task is in parallel processing, -then you could leave them alone without adding any additional information. 
But if some tasks should -not be run until their upstream tasks in the workflow are done, we should set task dependence for them. There are -two main ways to set task dependence, and both of them are easy. You could use the bitwise operators `>>` and `<<`, or the task attributes -`set_downstream` and `set_upstream` to do it. - -.. code-block:: python - - # Set task1 as task2 upstream - task1 >> task2 - # You could use attribute `set_downstream` too, is same as `task1 >> task2` - task1.set_downstream(task2) - - # Set task1 as task2 downstream - task1 << task2 - # It is same as attribute `set_upstream` - task1.set_upstream(task2) - - # Beside, we could set dependence between task and sequence of tasks, - # we set `task1` is upstream to both `task2` and `task3`. It is useful - # for some tasks have same dependence. - task1 >> [task2, task3] - -Task With Process Definition -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -In most data orchestration cases, you should assign attribute `process_definition` to the task instance to -decide the workflow of the task. You could set `process_definition` either by normal assignment or in context manager mode - -.. code-block:: python - - # Normal assign, have to explicit declaration and pass `ProcessDefinition` instance to task - pd = ProcessDefinition(name="my first process definition") - shell_task = Shell(name="shell", command="echo shell task", process_definition=pd) - - # Context manager, `ProcessDefinition` instance pd would implicit declaration to task - with ProcessDefinition(name="my first process definition") as pd: - shell_task = Shell(name="shell", command="echo shell task") - -With both `Process Definition`_, `Tasks`_ and `Tasks Dependence`_, we could build a workflow with multiple tasks.
diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/conf.py b/dolphinscheduler-python/pydolphinscheduler/docs/source/conf.py deleted file mode 100644 index 23fc117fb7..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/conf.py +++ /dev/null @@ -1,121 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. 
- -import os -import sys -from pathlib import Path - -# For sphinx-multiversion, we need to build API docs of the corresponding package version, related issue: -# https://github.com/Holzhaus/sphinx-multiversion/issues/42 -pkg_src_dir = ( - Path(os.environ.get("SPHINX_MULTIVERSION_SOURCEDIR", default=".")) - .joinpath("../../src") - .resolve() -) -sys.path.insert(0, str(pkg_src_dir)) -# Debug to uncomment this to see the source path -# print("=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=") -# print(pkg_src_dir) -# [print(p) for p in sys.path] -# print("=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=") - - -# -- Project information ----------------------------------------------------- - -project = "pydolphinscheduler" -copyright = "2022, apache" -author = "apache dolphinscheduler contributors" - -# The full version, including alpha/beta/rc tags -release = "0.0.1" - - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - # Measures durations of Sphinx processing - "sphinx.ext.duration", - # Semi-automatic make docstrings to document - "sphinx.ext.autodoc", - "sphinx.ext.viewcode", - "sphinx.ext.autosectionlabel", - "sphinx_rtd_theme", - # Documenting command line interface - "sphinx_click.ext", - # Add inline tabbed content - "sphinx_inline_tabs", - "sphinx_copybutton", - "sphinx_multiversion", -] - -# Add any paths that contain templates here, relative to this directory. 
-templates_path = ["_templates"] - -# sphinx_multiversion configuration -html_sidebars = { - "**": [ - "versioning.html", - ], -} -# Match all exists tag for pydolphinscheduler expect version 2.0.4(not release apache dolphinscheduler) -smv_tag_whitelist = r"^(?!2.0.4)\d+\.\d+\.\d+$" -smv_branch_whitelist = "dev" -smv_remote_whitelist = r"^(origin|upstream)$" -smv_released_pattern = "^refs/tags/.*$" -smv_outputdir_format = "versions/{ref.name}" - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [] - -autodoc_default_options = { - "members": True, - "show-inheritance": True, - "private-members": True, - "undoc-members": True, - "member-order": "groupwise", -} - -autosectionlabel_prefix_document = True - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = "sphinx_rtd_theme" - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/config.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/config.rst deleted file mode 100644 index 29a143d713..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/config.rst +++ /dev/null @@ -1,218 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. 
The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Configuration -============= - -pydolphinscheduler has a built-in module setting necessary configuration to start and run your workflow code. -You could directly use them if you only want to run a quick start or for a simple job like POC. But if you -want to deep use pydolphinscheduler and even use it in production. You should probably need to modify and -change the built-in configuration. - -We have two ways to modify the configuration: - -- `Using Environment Variables`_: The more lightweight way to modify the configuration. it is useful in - containerization scenarios, like docker and k8s, or when you like to temporarily override configs in the - configuration file. -- `Using Configuration File`_: The more general way to modify the configuration. It is useful when you want - to persist and manage configuration files in one single file. - -Using Environment Variables ---------------------------- - -You could change the configuration by adding or modifying the operating system's environment variables. No -matter what way you used, as long as you can successfully modify the environment variables. We use two common -ways, `Bash `_ and `Python OS Module `_, as examples: - -By Bash -^^^^^^^ - -Setting environment variables via `Bash` is the most straightforward and easiest way. We give some examples about -how to change them by Bash. - -.. 
code-block:: bash - - # Modify Java Gateway Address - export PYDS_JAVA_GATEWAY_ADDRESS="192.168.1.1" - - # Modify Workflow Default User - export PYDS_WORKFLOW_USER="custom-user" - -After executing the commands above, both ``PYDS_JAVA_GATEWAY_ADDRESS`` and ``PYDS_WORKFLOW_USER`` will be changed. -The next time you execute and submit your workflow, it will submit to host `192.168.1.1`, and with workflow's user -named `custom-user`. - -By Python OS Module -^^^^^^^^^^^^^^^^^^^ - -pydolphinscheduler is a Python API for Apache DolphinScheduler, and you could modify or add system environment -variables via Python ``os`` module. In this example, we change variables as the same value as we change in -`Bash `_. It will take effect the next time you run your workflow, and call workflow ``run`` or ``submit`` -method next to ``os.environ`` statement. - -.. code-block:: python - - import os - # Modify Java Gateway Address - os.environ["PYDS_JAVA_GATEWAY_ADDRESS"] = "192.168.1.1" - - # Modify Workflow Default User - os.environ["PYDS_WORKFLOW_USER"] = "custom-user" - -All Configurations in Environment Variables -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -All environment variables as below, and you could modify their value via `Bash `_ or `Python OS Module `_ - -+------------------+------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| Variable Section | Variable Name | description | -+==================+====================================+====================================================================================================================+ -| | ``PYDS_JAVA_GATEWAY_ADDRESS`` | Default Java gateway address, will use its value when it is set. 
| -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| Java Gateway | ``PYDS_JAVA_GATEWAY_PORT`` | Default Java gateway port, will use its value when it is set. | -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_JAVA_GATEWAY_AUTO_CONVERT`` | Default boolean Java gateway auto convert, will use its value when it is set. | -+------------------+------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_USER_NAME`` | Default user name, will use when user's ``name`` when does not specify. | -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_USER_PASSWORD`` | Default user password, will use when user's ``password`` when does not specify. | -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| Default User | ``PYDS_USER_EMAIL`` | Default user email, will use when user's ``email`` when does not specify. | -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_USER_PHONE`` | Default user phone, will use when user's ``phone`` when does not specify. | -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_USER_STATE`` | Default user state, will use when user's ``state`` when does not specify. 
| -+------------------+------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_WORKFLOW_PROJECT`` | Default workflow project name, will use its value when workflow does not specify the attribute ``project``. | -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_WORKFLOW_TENANT`` | Default workflow tenant, will use its value when workflow does not specify the attribute ``tenant``. | -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| Default Workflow | ``PYDS_WORKFLOW_USER`` | Default workflow user, will use its value when workflow does not specify the attribute ``user``. | -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_WORKFLOW_QUEUE`` | Default workflow queue, will use its value when workflow does not specify the attribute ``queue``. | -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_WORKFLOW_WORKER_GROUP`` | Default workflow worker group, will use its value when workflow does not specify the attribute ``worker_group``. | -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_WORKFLOW_RELEASE_STATE`` | Default workflow release state, will use its value when workflow does not specify the attribute ``release_state``. 
| -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_WORKFLOW_TIME_ZONE`` | Default workflow worker group, will use its value when workflow does not specify the attribute ``timezone``. | -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_WORKFLOW_WARNING_TYPE`` | Default workflow warning type, will use its value when workflow does not specify the attribute ``warning_type``. | -+------------------+------------------------------------+--------------------------------------------------------------------------------------------------------------------+ - -.. note:: - - The scope of setting configuration via environment variable is in the workflow, and it will not change the - value of the configuration file. The :doc:`CLI ` command ``config --get`` and ``config --set`` operate - the value of the configuration file, so the command ``config --get`` may return a different value from what - you set in the environment variable, and command ``config --get`` will never change your environment variable. - -Using Configuration File ------------------------- - -If you want to persist and manage configuration in a file instead of environment variables, or maybe you want -want to save your configuration file to a version control system, like Git or SVN, and the way to change -configuration by file is the best choice. - -Export Configuration File -^^^^^^^^^^^^^^^^^^^^^^^^^ - -pydolphinscheduler allows you to change the built-in configurations via CLI or editor you like. pydolphinscheduler -integrated built-in configurations in its package, but you could also export it locally by CLI - -.. code-block:: bash - - pydolphinscheduler config --init - -And it will create a new YAML file in the path `~/pydolphinscheduler/config.yaml` by default. 
If you want to export -it to another path, you should set `PYDS_HOME` before you run command :code:`pydolphinscheduler config --init`. - -.. code-block:: bash - - export PYDS_HOME= - pydolphinscheduler config --init - -After that, your configuration file will be exported into `/config.yaml` instead of the default path. - -Change Configuration -^^^^^^^^^^^^^^^^^^^^ - -In section `export configuration file`_ you export the configuration file locally, and as a local file, you could -edit it with any editor you like. After you save your change in your editor, the latest configuration will work -when you run your workflow code. - -You could also query or change the configuration via CLI :code:`config --get ` or :code:`config --set `. -Both `--get` and `--set` could be called one or more times in a single command, and you could only set the leaf -node of the configuration but could get the parent configuration, there are simple examples below: - -.. code-block:: bash - - # Get single configuration in the leaf node, - # The output look like below: - # java_gateway.address = 127.0.0.1 - pydolphinscheduler config --get java_gateway.address - - # Get multiple configuration in the leaf node, - # The output look like below: - # java_gateway.address = 127.0.0.1 - # java_gateway.port = 25333 - pydolphinscheduler config --get java_gateway.address --get java_gateway.port - - - # Get parent configuration which contain multiple leaf nodes, - # The output look like below: - # java_gateway = ordereddict([('address', '127.0.0.1'), ('port', 25333), ('auto_convert', True)]) - pydolphinscheduler config --get java_gateway - - # Set single configuration, - # The output look like below: - # Set configuration done.
- pydolphinscheduler config --set java_gateway.address 192.168.1.1 --set java_gateway.port 25334 - - # Set configuration not in leaf node will fail - # The output look like below: - # Raise error. - pydolphinscheduler config --set java_gateway 192.168.1.1,25334,True - -For more information about our CLI, you could see document :doc:`cli`. - -All Configurations in File -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Here are all our configurations for pydolphinscheduler. - -.. literalinclude:: ../../src/pydolphinscheduler/default_config.yaml - :language: yaml - :lines: 18- - -Priority --------- - -We have two ways to modify the configuration and there is a built-in config in pydolphinscheduler too. It is -very important to understand the priority of the configuration when you use them. The overview of configuration -priority is. - -``Environment Variables > Configurations File > Built-in Configurations`` - -This means that your setting in environment variables or configurations file will overwrite the built-in one. -And you could temporarily modify configurations by setting environment variables without modifying the global -config in the configuration file. diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/howto/index.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/howto/index.rst deleted file mode 100644 index a0b3c29c0c..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/howto/index.rst +++ /dev/null @@ -1,30 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. 
Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -HOWTOs -====== - -pydolphinscheduler HOWTOs are documents that cover a single, specific topic, and attempt to cover it fairly -completely. This collection is an effort to foster documentation that is more detailed than the :doc:`../concept` -and :doc:`../tutorial`. - -Currently, the HOWTOs are: - -.. toctree:: - :maxdepth: 2 - - remote-submit diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/howto/remote-submit.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/howto/remote-submit.rst deleted file mode 100644 index b7efdf4fc0..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/howto/remote-submit.rst +++ /dev/null @@ -1,51 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. 
- -Submit Your Code from Different machine -======================================= - -Generally, we use pydolphinscheduler as a client to DolphinScheduler, and considering we may change our workflow -code frequently, the best practice is to run :ref:`python gateway service ` -on your server machine and submit the workflow code from your development machine, like a laptop or PC. This behavior -is supported by pydolphinscheduler out of the box with one or two command lines. - -Export Configuration File -------------------------- - -.. code-block:: bash - - pydolphinscheduler config --init - -You could find more detail in :ref:`configuration exporting ` - -Run API Server in Other Host ----------------------------- - -.. code-block:: bash - - pydolphinscheduler config --set java_gateway.address - -You could find more detail in :ref:`configuration setting ` - -Run API Server in Other Port ----------------------------- - -.. code-block:: bash - - pydolphinscheduler config --set java_gateway.port - -You could find more detail in :ref:`configuration setting ` diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/index.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/index.rst deleted file mode 100644 index 4dc0a949c9..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/index.rst +++ /dev/null @@ -1,46 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -..
Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -PyDolphinScheduler -================== - -**PyDolphinScheduler** is the Python API for `Apache DolphinScheduler `_, -which allows you to define your workflow in Python code, aka workflow-as-code. - -You could go and find how to :ref:`install ` the project. Or if you want to see a simple example, -then go and see :doc:`tutorial` for more detail. - - -.. toctree:: - :maxdepth: 2 - - start - tutorial - concept - tasks/index - howto/index - cli - config - api - resources_plugin/index - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/develop.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/develop.rst deleted file mode 100644 index e7d90ea03c..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/develop.rst +++ /dev/null @@ -1,46 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied.
See the License for the - specific language governing permissions and limitations - under the License. - -How to develop -============== - -When you want to create a new resource plugin, you need to add a new class in the module `resources_plugin`. - -The resource plugin class needs to inherit the abstract class `ResourcePlugin` and implement its abstract method `read_file` function. - -The parameter of the `__init__` function of `ResourcePlugin` is the prefix of STR type. You can override this function when necessary. - -The `read_file` function parameter of `ResourcePlugin` is the file suffix of STR type, and its return value is the file content, if it exists and is readable. - - -Example -------- -- Method `__init__`: Initiation method with `param`:`prefix` - -.. literalinclude:: ../../../src/pydolphinscheduler/resources_plugin/local.py - :start-after: [start init_method] - :end-before: [end init_method] - -- Method `read_file`: Get content from the given URI, The function parameter is the suffix of the file path. - -The file prefix has been initialized in init of the resource plugin. - -The prefix plus suffix is the absolute path of the file in this resource. - -.. literalinclude:: ../../../src/pydolphinscheduler/resources_plugin/local.py - :start-after: [start read_file_method] - :end-before: [end read_file_method] diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/github.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/github.rst deleted file mode 100644 index b3023377de..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/github.rst +++ /dev/null @@ -1,35 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. 
The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -GitHub -====== - -`GitHub` is a github resource plugin for pydolphinscheduler. - -When using a github resource plugin, you only need to add the `resource_plugin` parameter in the task subclass or workflow definition, -such as `resource_plugin=GitHub(prefix="https://github.com/xxx", access_token="ghpxx")`. -The token parameter is optional. You need to add it when your repository is a private repository. - -You can view this `document `_ -when creating a token. - -For the specific use of resource plugins, you can see `How to use` in :doc:`resource-plugin` - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.resources_plugin.github \ No newline at end of file diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/gitlab.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/gitlab.rst deleted file mode 100644 index fdf43c9d2f..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/gitlab.rst +++ /dev/null @@ -1,46 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. 
You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -GitLab -====== - -`GitLab` is a gitlab resource plugin for pydolphinscheduler. - -When using a gitlab resource plugin, you only need to add the `resource_plugin` parameter in the task subclass or workflow definition, -such as `resource_plugin=GitLab(prefix="xxx")`, if it is a public repository. - -If it is a private or Internal repository, you can use three ways to obtain authentication. - -The first is `Personal Access Tokens`, using `resource_plugin=GitLab(prefix="xxx", private_token="xxx")`. - -The second method is to obtain authentication through `username` and `password`: - -using `resource_plugin=GitLab(prefix="xxx", username="username", password="pwd")`. - -The third method is to obtain authentication through `OAuth Token`: - -using `resource_plugin=GitLab(prefix="xxx", oauth_token="xx")`. - -You can view this `document `_ -when creating a `Personal Access Tokens`. - -For the specific use of resource plugins, you can see `How to use` in :doc:`resource-plugin` - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.resources_plugin.gitlab \ No newline at end of file diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/index.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/index.rst deleted file mode 100644 index c984f06048..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/index.rst +++ /dev/null @@ -1,32 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. 
See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Resources_plugin -================ - -In this section - -.. toctree:: - :maxdepth: 1 - - develop - resource-plugin - local - github - gitlab - oss - s3 \ No newline at end of file diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/local.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/local.rst deleted file mode 100644 index 5da025a5c7..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/local.rst +++ /dev/null @@ -1,32 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. 
- -Local -===== - -`Local` is a local resource plugin for pydolphinscheduler. - -When using a local resource plugin, you only need to add the `resource_plugin` parameter in the task subclass or workflow definition, -such as `resource_plugin=Local("/tmp")`. - - -For the specific use of resource plugins, you can see `How to use` in :doc:`./resource-plugin` - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.resources_plugin.local \ No newline at end of file diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/oss.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/oss.rst deleted file mode 100644 index fbb6785d1d..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/oss.rst +++ /dev/null @@ -1,44 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -OSS -=== - -`OSS` is a Aliyun OSS resource plugin for pydolphinscheduler. - -When using a OSS resource plugin, you only need to add the `resource_plugin` parameter in the task subclass or workflow definition, -such as `resource_plugin=OSS(prefix="xxx")`, if the file is publicly readable. 
- -When the file is private, using `resource_plugin=OSS(prefix="xxx", access_key_id="xxx", access_key_secret="xxx")` - -Notice -The read permission of files in a bucket is inherited from the bucket by default. In other words, if the bucket is private, -the files in it are also private. - -But the read permission of the files in the bucket can be changed, in other words, the files in the private bucket can also be read publicly. - -So whether the `AccessKey` is needed depends on whether the file is private or not. - -You can view this `document `_ -when creating a pair `AccessKey`. - -For the specific use of resource plugins, you can see `How to use` in :doc:`resource-plugin` - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.resources_plugin.OSS diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/resource-plugin.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/resource-plugin.rst deleted file mode 100644 index 2a32526208..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/resource-plugin.rst +++ /dev/null @@ -1,75 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. 
- -ResourcePlugin -============== - -`ResourcePlugin` is an abstract class of resource plug-in parameters of task subclass and workflow. -All resource plugins need to inherit and override its abstract methods. - -Code ----- -.. literalinclude:: ../../../src/pydolphinscheduler/core/resource_plugin.py - :start-after: [start resource_plugin_definition] - :end-before: [end resource_plugin_definition] - -Dive Into ---------- -It has the following key functions. - -- Method `__init__`: The `__init__` function has STR type parameter `prefix`, which means the prefix of the resource. - -You can rewrite this function if necessary. - -.. literalinclude:: ../../../src/pydolphinscheduler/core/resource_plugin.py - :start-after: [start init_method] - :end-before: [end init_method] - -- Method `read_file`: Get content from the given URI, The function parameter is the suffix of the file path. - -The file prefix has been initialized in init of the resource plug-in. - -The prefix plus suffix is the absolute path of the file in this resource. - -It is an abstract function. You must rewrite it - -.. literalinclude:: ../../../src/pydolphinscheduler/core/resource_plugin.py - :start-after: [start abstractmethod read_file] - :end-before: [end abstractmethod read_file] - -.. automodule:: pydolphinscheduler.core.resource_plugin - -How to use ----------- -Resource plugin can be used in task subclasses and workflows. You can use the resource plugin by adding the `resource_plugin` parameter when they are initialized. -For example, local resource plugin, add `resource_plugin = Local("/tmp")`. - -The resource plugin we currently support are `local`, `github`, `gitlab`, `OSS`, `S3`. - -Here is an example. - -.. 
literalinclude:: ../../../src/pydolphinscheduler/examples/tutorial_resource_plugin.py - :start-after: [start workflow_declare] - :end-before: [end task_declare] - -When the resource_plugin parameter is defined in both the task subclass and the workflow, the resource_plugin defined in the task subclass is used first. - -If the task subclass does not define resource_plugin, but the resource_plugin is defined in the workflow, the resource_plugin in the workflow is used. - -Of course, if neither the task subclass nor the workflow specifies resource_plugin, the command at this time will be executed as a script, - -in other words, we are forward compatible. \ No newline at end of file diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/s3.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/s3.rst deleted file mode 100644 index f5bc1d37fe..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/s3.rst +++ /dev/null @@ -1,36 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -S3 -== - -`S3` is a Amazon S3 resource plugin for pydolphinscheduler. 
- -When using an Amazon S3 resource plugin, you only need to add the `resource_plugin` parameter in the task subclass or workflow definition, -such as `resource_plugin=S3(prefix="xxx")`, if the file is publicly readable. - -When the file is private, use `resource_plugin=S3(prefix="xxx", access_key_id="xxx", access_key_secret="xxx")` - -You can view this `document `_ -when creating an `AccessKey` pair. - -For the specific use of resource plugins, you can see `How to use` in :doc:`resource-plugin` - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.resources_plugin.S3 diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/start.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/start.rst deleted file mode 100644 index 270b5b855d..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/start.rst +++ /dev/null @@ -1,171 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Getting Started -=============== - -To get started with *PyDolphinScheduler* you must ensure python and pip are -installed on your machine. If you're already set up, you can skip straight -to `Installing PyDolphinScheduler`_; otherwise please continue with -`Installing Python`_.
- -Installing Python ------------------ - -How to install `python` and `pip` depends on what operating system -you're using. The python wiki provides up to date -`instructions for all platforms here`_. When you enter the website -and choose your operating system, you will be offered a choice of -Python versions to select from. *PyDolphinScheduler* recommends using a version above -Python 3.6 and we highly recommend you install *Stable Releases* instead -of *Pre-releases*. - -After you have downloaded and installed Python, you should open your terminal, -then type and run :code:`python --version` to check whether the installation -is correct or not. If all is good, you will see the version in the console -without error (here is an example after Python 3.8.7 is installed) - -.. code-block:: bash - - python --version - -You will see the details of the Python version, such as *Python 3.8.7* - -Installing PyDolphinScheduler ------------------------------ - -After Python is already installed on your machine following section -`installing Python`_, it is easy to install *PyDolphinScheduler* with pip. - -.. code-block:: bash - - python -m pip install apache-dolphinscheduler - -The latest version of *PyDolphinScheduler* will be installed after you run the above -command in your terminal. You could go and `start Python Gateway Service`_ to finish -the preparation, and then go to :doc:`tutorial` to get your hands dirty. But if you -want to install the unreleased version of *PyDolphinScheduler*, you could go and see -section `installing PyDolphinScheduler in dev branch`_ for more detail. - -.. note:: - - Currently, we have released multiple pre-release packages on PyPI; you can see all released packages - including pre-releases in `release history `_. - You can pin the package version if you want to install a pre-release package, for example if - you want to install the version `3.0.0-beta-2` package, you can run the command - :code:`python -m pip install apache-dolphinscheduler==3.0.0b2`.
- -Installing PyDolphinScheduler In DEV Branch -------------------------------------------- - -Because the project is under active development, some of the features are not released yet. -If you want to try something unreleased, you could install from the source code -which we host on GitHub - -.. code-block:: bash - - # Clone Apache DolphinScheduler repository - git clone git@github.com:apache/dolphinscheduler.git - # Install PyDolphinScheduler in develop mode - cd dolphinscheduler-python/pydolphinscheduler && python -m pip install -e . - -After you have installed *PyDolphinScheduler*, please remember to `start Python Gateway Service`_, -which waits for *PyDolphinScheduler*'s workflow definition requests. - -The above command will clone the whole dolphinscheduler source code to your machine. If you want to install the latest pydolphinscheduler -package directly and do not care about other code (including Python gateway service code), you can execute the command - -.. code-block:: bash - - # Must escape the '&' character by adding '\' - pip install -e "git+https://github.com/apache/dolphinscheduler.git#egg=apache-dolphinscheduler&subdirectory=dolphinscheduler-python/pydolphinscheduler" - -Start Python Gateway Service ----------------------------- - -Since **PyDolphinScheduler** is the Python API for `Apache DolphinScheduler`_, it -can define workflow and task structure, but cannot run it unless you -`install Apache DolphinScheduler`_ and start its API server, which includes the -Python gateway service. We only list some key steps here and you could -go to `install Apache DolphinScheduler`_ for more detail - -.. code-block:: bash - - # Start DolphinScheduler api-server which including python gateway service - ./bin/dolphinscheduler-daemon.sh start api-server - -To check whether the server is alive or not, you could run :code:`jps`. And -the server is healthy if the keyword `ApiApplicationServer` appears in the console. - -.. code-block:: bash - - jps - # .... - # 201472 ApiApplicationServer - # .... - -..
note:: - - Please make sure you have already enabled the Python gateway service to start along with `api-server`. The configuration is in - yaml config path `python-gateway.enabled : true` in api-server's configuration file `api-server/conf/application.yaml`. - The default value is true, and the Python gateway service starts when the API server is started. - -Run an Example --------------- - -Before running an example for pydolphinscheduler, you should get the example code from its source code. You could run -a single bash command to get it - -.. code-block:: bash - - wget https://raw.githubusercontent.com/apache/dolphinscheduler/dev/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial.py - -or you could copy-paste the content from `tutorial source code`_. And then you could run the example in your -terminal - -.. code-block:: bash - - python tutorial.py - -If you want to submit your workflow to a remote API server, which means that your workflow script is on a different host -from the API server, you should first change the pydolphinscheduler configuration and then submit the workflow script - -.. code-block:: bash - - pydolphinscheduler config --init - pydolphinscheduler config --set java_gateway.address - python tutorial.py - -.. note:: - - You could see more information in :doc:`config` about all the configurations pydolphinscheduler supports. - -After that, you could go and see your DolphinScheduler web UI to find the new workflow created by pydolphinscheduler, -and the path in the web UI is `Project -> Workflow -> Workflow Definition`. - - -What's More ------------ - -If you are not familiar with *PyDolphinScheduler*, you could go to :doc:`tutorial` and see how it works. But -if you already know the basic usage or concept of *PyDolphinScheduler*, you could go and play with all -:doc:`tasks/index` *PyDolphinScheduler* supports, or see our :doc:`howto/index` about useful cases. - -..
_`instructions for all platforms here`: https://wiki.python.org/moin/BeginnersGuide/Download -.. _`Apache DolphinScheduler`: https://dolphinscheduler.apache.org -.. _`install Apache DolphinScheduler`: https://dolphinscheduler.apache.org/en-us/docs/latest/user_doc/guide/installation/standalone.html -.. _`tutorial source code`: https://raw.githubusercontent.com/apache/dolphinscheduler/dev/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial.py diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/condition.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/condition.rst deleted file mode 100644 index f6d7e6ad8f..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/condition.rst +++ /dev/null @@ -1,40 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Condition -========= - -A condition task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_condition_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. 
automodule:: pydolphinscheduler.tasks.condition - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Condition.yaml - :start-after: # under the License. - :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/datax.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/datax.rst deleted file mode 100644 index cb67a2fa9e..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/datax.rst +++ /dev/null @@ -1,46 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Datax -===== - -A DataX task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_datax_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.datax - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/DataX.yaml - :start-after: # under the License. - :language: yaml - - -example_datax.json: - -.. 
literalinclude:: ../../../examples/yaml_define/example_datax.json - :language: json diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/dependent.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/dependent.rst deleted file mode 100644 index d8e1599b2d..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/dependent.rst +++ /dev/null @@ -1,47 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Dependent -========= - -A dependent task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_dependent_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.dependent - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Dependent.yaml - :start-after: # under the License. - :language: yaml - -Dependent_External.yaml: - -.. literalinclude:: ../../../examples/yaml_define/Dependent_External.yaml - :start-after: # under the License. 
- :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/dvc.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/dvc.rst deleted file mode 100644 index 0127a982f3..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/dvc.rst +++ /dev/null @@ -1,41 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -DVC -=== - -A DVC task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_dvc_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.dvc - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Dvc.yaml - :start-after: # under the License. - :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/flink.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/flink.rst deleted file mode 100644 index 76eb484718..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/flink.rst +++ /dev/null @@ -1,40 +0,0 @@ -.. 
Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Flink -===== - -A flink task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_flink_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.flink - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Flink.yaml - :start-after: # under the License. - :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/func_wrap.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/func_wrap.rst deleted file mode 100644 index a4a2972933..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/func_wrap.rst +++ /dev/null @@ -1,33 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. 
The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Python Function Wrapper -======================= - -A decorator that converts a Python function into pydolphinscheduler's task. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/tutorial_decorator.py - :start-after: [start tutorial] - :end-before: [end tutorial] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.func_wrap diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/http.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/http.rst deleted file mode 100644 index 4e138c9989..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/http.rst +++ /dev/null @@ -1,29 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied.
See the License for the - specific language governing permissions and limitations - under the License. - -HTTP -==== - -.. automodule:: pydolphinscheduler.tasks.http - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Http.yaml - :start-after: # under the License. - :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/index.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/index.rst deleted file mode 100644 index 3f83f92675..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/index.rst +++ /dev/null @@ -1,48 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Tasks -===== - -In this section - -.. 
toctree:: - :maxdepth: 1 - - func_wrap - shell - sql - python - http - - switch - condition - dependent - - spark - flink - map_reduce - procedure - - datax - sub_process - - sagemaker - mlflow - openmldb - pytorch - dvc diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/map_reduce.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/map_reduce.rst deleted file mode 100644 index 7356880b26..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/map_reduce.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Map Reduce -========== - - -A Map Reduce task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_map_reduce_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.map_reduce - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/MapReduce.yaml - :start-after: # under the License. 
- :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/mlflow.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/mlflow.rst deleted file mode 100644 index b83903c26f..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/mlflow.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -MLflow -========= - - -A MLflow task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_mlflow_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.mlflow - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/mlflow.yaml - :start-after: # under the License. - :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/openmldb.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/openmldb.rst deleted file mode 100644 index 125313dc21..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/openmldb.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. 
Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -OpenMLDB -========= - - -A OpenMLDB task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_openmldb_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.openmldb - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/OpenMLDB.yaml - :start-after: # under the License. - :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/procedure.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/procedure.rst deleted file mode 100644 index 2f28efc526..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/procedure.rst +++ /dev/null @@ -1,29 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. 
The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Procedure -========= - -.. automodule:: pydolphinscheduler.tasks.procedure - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Procedure.yaml - :start-after: # under the License. - :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/python.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/python.rst deleted file mode 100644 index 1bf6210018..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/python.rst +++ /dev/null @@ -1,29 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Python -====== - -.. 
automodule:: pydolphinscheduler.tasks.python - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Python.yaml - :start-after: # under the License. - :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/pytorch.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/pytorch.rst deleted file mode 100644 index 4c7a5521fb..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/pytorch.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Pytorch -======= - - -A Pytorch task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_pytorch_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.pytorch - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Pytorch.yaml - :start-after: # under the License. 
- :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sagemaker.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sagemaker.rst deleted file mode 100644 index 36880d91d2..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sagemaker.rst +++ /dev/null @@ -1,46 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -SageMaker -========= - - -A SageMaker task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_sagemaker_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.sagemaker - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Sagemaker.yaml - :start-after: # under the License. - :language: yaml - -example_sagemaker_params.json: - -.. 
literalinclude:: ../../../examples/yaml_define/example_sagemaker_params.json - :language: json diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/shell.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/shell.rst deleted file mode 100644 index 2dd106a3b8..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/shell.rst +++ /dev/null @@ -1,41 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Shell -===== - -A shell task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/tutorial.py - :start-after: [start workflow_declare] - :end-before: [end task_relation_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.shell - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Shell.yaml - :start-after: # under the License. 
- :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/spark.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/spark.rst deleted file mode 100644 index d5a51db91a..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/spark.rst +++ /dev/null @@ -1,41 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Spark -===== - -A spark task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_spark_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.spark - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Spark.yaml - :start-after: # under the License. - :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sql.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sql.rst deleted file mode 100644 index 52df042b74..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sql.rst +++ /dev/null @@ -1,35 +0,0 @@ -.. 
Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -SQL -=== - -.. automodule:: pydolphinscheduler.tasks.sql - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Sql.yaml - :start-after: # under the License. - :language: yaml - -example_sql.sql: - -.. literalinclude:: ../../../examples/yaml_define/example_sql.sql - :start-after: */ - :language: sql diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sub_process.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sub_process.rst deleted file mode 100644 index 894dd0fbad..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sub_process.rst +++ /dev/null @@ -1,38 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. 
Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Sub Process -=========== - -.. automodule:: pydolphinscheduler.tasks.sub_process - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/SubProcess.yaml - :start-after: # under the License. - :language: yaml - - - -example_subprocess.yaml: - -.. literalinclude:: ../../../examples/yaml_define/example_sub_workflow.yaml - :start-after: # under the License. - :language: yaml - diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/switch.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/switch.rst deleted file mode 100644 index 2fef589efb..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/switch.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Switch -====== - -A switch task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. 
literalinclude:: ../../../src/pydolphinscheduler/examples/task_switch_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.switch - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Switch.yaml - :start-after: # under the License. - :language: yaml - diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tutorial.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tutorial.rst deleted file mode 100644 index 57d21b2d29..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tutorial.rst +++ /dev/null @@ -1,319 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Tutorial -======== - -This tutorial shows you the basic concept of *PyDolphinScheduler* and tells all -things you should know before you submit or run your first workflow. If you -still have not installed *PyDolphinScheduler* and start DolphinScheduler, you -could go and see :ref:`how to getting start PyDolphinScheduler ` firstly. 
- -Overview of Tutorial --------------------- - -Here is an overview of our tutorial. It looks a little complex, but do not -worry about that because we explain this example below in as much detail as possible. - -There are three types of tutorials: traditional, task decorator, and YAML file. - -- **Traditional Way**: More general, support many :doc:`built-in task types `, it is convenient - when you build your workflow at the beginning. -- **Task Decorator**: A Python decorator allows you to wrap your function into pydolphinscheduler's task. Less - versatile than the traditional way because it only supports Python functions, without built-in tasks - supported. But it is helpful if your workflow is all built with Python or if you already have some Python - workflow code and want to migrate them to pydolphinscheduler. -- **YAML File**: We can use pydolphinscheduler CLI to create process using YAML file: :code:`pydolphinscheduler yaml -f tutorial.yaml`. - We can find more YAML file examples in `examples/yaml_define `_ - -.. tab:: Tradition - - .. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial.py - :dedent: 0 - :start-after: [start tutorial] - :end-before: [end tutorial] - -.. tab:: Task Decorator - - .. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial_decorator.py - :dedent: 0 - :start-after: [start tutorial] - :end-before: [end tutorial] - -.. tab:: YAML File - - .. literalinclude:: ../../examples/yaml_define/tutorial.yaml - :start-after: # under the License. - :language: yaml - -Import Necessary Module ------------------------ - -First of all, we should import the necessary module which we would use later just like other Python packages. - -.. tab:: Tradition - - ..
literalinclude:: ../../src/pydolphinscheduler/examples/tutorial.py - :dedent: 0 - :start-after: [start package_import] - :end-before: [end package_import] - - In tradition tutorial we import :class:`pydolphinscheduler.core.process_definition.ProcessDefinition` and - :class:`pydolphinscheduler.tasks.shell.Shell`. - - If you want to use other task type you could click and :doc:`see all tasks we support ` - -.. tab:: Task Decorator - - .. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial_decorator.py - :dedent: 0 - :start-after: [start package_import] - :end-before: [end package_import] - - In task decorator tutorial we import :class:`pydolphinscheduler.core.process_definition.ProcessDefinition` and - :func:`pydolphinscheduler.tasks.func_wrap.task`. - -Process Definition Declaration ------------------------------- - -We should instantiate :class:`pydolphinscheduler.core.process_definition.ProcessDefinition` object after we -import them from `import necessary module`_. Here we declare basic arguments for process definition(aka, workflow). -We define the name of :code:`ProcessDefinition`, using `Python context manager`_ and it **the only required argument** -for `ProcessDefinition`. Besides, we also declare three arguments named :code:`schedule` and :code:`start_time` -which setting workflow schedule interval and schedule start_time, and argument :code:`tenant` defines which tenant -will be running this task in the DolphinScheduler worker. See :ref:`section tenant ` in -*PyDolphinScheduler* :doc:`concept` for more information. - -.. tab:: Tradition - - .. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial.py - :dedent: 0 - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -.. tab:: Task Decorator - - .. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial_decorator.py - :dedent: 0 - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -.. tab:: YAML File - - .. 
literalinclude:: ../../examples/yaml_define/tutorial.yaml - :start-after: # under the License. - :end-before: # Define the tasks under the workflow - :language: yaml - -We could find more detail about :code:`ProcessDefinition` in :ref:`concept about process definition ` -if you are interested in it. For all arguments of object process definition, you could find in the -:class:`pydolphinscheduler.core.process_definition` API documentation. - -Task Declaration ----------------- - -.. tab:: Tradition - - We declare four tasks to show how to create tasks, and both of them are simple tasks of - :class:`pydolphinscheduler.tasks.shell` which runs `echo` command in the terminal. Besides the argument - `command` with :code:`echo` command, we also need to set the argument `name` for each task - *(not only shell task, `name` is required for each type of task)*. - - .. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial.py - :dedent: 0 - :start-after: [start task_declare] - :end-before: [end task_declare] - - Besides shell task, *PyDolphinScheduler* supports multiple tasks and you could find in :doc:`tasks/index`. - -.. tab:: Task Decorator - - We declare four tasks to show how to create tasks, and both of them are created by the task decorator which - using :func:`pydolphinscheduler.tasks.func_wrap.task`. All we have to do is add a decorator named - :code:`@task` to existing Python function, and then use them inside :class:`pydolphinscheduler.core.process_definition` - - .. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial_decorator.py - :dedent: 0 - :start-after: [start task_declare] - :end-before: [end task_declare] - - It makes our workflow more Pythonic, but be careful that when we use task decorator mode mean we only use - Python function as a task and could not use the :doc:`built-in tasks ` most of the cases. - -.. tab:: YAML File - - .. 
literalinclude:: ../../examples/yaml_define/tutorial.yaml - :start-after: # Define the tasks under the workflow - :language: yaml - -Setting Task Dependence ------------------------ - -After we declare both process definition and task, we have four tasks that are independent and will be running -in parallel. If you want to start one task until some task is finished, you have to set dependence on those -tasks. - -Set task dependence is quite easy by task's attribute :code:`set_downstream` and :code:`set_upstream` or by -bitwise operators :code:`>>` and :code:`<<` - -In this tutorial, task `task_parent` is the leading task of the whole workflow, then task `task_child_one` and -task `task_child_two` are its downstream tasks. Task `task_union` will not run unless both task `task_child_one` -and task `task_child_two` was done, because both two task is `task_union`'s upstream. - -.. tab:: Tradition - - .. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial.py - :dedent: 0 - :start-after: [start task_relation_declare] - :end-before: [end task_relation_declare] - -.. tab:: Task Decorator - - .. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial_decorator.py - :dedent: 0 - :start-after: [start task_relation_declare] - :end-before: [end task_relation_declare] - -.. tab:: YAML File - - We can use :code:`deps:[]` to set task dependence - - .. literalinclude:: ../../examples/yaml_define/tutorial.yaml - :start-after: # Define the tasks under the workflow - :language: yaml - -.. note:: - - We could set task dependence in batch mode if they have the same downstream or upstream by declaring those - tasks as task groups. In tutorial, We declare task `task_child_one` and `task_child_two` as task group named - `task_group`, then set `task_group` as downstream of task `task_parent`. You could see more detail in - :ref:`concept:Tasks Dependence` for more detail about how to set task dependence. 
- -Submit Or Run Workflow ----------------------- - -After that, we finish our workflow definition, with four tasks and task dependence, but all these things are -local, we should let the DolphinScheduler daemon know how the definition of workflow. So the last thing we -have to do is submit the workflow to the DolphinScheduler daemon. - -Fortunately, we have a convenient method to submit workflow via `ProcessDefinition` attribute :code:`run` which -will create workflow definition as well as workflow schedule. - -.. tab:: Tradition - - .. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial.py - :dedent: 0 - :start-after: [start submit_or_run] - :end-before: [end submit_or_run] - -.. tab:: Task Decorator - - .. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial_decorator.py - :dedent: 0 - :start-after: [start submit_or_run] - :end-before: [end submit_or_run] - -.. tab:: YAML File - - pydolphinscheduler YAML CLI always submit workflow. We can run the workflow if we set :code:`run: true` - - .. code-block:: yaml - - # Define the workflow - workflow: - name: "tutorial" - run: true - -At last, we could execute this workflow code in your terminal like other Python scripts, running -:code:`python tutorial.py` to trigger and execute it. - -.. note:: - - If you do not start your DolphinScheduler API server, you could find how to start it in - :ref:`start:start Python gateway service` for more detail. Besides attribute :code:`run`, we have attribute - :code:`submit` for object `ProcessDefinition` which just submits workflow to the daemon but does not set - the workflow schedule information. For more detail, you could see :ref:`concept:process definition`. - -DAG Graph After Tutorial Run ----------------------------- - -After we run the tutorial code, you could log in DolphinScheduler web UI, go and see the -`DolphinScheduler project page`_. 
They is a new process definition be created by *PyDolphinScheduler* and it -named "tutorial" or "tutorial_decorator". The task graph of workflow like below: - -.. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial.py - :language: text - :lines: 24-28 - -Create Process Using YAML File ------------------------------- - -We can use pydolphinscheduler CLI to create process using YAML file - -.. code-block:: bash - - pydolphinscheduler yaml -f Shell.yaml - -We can use the following four special grammars to define workflows more flexibly. - -- :code:`$FILE{"file_name"}`: Read the file (:code:`file_name`) contents and replace them to that location. -- :code:`$WORKFLOW{"other_workflow.yaml"}`: Refer to another process defined using YAML file (:code:`other_workflow.yaml`) and replace the process name in this location. -- :code:`$ENV{env_name}`: Read the environment variable (:code:`env_name`) and replace it to that location. -- :code:`${CONFIG.key_name}`: Read the configuration value of key (:code:`key_name`) and it them to that location. - - -In addition, when loading the file path use :code:`$FILE{"file_name"}` or :code:`$WORKFLOW{"other_workflow.yaml"}`, pydolphinscheduler will search in the path of the YAMl file if the file does not exist. - -For exmaples, our file directory structure is as follows: - -.. code-block:: bash - - . - └── yaml_define - ├── Condition.yaml - ├── DataX.yaml - ├── Dependent_External.yaml - ├── Dependent.yaml - ├── example_datax.json - ├── example_sql.sql - ├── example_subprocess.yaml - ├── Flink.yaml - ├── Http.yaml - ├── MapReduce.yaml - ├── MoreConfiguration.yaml - ├── Procedure.yaml - ├── Python.yaml - ├── Shell.yaml - ├── Spark.yaml - ├── Sql.yaml - ├── SubProcess.yaml - └── Switch.yaml - -After we run - -.. 
code-block:: bash - - pydolphinscheduler yaml -file yaml_define/SubProcess.yaml - - -the :code:`$WORKFLOW{"example_sub_workflow.yaml"}` will be set to :code:`$WORKFLOW{"yaml_define/example_sub_workflow.yaml"}`, because :code:`./example_subprocess.yaml` does not exist and :code:`yaml_define/example_sub_workflow.yaml` does. - -Furthermore, this feature supports recursion all the way down. - - -.. _`DolphinScheduler project page`: https://dolphinscheduler.apache.org/en-us/docs/latest/user_doc/guide/project.html -.. _`Python context manager`: https://docs.python.org/3/library/stdtypes.html#context-manager-types diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Condition.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Condition.yaml deleted file mode 100644 index c65b8c7aeb..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Condition.yaml +++ /dev/null @@ -1,43 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "Condition" - -# Define the tasks under the workflow -tasks: - - { "task_type": "Shell", "name": "pre_task_1", "command": "echo pre_task_1" } - - { "task_type": "Shell", "name": "pre_task_2", "command": "echo pre_task_2" } - - { "task_type": "Shell", "name": "pre_task_3", "command": "echo pre_task_3" } - - { "task_type": "Shell", "name": "success_branch", "command": "echo success_branch" } - - { "task_type": "Shell", "name": "fail_branch", "command": "echo fail_branch" } - - - name: condition - task_type: Condition - success_task: success_branch - failed_task: fail_branch - op: AND - groups: - - op: AND - groups: - - task: pre_task_1 - flag: true - - task: pre_task_2 - flag: true - - task: pre_task_3 - flag: false diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/DataX.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/DataX.yaml deleted file mode 100644 index 00ecd54685..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/DataX.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "DataX" - -# Define the tasks under the workflow -tasks: - - name: task - task_type: DataX - datasource_name: db - datatarget_name: db - sql: show tables; - target_table: table_test - - - name: task_custon_config - task_type: CustomDataX - json: $FILE{"example_datax.json"} diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dependent.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dependent.yaml deleted file mode 100644 index d69fac05da..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dependent.yaml +++ /dev/null @@ -1,76 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -workflow: - name: "Dependent" - -# Define the tasks under the workflow -tasks: - - name: dependent - task_type: Dependent - denpendence: - op: and - groups: - - op: or - groups: - - project_name: pydolphin - process_definition_name: task_dependent_external - dependent_task_name: task_1 - - - project_name: pydolphin - process_definition_name: task_dependent_external - dependent_task_name: task_2 - - - op: and - groups: - - project_name: pydolphin - process_definition_name: task_dependent_external - dependent_task_name: task_1 - dependent_date: LAST_WEDNESDAY - - - project_name: pydolphin - process_definition_name: task_dependent_external - dependent_task_name: task_2 - dependent_date: last24Hours - - - name: dependent_var - task_type: Dependent - denpendence: - op: and - groups: - - op: or - # we can use ${CONFIG.WORKFLOW_PROJECT} to set the value to configuration.WORKFLOW_PROJECT - # we can use $WORKFLOW{"Dependent_External.yaml"} to create or update a workflow from dependent_external.yaml and set the value to that workflow name - groups: - - project_name: ${CONFIG.WORKFLOW_PROJECT} - process_definition_name: $WORKFLOW{"Dependent_External.yaml"} - dependent_task_name: task_1 - - - project_name: ${CONFIG.WORKFLOW_PROJECT} - process_definition_name: $WORKFLOW{"Dependent_External.yaml"} - dependent_task_name: task_2 - - op: and - groups: - - project_name: ${CONFIG.WORKFLOW_PROJECT} - process_definition_name: $WORKFLOW{"Dependent_External.yaml"} - dependent_task_name: task_1 - dependent_date: LAST_WEDNESDAY - - - project_name: ${CONFIG.WORKFLOW_PROJECT} - process_definition_name: $WORKFLOW{"Dependent_External.yaml"} - dependent_task_name: task_2 - dependent_date: last24Hours diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dependent_External.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dependent_External.yaml deleted file mode 100644 index 577ff6a807..0000000000 --- 
a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dependent_External.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Define the workflow -workflow: - name: "task_dependent_external" - -# Define the tasks under the workflow -tasks: - - { "task_type": "Shell", "name": "task_1", "command": "echo task 1" } - - { "task_type": "Shell", "name": "task_2", "command": "echo task 2" } - - { "task_type": "Shell", "name": "task_3", "command": "echo task 3" } diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dvc.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dvc.yaml deleted file mode 100644 index a6ec18c372..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dvc.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Define variable `repository` -repository: &repository "git@github.com:/dvc-data-repository-example.git" - -# Define the workflow -workflow: - name: "DVC" - release_state: "offline" - -# Define the tasks under the process -tasks: - - name: init_dvc - task_type: DVCInit - repository: *repository - store_url: ~/dvc_data - - - name: upload_data - task_type: DVCUpload - repository: *repository - data_path_in_dvc_repository: "iris" - data_path_in_worker: ~/source/iris - version: v1 - message: upload iris data v1 - - - name: download_data - task_type: DVCDownload - repository: *repository - data_path_in_dvc_repository: "iris" - data_path_in_worker: ~/target/iris - version: v1 diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Flink.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Flink.yaml deleted file mode 100644 index 2449d435a3..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Flink.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Define the workflow -workflow: - name: "Flink" - -# Define the tasks under the workflow -tasks: - - name: task - task_type: Flink - main_class: org.apache.flink.streaming.examples.wordcount.WordCount - main_package: test_java.jar - program_type: JAVA - deploy_mode: local diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Http.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Http.yaml deleted file mode 100644 index 1483aeb3d8..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Http.yaml +++ /dev/null @@ -1,37 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "Http" - -# Define the tasks under the workflow -tasks: - - name: task - task_type: Http - url: "https://httpbin.org/get" - http_method: "GET" - http_params: - - { "prop": "a", "httpParametersType": "PARAMETER", "value": "1" } - - { "prop": "b", "httpParametersType": "PARAMETER", "value": "2" } - - { - "prop": "Content-Type", - "httpParametersType": "header", - "value": "test", - } - http_check_condition: "STATUS_CODE_CUSTOM" - condition: "404" diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/MapReduce.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/MapReduce.yaml deleted file mode 100644 index e1a2b5709c..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/MapReduce.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "MapReduce" - -# Define the tasks under the workflow -tasks: - - name: task - task_type: MR - main_class: wordcount - main_package: test_java.jar - program_type: SCALA - main_args: /dolphinscheduler/tenant_exists/resources/file.txt /output/ds diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/MoreConfiguration.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/MoreConfiguration.yaml deleted file mode 100644 index 258aa33433..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/MoreConfiguration.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "MoreConfiguration" - param: - n: 1 - -# Define the tasks under the workflow -tasks: - - name: shell_0 - task_type: Shell - description: "yaml define task" - flag: "YES" - command: | - echo "$ENV{HOME}" - echo "${n}" - task_priority: "HIGH" - delay_time: 20 - fail_retry_times: 30 - fail_retry_interval: 5 - timeout_flag: "CLOSE" - timeout: 60 - local_params: - - { "prop": "n", "direct": "IN", "type": "VARCHAR", "value": "${n}" } diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/OpenMLDB.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/OpenMLDB.yaml deleted file mode 100644 index b455cb0768..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/OpenMLDB.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "OpenMLDB" - -# Define the tasks under the workflow -tasks: - - name: OpenMLDB - task_type: OpenMLDB - zookeeper: "127.0.0.1:2181" - zookeeper_path: "/openmldb" - execute_mode: "online" - sql: | - USE demo_db; - set @@job_timeout=200000; - LOAD DATA INFILE 'file:///tmp/train_sample.csv' - INTO TABLE talkingdata OPTIONS(mode='overwrite'); diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Procedure.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Procedure.yaml deleted file mode 100644 index 829a961c1a..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Procedure.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "Procedure" - -# Define the tasks under the workflow -tasks: - - name: task - task_type: Procedure - datasource_name: db - method: show tables; diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Python.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Python.yaml deleted file mode 100644 index 728b5c928e..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Python.yaml +++ /dev/null @@ -1,30 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Define the workflow -workflow: - name: "Python" - -# Define the tasks under the workflow -tasks: - - name: python - task_type: Python - definition: | - import os - print(os) - print("1") - print("2") diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Pytorch.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Pytorch.yaml deleted file mode 100644 index 8706824245..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Pytorch.yaml +++ /dev/null @@ -1,53 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Define the workflow -workflow: - name: "Pytorch" - -# Define the tasks under the workflow -tasks: - - # run project with existing environment - - name: task_existing_env - task_type: pytorch - script: main.py - script_params: --dry-run --no-cuda - project_path: https://github.com/pytorch/examples#mnist - python_command: /home/anaconda3/envs/pytorch/bin/python3 - - - # run project with creating conda environment - - name: task_conda_env - task_type: pytorch - script: main.py - script_params: --dry-run --no-cuda - project_path: https://github.com/pytorch/examples#mnist - is_create_environment: True - python_env_tool: conda - requirements: requirements.txt - conda_python_version: 3.7 - - # run project with creating virtualenv environment - - name: task_virtualenv_env - task_type: pytorch - script: main.py - script_params: --dry-run --no-cuda - project_path: https://github.com/pytorch/examples#mnist - is_create_environment: True - python_env_tool: virtualenv - requirements: requirements.txt diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Sagemaker.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Sagemaker.yaml deleted file mode 100644 index 9f77a3caa8..0000000000 --- 
a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Sagemaker.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Define the workflow -workflow: - name: "Sagemaker" - release_state: "offline" - -# Define the tasks under the process -tasks: - - name: sagemaker - task_type: Sagemaker - sagemaker_request_json: $FILE{"example_sagemaker_params.json"} - diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Shell.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Shell.yaml deleted file mode 100644 index fdbe126327..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Shell.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Define the workflow -workflow: - name: "Shell" - release_state: "offline" - run: true - -# Define the tasks under the process -tasks: - - name: task_parent - task_type: Shell - command: | - echo hello pydolphinscheduler - echo run task parent - - - name: task_child_one - task_type: Shell - deps: [task_parent] - command: echo "child one" - - - name: task_child_two - task_type: Shell - deps: [task_parent] - command: echo "child two" diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Spark.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Spark.yaml deleted file mode 100644 index e45514bbf1..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Spark.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "Spark" - -# Define the tasks under the workflow -tasks: - - name: task - task_type: Spark - main_class: org.apache.spark.examples.SparkPi - main_package: test_java.jar - program_type: SCALA - deploy_mode: local diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Sql.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Sql.yaml deleted file mode 100644 index c3c7e88ee1..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Sql.yaml +++ /dev/null @@ -1,45 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "Sql" - -# Define the tasks under the workflow -tasks: - - name: task_base - task_type: Sql - datasource_name: "db" - sql: show tables; - - - name: task_multi_line - task_type: Sql - datasource_name: "db" - sql: | - show tables; - select id from version where id=1; - - - name: task_file - task_type: Sql - datasource_name: "db" - sql: $FILE{"example_sql.sql"} - - # Or you can define task "task_union" it with one line - - { "task_type": "Sql", "name": "task_base_one_line", "datasource_name": "db", "sql": "select id from version where id=1;"} - - # Or you can define task "task_union" it with one line - - { "task_type": "Sql", "name": "task_file_one_line", "datasource_name": "db", "sql": '$FILE{"example_sql.sql"}'} diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/SubProcess.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/SubProcess.yaml deleted file mode 100644 index 0ea7549db4..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/SubProcess.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "SubWorkflow" - -tasks: - - name: example_workflow - task_type: SubProcess - process_definition_name: $WORKFLOW{"example_sub_workflow.yaml"} - - - { "task_type": "Shell", "deps": [example_workflow], "name": "task_3", "command": "echo task 3" } diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Switch.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Switch.yaml deleted file mode 100644 index 33ed68813e..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Switch.yaml +++ /dev/null @@ -1,39 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "Switch" - param: - var: 1 - -# Define the tasks under the workflow -tasks: - - name: switch_child_1 - task_type: Shell - command: echo switch_child_1 - - - name: switch_child_2 - task_type: Shell - command: echo switch_child_2 - - - name: switch - task_type: Switch - condition: - - task: switch_child_1 - condition: "${var} > 1" - - task: switch_child_2 diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_datax.json b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_datax.json deleted file mode 100644 index 3db8092cb6..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_datax.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "job": { - "content": [ - { - "reader": { - "name": "mysqlreader", - "parameter": { - "username": "usr", - "password": "pwd", - "column": [ - "id", - "name", - "code", - "description" - ], - "splitPk": "id", - "connection": [ - { - "table": [ - "source_table" - ], - "jdbcUrl": [ - "jdbc:mysql://127.0.0.1:3306/source_db" - ] - } - ] - } - }, - "writer": { - "name": "mysqlwriter", - "parameter": { - "writeMode": "insert", - "username": "usr", - "password": "pwd", - "column": [ - "id", - "name" - ], - "connection": [ - { - "jdbcUrl": "jdbc:mysql://127.0.0.1:3306/target_db", - "table": [ - "target_table" - ] - } - ] - } - } - } - ], - "setting": { - "errorLimit": { - "percentage": 0, - "record": 0 - }, - "speed": { - "channel": 1, - "record": 1000 - } - } - } -} diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sagemaker_params.json b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sagemaker_params.json deleted file mode 100644 index 9403320355..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sagemaker_params.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "ParallelismConfiguration":{ - "MaxParallelExecutionSteps":1 - }, - 
"PipelineExecutionDescription":"run pipeline using ds", - "PipelineExecutionDisplayName":"ds-sagemaker-pipeline", - "PipelineName":"DsSagemakerPipeline", - "PipelineParameters":[ - { - "Name":"InputData", - "Value": "s3://sagemaker/dataset/dataset.csv" - }, - { - "Name":"InferenceData", - "Value": "s3://sagemaker/dataset/inference.csv" - } - ] -} diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sql.sql b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sql.sql deleted file mode 100644 index 06b5c4c16c..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sql.sql +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
-*/ - -select id from version where id=1; -select id from version where id=2; -select id from version where id=3; -select id from version where id=4; -select id from version where id=5; diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sub_workflow.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sub_workflow.yaml deleted file mode 100644 index af3a863da9..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sub_workflow.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "example_workflow_for_sub_workflow" - -# Define the tasks under the workflow -tasks: - - { "task_type": "Shell", "name": "task_1", "command": "echo task 1" } - - { "task_type": "Shell", "deps": [task_1], "name": "task_2", "command": "echo task 2" } - - { "task_type": "Shell", "deps": [task_2], "name": "task_3", "command": "echo task 3" } diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/mlflow.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/mlflow.yaml deleted file mode 100644 index 45e56726e1..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/mlflow.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- - -# Define variable `mlflow_tracking_uri` -mlflow_tracking_uri: &mlflow_tracking_uri "http://127.0.0.1:5000" - -# Define the workflow -workflow: - name: "MLflow" - -# Define the tasks under the workflow -tasks: - - name: train_xgboost_native - task_type: MLFlowProjectsCustom - repository: https://github.com/mlflow/mlflow#examples/xgboost/xgboost_native - mlflow_tracking_uri: *mlflow_tracking_uri - parameters: -P learning_rate=0.2 -P colsample_bytree=0.8 -P subsample=0.9 - experiment_name: xgboost - - - name: train_automl - task_type: MLFlowProjectsAutoML - mlflow_tracking_uri: *mlflow_tracking_uri - parameters: time_budget=30;estimator_list=['lgbm'] - experiment_name: automl_iris - model_name: iris_A - automl_tool: flaml - data_path: /data/examples/iris - - - name: deploy_docker - task_type: MLflowModels - deps: [train_automl] - model_uri: models:/iris_A/Production - mlflow_tracking_uri: *mlflow_tracking_uri - deploy_mode: DOCKER - port: 7002 - - - name: train_basic_algorithm - task_type: MLFlowProjectsBasicAlgorithm - mlflow_tracking_uri: *mlflow_tracking_uri - parameters: n_estimators=200;learning_rate=0.2 - experiment_name: basic_algorithm_iris - model_name: iris_B - algorithm: lightgbm - data_path: /data/examples/iris - search_params: max_depth=[5, 10];n_estimators=[100, 200] - - - name: deploy_mlflow - deps: [train_basic_algorithm] - task_type: MLflowModels - model_uri: models:/iris_B/Production - mlflow_tracking_uri: *mlflow_tracking_uri - deploy_mode: MLFLOW - port: 7001 - diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/tutorial.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/tutorial.yaml deleted file mode 100644 index 104a8c367b..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/tutorial.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Define the workflow -workflow: - name: "tutorial" - schedule: "0 0 0 * * ? *" - start_time: "2021-01-01" - tenant: "tenant_exists" - release_state: "offline" - run: true - -# Define the tasks under the workflow -tasks: - - name: task_parent - task_type: Shell - command: echo hello pydolphinscheduler - - - name: task_child_one - task_type: Shell - deps: [task_parent] - command: echo "child one" - - - name: task_child_two - task_type: Shell - deps: [task_parent] - command: echo "child two" - - - name: task_union - task_type: Shell - deps: [task_child_one, task_child_two] - command: echo "union" diff --git a/dolphinscheduler-python/pydolphinscheduler/pytest.ini b/dolphinscheduler-python/pydolphinscheduler/pytest.ini deleted file mode 100644 index b1aa850346..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/pytest.ini +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -[pytest] -# add path here to skip pytest scan it -norecursedirs = - tests/testing - # Integration test run seperated which do not calculate coverage, it will run in `tox -e integrate-test` - tests/integration diff --git a/dolphinscheduler-python/pydolphinscheduler/setup.cfg b/dolphinscheduler-python/pydolphinscheduler/setup.cfg deleted file mode 100644 index 13a83393a9..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/setup.cfg +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
diff --git a/dolphinscheduler-python/pydolphinscheduler/setup.py b/dolphinscheduler-python/pydolphinscheduler/setup.py deleted file mode 100644 index 66a1ffc86c..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/setup.py +++ /dev/null @@ -1,198 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""The script for setting up pydolphinscheduler.""" -import logging -import os -import sys -from distutils.dir_util import remove_tree -from os.path import dirname, join -from typing import List - -from setuptools import Command, find_packages, setup - -if sys.version_info[0] < 3: - raise Exception( - "pydolphinscheduler does not support Python 2. Please upgrade to Python 3." 
- ) - -logger = logging.getLogger(__name__) - -version = "dev" - -# Start package required -prod = [ - "boto3>=1.23.10", - "oss2>=2.16.0", - "python-gitlab>=2.10.1", - "click>=8.0.0", - "py4j~=0.10", - "ruamel.yaml", -] - -build = [ - "build", - "setuptools>=42", - "wheel", -] - -doc = [ - "sphinx>=4.3", - "sphinx_rtd_theme>=1.0", - "sphinx-click>=3.0", - "sphinx-inline-tabs", - "sphinx-copybutton>=0.4.0", - # Unreleased package have a feature we want(use correct version package for API ref), so we install from - # GitHub directly, see also: - # https://github.com/Holzhaus/sphinx-multiversion/issues/42#issuecomment-1210539786 - "sphinx-multiversion @ git+https://github.com/Holzhaus/sphinx-multiversion#egg=sphinx-multiversion", -] - -test = [ - "pytest>=6.2", - "freezegun>=1.1", - "coverage>=6.1", - "pytest-cov>=3.0", - "docker>=5.0.3", -] - -style = [ - "flake8>=4.0", - "flake8-docstrings>=1.6", - "flake8-black>=0.2", - "isort>=5.10", - "autoflake>=1.4", -] - -dev = style + test + doc + build - -all_dep = prod + dev -# End package required - - -def read(*names, **kwargs): - """Read file content from given file path.""" - return open( - join(dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8") - ).read() - - -class CleanCommand(Command): - """Command to clean up python api before setup by running `python setup.py pre_clean`.""" - - description = "Clean up project root" - user_options: List[str] = [] - clean_list = [ - "build", - "htmlcov", - "dist", - ".pytest_cache", - ".coverage", - ] - - def initialize_options(self) -> None: - """Set default values for options.""" - - def finalize_options(self) -> None: - """Set final values for options.""" - - def run(self) -> None: - """Run and remove temporary files.""" - for cl in self.clean_list: - if not os.path.exists(cl): - logger.info("Path %s do not exists.", cl) - elif os.path.isdir(cl): - remove_tree(cl) - else: - os.remove(cl) - logger.info("Finish pre_clean process.") - - -setup( - 
name="apache-dolphinscheduler", - version=version, - license="Apache License 2.0", - description="Apache DolphinScheduler Python API", - long_description=read("README.md"), - # Make sure pypi is expecting markdown - long_description_content_type="text/markdown", - author="Apache Software Foundation", - author_email="dev@dolphinscheduler.apache.org", - url="https://dolphinscheduler.apache.org/", - python_requires=">=3.6", - keywords=[ - "dolphinscheduler", - "workflow", - "scheduler", - "taskflow", - ], - project_urls={ - "Homepage": "https://dolphinscheduler.apache.org", - "Documentation": "https://dolphinscheduler.apache.org/python/dev/index.html", - "Source": "https://github.com/apache/dolphinscheduler/tree/dev/dolphinscheduler-python/" - "pydolphinscheduler", - "Issue Tracker": "https://github.com/apache/dolphinscheduler/issues?" - "q=is%3Aissue+is%3Aopen+label%3APython", - "Discussion": "https://github.com/apache/dolphinscheduler/discussions", - "Twitter": "https://twitter.com/dolphinschedule", - }, - packages=find_packages(where="src"), - package_dir={"": "src"}, - include_package_data=True, - package_data={ - "pydolphinscheduler": ["default_config.yaml"], - }, - platforms=["any"], - classifiers=[ - # complete classifier list: http://pypi.python.org/pypi?%3Aaction=list_classifiers - "Development Status :: 4 - Beta", - "Environment :: Console", - "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", - "Operating System :: Unix", - "Operating System :: POSIX", - "Operating System :: Microsoft :: Windows", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: 
Python :: Implementation :: PyPy", - "Topic :: Software Development :: User Interfaces", - ], - install_requires=prod, - extras_require={ - "all": all_dep, - "dev": dev, - "style": style, - "test": test, - "doc": doc, - "build": build, - }, - cmdclass={ - "pre_clean": CleanCommand, - }, - entry_points={ - "console_scripts": [ - "pydolphinscheduler = pydolphinscheduler.cli.commands:cli", - ], - }, -) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/__init__.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/__init__.py deleted file mode 100644 index 2a7b55430c..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Init root of pydolphinscheduler.""" - -from pkg_resources import get_distribution - -__version__ = get_distribution("apache-dolphinscheduler").version diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/cli/__init__.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/cli/__init__.py deleted file mode 100644 index 5f30c83241..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/cli/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Commands line interface of pydolphinscheduler.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/cli/commands.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/cli/commands.py deleted file mode 100644 index 8d923f1406..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/cli/commands.py +++ /dev/null @@ -1,106 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Commands line interface's command of pydolphinscheduler.""" - -import click -from click import echo - -import pydolphinscheduler -from pydolphinscheduler.configuration import ( - get_single_config, - init_config_file, - set_single_config, -) -from pydolphinscheduler.core.yaml_process_define import create_process_definition - -version_option_val = ["major", "minor", "micro"] - - -@click.group() -def cli(): - """Apache DolphinScheduler Python API's command line interface.""" - - -@cli.command() -@click.option( - "--part", - "-p", - required=False, - type=click.Choice(version_option_val, case_sensitive=False), - multiple=False, - help="The part of version your want to get.", -) -def version(part: str) -> None: - """Show current version of pydolphinscheduler.""" - if part: - idx = version_option_val.index(part) - echo(f"{pydolphinscheduler.__version__.split('.')[idx]}") - else: - echo(f"{pydolphinscheduler.__version__}") - - -@cli.command() -@click.option( - "--init", - "-i", - is_flag=True, - help="Initialize and create configuration file to `PYDS_HOME`.", -) -@click.option( - "--set", - "-s", - "setter", - multiple=True, - type=click.Tuple([str, str]), - help="Set specific setting to config file." - "Use multiple ``--set `` options to set multiple configs", -) -@click.option( - "--get", - "-g", - "getter", - multiple=True, - type=str, - help="Get specific setting from config file." 
- "Use multiple ``--get `` options to get multiple configs", -) -def config(getter, setter, init) -> None: - """Manage the configuration for pydolphinscheduler.""" - if init: - init_config_file() - elif getter: - click.echo("The configuration query as below:\n") - configs_kv = [f"{key} = {get_single_config(key)}" for key in getter] - click.echo("\n".join(configs_kv)) - elif setter: - for key, val in setter: - set_single_config(key, val) - click.echo("Set configuration done.") - - -@cli.command() -@click.option( - "--yaml_file", - "-f", - required=True, - help="YAML file path", - type=click.Path(exists=True), -) -def yaml(yaml_file) -> None: - """Create process definition using YAML file.""" - create_process_definition(yaml_file) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/configuration.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/configuration.py deleted file mode 100644 index 860f9869f3..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/configuration.py +++ /dev/null @@ -1,193 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Configuration module for pydolphinscheduler.""" -import os -from pathlib import Path -from typing import Any - -from pydolphinscheduler.exceptions import PyDSConfException -from pydolphinscheduler.utils import file -from pydolphinscheduler.utils.yaml_parser import YamlParser - -BUILD_IN_CONFIG_PATH = Path(__file__).resolve().parent.joinpath("default_config.yaml") - - -def config_path() -> Path: - """Get the path of pydolphinscheduler configuration file.""" - pyds_home = os.environ.get("PYDS_HOME", "~/pydolphinscheduler") - config_file_path = Path(pyds_home).joinpath("config.yaml").expanduser() - return config_file_path - - -def get_configs() -> YamlParser: - """Get all configuration settings from configuration file. - - Will use custom configuration file first if it exists, otherwise default configuration file in - default path. - """ - path = str(config_path()) if config_path().exists() else BUILD_IN_CONFIG_PATH - with open(path, mode="r") as f: - return YamlParser(f.read()) - - -def init_config_file() -> None: - """Initialize configuration file by default configs.""" - if config_path().exists(): - raise PyDSConfException( - "Initialize configuration false to avoid overwrite configure by accident, file already exists " - "in %s, if you wan to overwrite the exists configure please remove the exists file manually.", - str(config_path()), - ) - file.write(content=str(get_configs()), to_path=str(config_path())) - - -def get_single_config(key: str) -> Any: - """Get single config to configuration file. - - Support get from nested keys by delimiter ``.``. - - For example, yaml config as below: - - .. code-block:: yaml - - one: - two1: - three: value1 - two2: value2 - - you could get ``value1`` and ``value2`` by nested path - - .. code-block:: python - - value1 = get_single_config("one.two1.three") - value2 = get_single_config("one.two2") - - :param key: The config key want to get it value. 
- """ - config = get_configs() - if key not in config: - raise PyDSConfException( - "Configuration path %s do not exists. Can not get configuration.", key - ) - return config[key] - - -def set_single_config(key: str, value: Any) -> None: - """Change single config to configuration file. - - For example, yaml config as below: - - .. code-block:: yaml - - one: - two1: - three: value1 - two2: value2 - - you could change ``value1`` to ``value3``, also change ``value2`` to ``value4`` by nested path assigned - - .. code-block:: python - - set_single_config["one.two1.three"] = "value3" - set_single_config["one.two2"] = "value4" - - :param key: The config key want change. - :param value: The new value want to set. - """ - config = get_configs() - if key not in config: - raise PyDSConfException( - "Configuration path %s do not exists. Can not set configuration.", key - ) - config[key] = value - file.write(content=str(config), to_path=str(config_path()), overwrite=True) - - -def get_int(val: Any) -> int: - """Covert value to int.""" - return int(val) - - -def get_bool(val: Any) -> bool: - """Covert value to boolean.""" - if isinstance(val, str): - return val.lower() in {"true", "t"} - elif isinstance(val, int): - return val == 1 - else: - return bool(val) - - -# Start Common Configuration Settings - -# Add configs as module variables to avoid read configuration multiple times when -# Get common configuration setting -# Set or get multiple configs in single time -configs: YamlParser = get_configs() - -# Java Gateway Settings -JAVA_GATEWAY_ADDRESS = os.environ.get( - "PYDS_JAVA_GATEWAY_ADDRESS", configs.get("java_gateway.address") -) -JAVA_GATEWAY_PORT = get_int( - os.environ.get("PYDS_JAVA_GATEWAY_PORT", configs.get("java_gateway.port")) -) -JAVA_GATEWAY_AUTO_CONVERT = get_bool( - os.environ.get( - "PYDS_JAVA_GATEWAY_AUTO_CONVERT", configs.get("java_gateway.auto_convert") - ) -) - -# User Settings -USER_NAME = os.environ.get("PYDS_USER_NAME", configs.get("default.user.name")) 
-USER_PASSWORD = os.environ.get( - "PYDS_USER_PASSWORD", configs.get("default.user.password") -) -USER_EMAIL = os.environ.get("PYDS_USER_EMAIL", configs.get("default.user.email")) -USER_PHONE = str(os.environ.get("PYDS_USER_PHONE", configs.get("default.user.phone"))) -USER_STATE = get_int( - os.environ.get("PYDS_USER_STATE", configs.get("default.user.state")) -) - -# Workflow Settings -WORKFLOW_PROJECT = os.environ.get( - "PYDS_WORKFLOW_PROJECT", configs.get("default.workflow.project") -) -WORKFLOW_TENANT = os.environ.get( - "PYDS_WORKFLOW_TENANT", configs.get("default.workflow.tenant") -) -WORKFLOW_USER = os.environ.get( - "PYDS_WORKFLOW_USER", configs.get("default.workflow.user") -) -WORKFLOW_QUEUE = os.environ.get( - "PYDS_WORKFLOW_QUEUE", configs.get("default.workflow.queue") -) -WORKFLOW_RELEASE_STATE = os.environ.get( - "PYDS_WORKFLOW_RELEASE_STATE", configs.get("default.workflow.release_state") -) -WORKFLOW_WORKER_GROUP = os.environ.get( - "PYDS_WORKFLOW_WORKER_GROUP", configs.get("default.workflow.worker_group") -) -WORKFLOW_TIME_ZONE = os.environ.get( - "PYDS_WORKFLOW_TIME_ZONE", configs.get("default.workflow.time_zone") -) -WORKFLOW_WARNING_TYPE = os.environ.get( - "PYDS_WORKFLOW_WARNING_TYPE", configs.get("default.workflow.warning_type") -) - -# End Common Configuration Setting diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/constants.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/constants.py deleted file mode 100644 index bedbbf2f5e..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/constants.py +++ /dev/null @@ -1,122 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Constants for pydolphinscheduler.""" - - -class TaskPriority(str): - """Constants for task priority.""" - - HIGHEST = "HIGHEST" - HIGH = "HIGH" - MEDIUM = "MEDIUM" - LOW = "LOW" - LOWEST = "LOWEST" - - -class TaskFlag(str): - """Constants for task flag.""" - - YES = "YES" - NO = "NO" - - -class TaskTimeoutFlag(str): - """Constants for task timeout flag.""" - - CLOSE = "CLOSE" - - -class TaskType(str): - """Constants for task type, it will also show you which kind we support up to now.""" - - SHELL = "SHELL" - HTTP = "HTTP" - PYTHON = "PYTHON" - SQL = "SQL" - SUB_PROCESS = "SUB_PROCESS" - PROCEDURE = "PROCEDURE" - DATAX = "DATAX" - DEPENDENT = "DEPENDENT" - CONDITIONS = "CONDITIONS" - SWITCH = "SWITCH" - FLINK = "FLINK" - SPARK = "SPARK" - MR = "MR" - SAGEMAKER = "SAGEMAKER" - MLFLOW = "MLFLOW" - OPENMLDB = "OPENMLDB" - PYTORCH = "PYTORCH" - DVC = "DVC" - - -class DefaultTaskCodeNum(str): - """Constants and default value for default task code number.""" - - DEFAULT = 1 - - -class JavaGatewayDefault(str): - """Constants and default value for java gateway.""" - - RESULT_MESSAGE_KEYWORD = "msg" - RESULT_MESSAGE_SUCCESS = "success" - - RESULT_STATUS_KEYWORD = "status" - RESULT_STATUS_SUCCESS = "SUCCESS" - - RESULT_DATA = "data" - - -class Delimiter(str): - """Constants for delimiter.""" - - BAR = "-" - DASH = "/" - COLON = ":" - UNDERSCORE = "_" - DIRECTION = "->" - - -class Time(str): - 
"""Constants for date.""" - - FMT_STD_DATE = "%Y-%m-%d" - LEN_STD_DATE = 10 - - FMT_DASH_DATE = "%Y/%m/%d" - - FMT_SHORT_DATE = "%Y%m%d" - LEN_SHORT_DATE = 8 - - FMT_STD_TIME = "%H:%M:%S" - FMT_NO_COLON_TIME = "%H%M%S" - - -class ResourceKey(str): - """Constants for key of resource.""" - - ID = "id" - - -class Symbol(str): - """Constants for symbol.""" - - SLASH = "/" - POINT = "." - COMMA = "," - UNDERLINE = "_" diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/__init__.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/__init__.py deleted file mode 100644 index b997c3e9de..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Init pydolphinscheduler.core package.""" - -from pydolphinscheduler.core.database import Database -from pydolphinscheduler.core.engine import Engine -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.core.task import Task - -__all__ = [ - "Database", - "Engine", - "ProcessDefinition", - "Task", -] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/database.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/database.py deleted file mode 100644 index 4a93f22f3f..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/database.py +++ /dev/null @@ -1,62 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Module database.""" - -from typing import Dict - -from py4j.protocol import Py4JJavaError - -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.java_gateway import JavaGate - - -class Database(dict): - """database object, get information about database. - - You provider database_name contain connection information, it decisions which - database type and database instance would run task. 
- """ - - def __init__(self, database_name: str, type_key, database_key, *args, **kwargs): - super().__init__(*args, **kwargs) - self._database = {} - self.database_name = database_name - self[type_key] = self.database_type - self[database_key] = self.database_id - - @property - def database_type(self) -> str: - """Get database type from java gateway, a wrapper for :func:`get_database_info`.""" - return self.get_database_info(self.database_name).get("type") - - @property - def database_id(self) -> str: - """Get database id from java gateway, a wrapper for :func:`get_database_info`.""" - return self.get_database_info(self.database_name).get("id") - - def get_database_info(self, name) -> Dict: - """Get database info from java gateway, contains database id, type, name.""" - if self._database: - return self._database - else: - try: - self._database = JavaGate().get_datasource_info(name) - # Handler database source do not exists error, for now we just terminate the process. - except Py4JJavaError as ex: - raise PyDSParamException(str(ex.java_exception)) - return self._database diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/engine.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/engine.py deleted file mode 100644 index 41021ed474..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/engine.py +++ /dev/null @@ -1,94 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Module engine.""" - -from typing import Dict, Optional - -from py4j.protocol import Py4JJavaError - -from pydolphinscheduler.core.task import Task -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.java_gateway import JavaGate - - -class ProgramType(str): - """Type of program engine runs, for now it just contain `JAVA`, `SCALA` and `PYTHON`.""" - - JAVA = "JAVA" - SCALA = "SCALA" - PYTHON = "PYTHON" - - -class Engine(Task): - """Task engine object, declare behavior for engine task to dolphinscheduler. - - This is the parent class of spark, flink and mr tasks, - and is used to provide the programType, mainClass and mainJar task parameters for reuse. - """ - - def __init__( - self, - name: str, - task_type: str, - main_class: str, - main_package: str, - program_type: Optional[ProgramType] = ProgramType.SCALA, - *args, - **kwargs - ): - super().__init__(name, task_type, *args, **kwargs) - self.main_class = main_class - self.main_package = main_package - self.program_type = program_type - self._resource = {} - - def get_resource_info(self, program_type, main_package): - """Get resource info from java gateway, contains resource id, name.""" - if self._resource: - return self._resource - else: - try: - self._resource = JavaGate().get_resources_file_info( - program_type, main_package - ) - # Handler source do not exists error, for now we just terminate the process. 
- except Py4JJavaError as ex: - raise PyDSParamException(str(ex.java_exception)) - return self._resource - - def get_jar_id(self) -> int: - """Get jar id from java gateway, a wrapper for :func:`get_resource_info`.""" - return self.get_resource_info(self.program_type, self.main_package).get("id") - - @property - def task_params(self, camel_attr: bool = True, custom_attr: set = None) -> Dict: - """Override Task.task_params for engine children task. - - children task have some specials attribute for task_params, and is odd if we - directly set as python property, so we Override Task.task_params here. - """ - params = super().task_params - custom_params = { - "programType": self.program_type, - "mainClass": self.main_class, - "mainJar": { - "id": self.get_jar_id(), - }, - } - params.update(custom_params) - return params diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/process_definition.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/process_definition.py deleted file mode 100644 index 62de7ed1b4..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/process_definition.py +++ /dev/null @@ -1,424 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Module process definition, core class for workflow define.""" - -import json -from datetime import datetime -from typing import Any, Dict, List, Optional, Set - -from pydolphinscheduler import configuration -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.resource import Resource -from pydolphinscheduler.core.resource_plugin import ResourcePlugin -from pydolphinscheduler.exceptions import PyDSParamException, PyDSTaskNoFoundException -from pydolphinscheduler.java_gateway import JavaGate -from pydolphinscheduler.models import Base, Project, Tenant, User -from pydolphinscheduler.utils.date import MAX_DATETIME, conv_from_str, conv_to_schedule - - -class ProcessDefinitionContext: - """Class process definition context, use when task get process definition from context expression.""" - - _context_managed_process_definition: Optional["ProcessDefinition"] = None - - @classmethod - def set(cls, pd: "ProcessDefinition") -> None: - """Set attribute self._context_managed_process_definition.""" - cls._context_managed_process_definition = pd - - @classmethod - def get(cls) -> Optional["ProcessDefinition"]: - """Get attribute self._context_managed_process_definition.""" - return cls._context_managed_process_definition - - @classmethod - def delete(cls) -> None: - """Delete attribute self._context_managed_process_definition.""" - cls._context_managed_process_definition = None - - -class ProcessDefinition(Base): - """process definition object, will define process definition attribute, task, relation. - - TODO: maybe we should rename this class, currently use DS object name. - - :param user: The user for current process definition. Will create a new one if it do not exists. If your - parameter ``project`` already exists but project's create do not belongs to ``user``, will grant - ``project`` to ``user`` automatically. 
- :param project: The project for current process definition. You could see the workflow in this project - thought Web UI after it :func:`submit` or :func:`run`. It will create a new project belongs to - ``user`` if it does not exists. And when ``project`` exists but project's create do not belongs - to ``user``, will grant `project` to ``user`` automatically. - :param resource_list: Resource files required by the current process definition.You can create and modify - resource files from this field. When the process definition is submitted, these resource files are - also submitted along with it. - """ - - # key attribute for identify ProcessDefinition object - _KEY_ATTR = { - "name", - "project", - "tenant", - "release_state", - "param", - } - - _DEFINE_ATTR = { - "name", - "description", - "_project", - "_tenant", - "worker_group", - "warning_type", - "warning_group_id", - "timeout", - "release_state", - "param", - "tasks", - "task_definition_json", - "task_relation_json", - "resource_list", - } - - def __init__( - self, - name: str, - description: Optional[str] = None, - schedule: Optional[str] = None, - start_time: Optional[str] = None, - end_time: Optional[str] = None, - timezone: Optional[str] = configuration.WORKFLOW_TIME_ZONE, - user: Optional[str] = configuration.WORKFLOW_USER, - project: Optional[str] = configuration.WORKFLOW_PROJECT, - tenant: Optional[str] = configuration.WORKFLOW_TENANT, - worker_group: Optional[str] = configuration.WORKFLOW_WORKER_GROUP, - warning_type: Optional[str] = configuration.WORKFLOW_WARNING_TYPE, - warning_group_id: Optional[int] = 0, - timeout: Optional[int] = 0, - release_state: Optional[str] = configuration.WORKFLOW_RELEASE_STATE, - param: Optional[Dict] = None, - resource_plugin: Optional[ResourcePlugin] = None, - resource_list: Optional[List[Resource]] = None, - ): - super().__init__(name, description) - self.schedule = schedule - self._start_time = start_time - self._end_time = end_time - self.timezone = timezone - 
self._user = user - self._project = project - self._tenant = tenant - self.worker_group = worker_group - self.warning_type = warning_type - if warning_type.strip().upper() not in ("FAILURE", "SUCCESS", "ALL", "NONE"): - raise PyDSParamException( - "Parameter `warning_type` with unexpect value `%s`", warning_type - ) - else: - self.warning_type = warning_type.strip().upper() - self.warning_group_id = warning_group_id - self.timeout = timeout - self._release_state = release_state - self.param = param - self.tasks: dict = {} - self.resource_plugin = resource_plugin - # TODO how to fix circle import - self._task_relations: set["TaskRelation"] = set() # noqa: F821 - self._process_definition_code = None - self.resource_list = resource_list or [] - - def __enter__(self) -> "ProcessDefinition": - ProcessDefinitionContext.set(self) - return self - - def __exit__(self, exc_type, exc_val, exc_tb) -> None: - ProcessDefinitionContext.delete() - - @property - def tenant(self) -> Tenant: - """Get attribute tenant.""" - return Tenant(self._tenant) - - @tenant.setter - def tenant(self, tenant: Tenant) -> None: - """Set attribute tenant.""" - self._tenant = tenant.name - - @property - def project(self) -> Project: - """Get attribute project.""" - return Project(self._project) - - @project.setter - def project(self, project: Project) -> None: - """Set attribute project.""" - self._project = project.name - - @property - def user(self) -> User: - """Get user object. - - For now we just get from python models but not from java gateway models, so it may not correct. 
- """ - return User(name=self._user, tenant=self._tenant) - - @staticmethod - def _parse_datetime(val: Any) -> Any: - if val is None or isinstance(val, datetime): - return val - elif isinstance(val, str): - return conv_from_str(val) - else: - raise PyDSParamException("Do not support value type %s for now", type(val)) - - @property - def start_time(self) -> Any: - """Get attribute start_time.""" - return self._parse_datetime(self._start_time) - - @start_time.setter - def start_time(self, val) -> None: - """Set attribute start_time.""" - self._start_time = val - - @property - def end_time(self) -> Any: - """Get attribute end_time.""" - return self._parse_datetime(self._end_time) - - @end_time.setter - def end_time(self, val) -> None: - """Set attribute end_time.""" - self._end_time = val - - @property - def release_state(self) -> int: - """Get attribute release_state.""" - rs_ref = { - "online": 1, - "offline": 0, - } - if self._release_state not in rs_ref: - raise PyDSParamException( - "Parameter release_state only support `online` or `offline` but get %", - self._release_state, - ) - return rs_ref[self._release_state] - - @release_state.setter - def release_state(self, val: str) -> None: - """Set attribute release_state.""" - self._release_state = val.lower() - - @property - def param_json(self) -> Optional[List[Dict]]: - """Return param json base on self.param.""" - # Handle empty dict and None value - if not self.param: - return [] - return [ - { - "prop": k, - "direct": "IN", - "type": "VARCHAR", - "value": v, - } - for k, v in self.param.items() - ] - - @property - def task_definition_json(self) -> List[Dict]: - """Return all tasks definition in list of dict.""" - if not self.tasks: - return [self.tasks] - else: - return [task.get_define() for task in self.tasks.values()] - - @property - def task_relation_json(self) -> List[Dict]: - """Return all relation between tasks pair in list of dict.""" - if not self.tasks: - return [self.tasks] - else: - 
self._handle_root_relation() - return [tr.get_define() for tr in self._task_relations] - - @property - def schedule_json(self) -> Optional[Dict]: - """Get schedule parameter json object. This is requests from java gateway interface.""" - if not self.schedule: - return None - else: - start_time = conv_to_schedule( - self.start_time if self.start_time else datetime.now() - ) - end_time = conv_to_schedule( - self.end_time if self.end_time else MAX_DATETIME - ) - return { - "startTime": start_time, - "endTime": end_time, - "crontab": self.schedule, - "timezoneId": self.timezone, - } - - @property - def task_list(self) -> List["Task"]: # noqa: F821 - """Return list of tasks objects.""" - return list(self.tasks.values()) - - def _handle_root_relation(self): - """Handle root task property :class:`pydolphinscheduler.core.task.TaskRelation`. - - Root task in DAG do not have dominant upstream node, but we have to add an exactly default - upstream task with task_code equal to `0`. This is requests from java gateway interface. 
- """ - from pydolphinscheduler.core.task import TaskRelation - - post_relation_code = set() - for relation in self._task_relations: - post_relation_code.add(relation.post_task_code) - for task in self.task_list: - if task.code not in post_relation_code: - root_relation = TaskRelation(pre_task_code=0, post_task_code=task.code) - self._task_relations.add(root_relation) - - def add_task(self, task: "Task") -> None: # noqa: F821 - """Add a single task to process definition.""" - self.tasks[task.code] = task - task._process_definition = self - - def add_tasks(self, tasks: List["Task"]) -> None: # noqa: F821 - """Add task sequence to process definition, it a wrapper of :func:`add_task`.""" - for task in tasks: - self.add_task(task) - - def get_task(self, code: str) -> "Task": # noqa: F821 - """Get task object from process definition by given code.""" - if code not in self.tasks: - raise PyDSTaskNoFoundException( - "Task with code %s can not found in process definition %", - (code, self.name), - ) - return self.tasks[code] - - # TODO which tying should return in this case - def get_tasks_by_name(self, name: str) -> Set["Task"]: # noqa: F821 - """Get tasks object by given name, if will return all tasks with this name.""" - find = set() - for task in self.tasks.values(): - if task.name == name: - find.add(task) - return find - - def get_one_task_by_name(self, name: str) -> "Task": # noqa: F821 - """Get exact one task from process definition by given name. - - Function always return one task even though this process definition have more than one task with - this name. - """ - tasks = self.get_tasks_by_name(name) - if not tasks: - raise PyDSTaskNoFoundException(f"Can not find task with name {name}.") - return tasks.pop() - - def run(self): - """Submit and Start ProcessDefinition instance. - - Shortcut for function :func:`submit` and function :func:`start`. Only support manual start workflow - for now, and schedule run will coming soon. 
- :return: - """ - self.submit() - self.start() - - def _ensure_side_model_exists(self): - """Ensure process definition models model exists. - - For now, models object including :class:`pydolphinscheduler.models.project.Project`, - :class:`pydolphinscheduler.models.tenant.Tenant`, :class:`pydolphinscheduler.models.user.User`. - If these model not exists, would create default value in - :class:`pydolphinscheduler.constants.ProcessDefinitionDefault`. - """ - # TODO used metaclass for more pythonic - self.user.create_if_not_exists() - # Project model need User object exists - self.project.create_if_not_exists(self._user) - - def _pre_submit_check(self): - """Check specific condition satisfy before. - - This method should be called before process definition submit to java gateway - For now, we have below checker: - * `self.param` or at least one local param of task should be set if task `switch` in this workflow. - """ - if ( - any([task.task_type == TaskType.SWITCH for task in self.tasks.values()]) - and self.param is None - and all([len(task.local_params) == 0 for task in self.tasks.values()]) - ): - raise PyDSParamException( - "Parameter param or at least one local_param of task must " - "be provider if task Switch in process definition." 
- ) - - def submit(self) -> int: - """Submit ProcessDefinition instance to java gateway.""" - self._ensure_side_model_exists() - self._pre_submit_check() - - self._process_definition_code = JavaGate().create_or_update_process_definition( - self._user, - self._project, - self.name, - str(self.description) if self.description else "", - json.dumps(self.param_json), - self.warning_type, - self.warning_group_id, - self.timeout, - self.worker_group, - self._tenant, - self.release_state, - # TODO add serialization function - json.dumps(self.task_relation_json), - json.dumps(self.task_definition_json), - json.dumps(self.schedule_json) if self.schedule_json else None, - None, - None, - ) - if len(self.resource_list) > 0: - for res in self.resource_list: - res.user_name = self._user - res.create_or_update_resource() - return self._process_definition_code - - def start(self) -> None: - """Create and start ProcessDefinition instance. - - which post to `start-process-instance` to java gateway - """ - JavaGate().exec_process_instance( - self._user, - self._project, - self.name, - "", - self.worker_group, - self.warning_type, - self.warning_group_id, - 24 * 3600, - ) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/resource.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/resource.py deleted file mode 100644 index ea811915e2..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/resource.py +++ /dev/null @@ -1,73 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Module resource.""" - -from typing import Optional - -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.java_gateway import JavaGate -from pydolphinscheduler.models import Base - - -class Resource(Base): - """resource object, will define the resources that you want to create or update. - - :param name: The fullname of resource.Includes path and suffix. - :param content: The description of resource. - :param description: The description of resource. - :param user_name: The user name of resource. - """ - - _DEFINE_ATTR = {"name", "content", "description", "user_name"} - - def __init__( - self, - name: str, - content: Optional[str] = None, - description: Optional[str] = None, - user_name: Optional[str] = None, - ): - super().__init__(name, description) - self.content = content - self.user_name = user_name - self._resource_code = None - - def get_info_from_database(self): - """Get resource info from java gateway, contains resource id, name.""" - if not self.user_name: - raise PyDSParamException( - "`user_name` is required when querying resources from python gate." - ) - return JavaGate().query_resources_file_info(self.user_name, self.name) - - def get_id_from_database(self): - """Get resource id from java gateway.""" - return self.get_info_from_database().getId() - - def create_or_update_resource(self): - """Create or update resource via java gateway.""" - if not self.content or not self.user_name: - raise PyDSParamException( - "`user_name` and `content` are required when create or update resource from python gate." 
- ) - JavaGate().create_or_update_resource( - self.user_name, - self.name, - self.content, - self.description, - ) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/resource_plugin.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/resource_plugin.py deleted file mode 100644 index 8b500d165f..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/resource_plugin.py +++ /dev/null @@ -1,58 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""DolphinScheduler ResourcePlugin object.""" - -from abc import ABCMeta, abstractmethod - -from pydolphinscheduler.exceptions import PyResPluginException - - -# [start resource_plugin_definition] -class ResourcePlugin(object, metaclass=ABCMeta): - """ResourcePlugin object, declare resource plugin for task and workflow to dolphinscheduler. - - :param prefix: A string representing the prefix of ResourcePlugin. - - """ - - # [start init_method] - def __init__(self, prefix: str, *args, **kwargs): - self.prefix = prefix - - # [end init_method] - - # [start abstractmethod read_file] - @abstractmethod - def read_file(self, suf: str): - """Get the content of the file. 
- - The address of the file is the prefix of the resource plugin plus the parameter suf. - """ - - # [end abstractmethod read_file] - - def get_index(self, s: str, x, n): - """Find the subscript of the nth occurrence of the X character in the string s.""" - if n <= s.count(x): - all_index = [key for key, value in enumerate(s) if value == x] - return all_index[n - 1] - else: - raise PyResPluginException("Incomplete path.") - - -# [end resource_plugin_definition] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/task.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/task.py deleted file mode 100644 index 3fec31fd67..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/task.py +++ /dev/null @@ -1,384 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""DolphinScheduler Task and TaskRelation object.""" -import copy -import types -from logging import getLogger -from typing import Dict, List, Optional, Sequence, Set, Tuple, Union - -from pydolphinscheduler import configuration -from pydolphinscheduler.constants import ( - Delimiter, - ResourceKey, - Symbol, - TaskFlag, - TaskPriority, - TaskTimeoutFlag, -) -from pydolphinscheduler.core.process_definition import ( - ProcessDefinition, - ProcessDefinitionContext, -) -from pydolphinscheduler.core.resource import Resource -from pydolphinscheduler.core.resource_plugin import ResourcePlugin -from pydolphinscheduler.exceptions import PyDSParamException, PyResPluginException -from pydolphinscheduler.java_gateway import JavaGate -from pydolphinscheduler.models import Base - -logger = getLogger(__name__) - - -class TaskRelation(Base): - """TaskRelation object, describe the relation of exactly two tasks.""" - - # Add attr `_KEY_ATTR` to overwrite :func:`__eq__`, it is make set - # `Task.process_definition._task_relations` work correctly. 
- _KEY_ATTR = { - "pre_task_code", - "post_task_code", - } - - _DEFINE_ATTR = { - "pre_task_code", - "post_task_code", - } - - _DEFAULT_ATTR = { - "name": "", - "preTaskVersion": 1, - "postTaskVersion": 1, - "conditionType": 0, - "conditionParams": {}, - } - - def __init__( - self, - pre_task_code: int, - post_task_code: int, - name: Optional[str] = None, - ): - super().__init__(name) - self.pre_task_code = pre_task_code - self.post_task_code = post_task_code - - def __hash__(self): - return hash(f"{self.pre_task_code} {Delimiter.DIRECTION} {self.post_task_code}") - - -class Task(Base): - """Task object, parent class for all exactly task type.""" - - _DEFINE_ATTR = { - "name", - "code", - "version", - "task_type", - "task_params", - "description", - "flag", - "task_priority", - "worker_group", - "environment_code", - "delay_time", - "fail_retry_times", - "fail_retry_interval", - "timeout_flag", - "timeout_notify_strategy", - "timeout", - } - - # task default attribute will into `task_params` property - _task_default_attr = { - "local_params", - "resource_list", - "dependence", - "wait_start_timeout", - "condition_result", - } - # task attribute ignore from _task_default_attr and will not into `task_params` property - _task_ignore_attr: set = set() - # task custom attribute define in sub class and will append to `task_params` property - _task_custom_attr: set = set() - - ext: set = None - ext_attr: Union[str, types.FunctionType] = None - - DEFAULT_CONDITION_RESULT = {"successNode": [""], "failedNode": [""]} - - def __init__( - self, - name: str, - task_type: str, - description: Optional[str] = None, - flag: Optional[str] = TaskFlag.YES, - task_priority: Optional[str] = TaskPriority.MEDIUM, - worker_group: Optional[str] = configuration.WORKFLOW_WORKER_GROUP, - environment_name: Optional[str] = None, - delay_time: Optional[int] = 0, - fail_retry_times: Optional[int] = 0, - fail_retry_interval: Optional[int] = 1, - timeout_flag: Optional[int] = TaskTimeoutFlag.CLOSE, - 
timeout_notify_strategy: Optional = None, - timeout: Optional[int] = 0, - process_definition: Optional[ProcessDefinition] = None, - local_params: Optional[List] = None, - resource_list: Optional[List] = None, - dependence: Optional[Dict] = None, - wait_start_timeout: Optional[Dict] = None, - condition_result: Optional[Dict] = None, - resource_plugin: Optional[ResourcePlugin] = None, - ): - - super().__init__(name, description) - self.task_type = task_type - self.flag = flag - self.task_priority = task_priority - self.worker_group = worker_group - self._environment_name = environment_name - self.fail_retry_times = fail_retry_times - self.fail_retry_interval = fail_retry_interval - self.delay_time = delay_time - self.timeout_flag = timeout_flag - self.timeout_notify_strategy = timeout_notify_strategy - self.timeout = timeout - self._process_definition = None - self.process_definition: ProcessDefinition = ( - process_definition or ProcessDefinitionContext.get() - ) - self._upstream_task_codes: Set[int] = set() - self._downstream_task_codes: Set[int] = set() - self._task_relation: Set[TaskRelation] = set() - # move attribute code and version after _process_definition and process_definition declare - self.code, self.version = self.gen_code_and_version() - # Add task to process definition, maybe we could put into property process_definition latter - - if ( - self.process_definition is not None - and self.code not in self.process_definition.tasks - ): - self.process_definition.add_task(self) - else: - logger.warning( - "Task code %d already in process definition, prohibit re-add task.", - self.code, - ) - - # Attribute for task param - self.local_params = local_params or [] - self._resource_list = resource_list or [] - self.dependence = dependence or {} - self.wait_start_timeout = wait_start_timeout or {} - self._condition_result = condition_result or self.DEFAULT_CONDITION_RESULT - self.resource_plugin = resource_plugin - self.get_content() - - @property - def 
process_definition(self) -> Optional[ProcessDefinition]: - """Get attribute process_definition.""" - return self._process_definition - - @process_definition.setter - def process_definition(self, process_definition: Optional[ProcessDefinition]): - """Set attribute process_definition.""" - self._process_definition = process_definition - - @property - def resource_list(self) -> List: - """Get task define attribute `resource_list`.""" - resources = set() - for res in self._resource_list: - if type(res) == str: - resources.add( - Resource(name=res, user_name=self.user_name).get_id_from_database() - ) - elif type(res) == dict and res.get(ResourceKey.ID) is not None: - logger.warning( - """`resource_list` should be defined using List[str] with resource paths, - the use of ids to define resources will be remove in version 3.2.0. - """ - ) - resources.add(res.get(ResourceKey.ID)) - return [{ResourceKey.ID: r} for r in resources] - - @property - def user_name(self) -> Optional[str]: - """Return user name of process definition.""" - if self.process_definition: - return self.process_definition.user.name - else: - raise PyDSParamException("`user_name` cannot be empty.") - - @property - def condition_result(self) -> Dict: - """Get attribute condition_result.""" - return self._condition_result - - @condition_result.setter - def condition_result(self, condition_result: Optional[Dict]): - """Set attribute condition_result.""" - self._condition_result = condition_result - - def _get_attr(self) -> Set[str]: - """Get final task task_params attribute. - - Base on `_task_default_attr`, append attribute from `_task_custom_attr` and subtract attribute from - `_task_ignore_attr`. - """ - attr = copy.deepcopy(self._task_default_attr) - attr -= self._task_ignore_attr - attr |= self._task_custom_attr - return attr - - @property - def task_params(self) -> Optional[Dict]: - """Get task parameter object. - - Will get result to combine _task_custom_attr and custom_attr. 
- """ - custom_attr = self._get_attr() - return self.get_define_custom(custom_attr=custom_attr) - - def get_plugin(self): - """Return the resource plug-in. - - according to parameter resource_plugin and parameter - process_definition.resource_plugin. - """ - if self.resource_plugin is None: - if self.process_definition.resource_plugin is not None: - return self.process_definition.resource_plugin - else: - raise PyResPluginException( - "The execution command of this task is a file, but the resource plugin is empty" - ) - else: - return self.resource_plugin - - def get_content(self): - """Get the file content according to the resource plugin.""" - if self.ext_attr is None and self.ext is None: - return - _ext_attr = getattr(self, self.ext_attr) - if _ext_attr is not None: - if isinstance(_ext_attr, str) and _ext_attr.endswith(tuple(self.ext)): - res = self.get_plugin() - content = res.read_file(_ext_attr) - setattr(self, self.ext_attr.lstrip(Symbol.UNDERLINE), content) - else: - if self.resource_plugin is not None or ( - self.process_definition is not None - and self.process_definition.resource_plugin is not None - ): - index = _ext_attr.rfind(Symbol.POINT) - if index != -1: - raise ValueError( - "This task does not support files with suffix {}, only supports {}".format( - _ext_attr[index:], - Symbol.COMMA.join(str(suf) for suf in self.ext), - ) - ) - setattr(self, self.ext_attr.lstrip(Symbol.UNDERLINE), _ext_attr) - - def __hash__(self): - return hash(self.code) - - def __lshift__(self, other: Union["Task", Sequence["Task"]]): - """Implement Task << Task.""" - self.set_upstream(other) - return other - - def __rshift__(self, other: Union["Task", Sequence["Task"]]): - """Implement Task >> Task.""" - self.set_downstream(other) - return other - - def __rrshift__(self, other: Union["Task", Sequence["Task"]]): - """Call for Task >> [Task] because list don't have __rshift__ operators.""" - self.__lshift__(other) - return self - - def __rlshift__(self, other: Union["Task", 
Sequence["Task"]]): - """Call for Task << [Task] because list don't have __lshift__ operators.""" - self.__rshift__(other) - return self - - def _set_deps( - self, tasks: Union["Task", Sequence["Task"]], upstream: bool = True - ) -> None: - """ - Set parameter tasks dependent to current task. - - it is a wrapper for :func:`set_upstream` and :func:`set_downstream`. - """ - if not isinstance(tasks, Sequence): - tasks = [tasks] - - for task in tasks: - if upstream: - self._upstream_task_codes.add(task.code) - task._downstream_task_codes.add(self.code) - - if self._process_definition: - task_relation = TaskRelation( - pre_task_code=task.code, - post_task_code=self.code, - name=f"{task.name} {Delimiter.DIRECTION} {self.name}", - ) - self.process_definition._task_relations.add(task_relation) - else: - self._downstream_task_codes.add(task.code) - task._upstream_task_codes.add(self.code) - - if self._process_definition: - task_relation = TaskRelation( - pre_task_code=self.code, - post_task_code=task.code, - name=f"{self.name} {Delimiter.DIRECTION} {task.name}", - ) - self.process_definition._task_relations.add(task_relation) - - def set_upstream(self, tasks: Union["Task", Sequence["Task"]]) -> None: - """Set parameter tasks as upstream to current task.""" - self._set_deps(tasks, upstream=True) - - def set_downstream(self, tasks: Union["Task", Sequence["Task"]]) -> None: - """Set parameter tasks as downstream to current task.""" - self._set_deps(tasks, upstream=False) - - # TODO code should better generate in bulk mode when :ref: processDefinition run submit or start - def gen_code_and_version(self) -> Tuple: - """ - Generate task code and version from java gateway. - - If task name do not exists in process definition before, if will generate new code and version id - equal to 0 by java gateway, otherwise if will return the exists code and version. 
- """ - # TODO get code from specific project process definition and task name - result = JavaGate().get_code_and_version( - self.process_definition._project, self.process_definition.name, self.name - ) - # result = gateway.entry_point.genTaskCodeList(DefaultTaskCodeNum.DEFAULT) - # gateway_result_checker(result) - return result.get("code"), result.get("version") - - @property - def environment_code(self) -> str: - """Convert environment name to code.""" - if self._environment_name is None: - return None - return JavaGate().query_environment_info(self._environment_name) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/yaml_process_define.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/yaml_process_define.py deleted file mode 100644 index 0944925a48..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/yaml_process_define.py +++ /dev/null @@ -1,466 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Parse YAML file to create process.""" - -import logging -import os -import re -from pathlib import Path -from typing import Any, Dict - -from pydolphinscheduler import configuration, tasks -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.core.task import Task -from pydolphinscheduler.exceptions import PyDSTaskNoFoundException -from pydolphinscheduler.utils.yaml_parser import YamlParser - -logger = logging.getLogger(__file__) - -KEY_PROCESS = "workflow" -KEY_TASK = "tasks" -KEY_TASK_TYPE = "task_type" -KEY_DEPS = "deps" -KEY_OP = "op" - -TASK_SPECIAL_KEYS = [KEY_TASK_TYPE, KEY_DEPS] - - -class ParseTool: - """Enhanced parsing tools.""" - - @staticmethod - def parse_string_param_if_file(string_param: str, **kwargs): - """Use $FILE{"data_path"} to load file from "data_path".""" - if string_param.startswith("$FILE"): - path = re.findall(r"\$FILE\{\"(.*?)\"\}", string_param)[0] - base_folder = kwargs.get("base_folder", ".") - path = ParseTool.get_possible_path(path, base_folder) - with open(path, "r") as read_file: - string_param = "".join(read_file) - return string_param - - @staticmethod - def parse_string_param_if_env(string_param: str, **kwargs): - """Use $ENV{env_name} to load environment variable "env_name".""" - if "$ENV" in string_param: - key = re.findall(r"\$ENV\{(.*?)\}", string_param)[0] - env_value = os.environ.get(key, "$%s" % key) - string_param = string_param.replace("$ENV{%s}" % key, env_value) - return string_param - - @staticmethod - def parse_string_param_if_config(string_param: str, **kwargs): - """Use ${CONFIG.var_name} to load variable "var_name" from configuration.""" - if "${CONFIG" in string_param: - key = re.findall(r"\$\{CONFIG\.(.*?)\}", string_param)[0] - if hasattr(configuration, key): - string_param = getattr(configuration, key) - else: - string_param = configuration.get_single_config(key) - - return string_param - - @staticmethod - def get_possible_path(file_path, base_folder): - 
"""Get file possible path. - - Return new path if file_path is not exists, but base_folder + file_path exists - """ - possible_path = file_path - if not Path(file_path).exists(): - new_path = Path(base_folder).joinpath(file_path) - if new_path.exists(): - possible_path = new_path - logger.info(f"{file_path} not exists, convert to {possible_path}") - - return possible_path - - -def get_task_cls(task_type) -> Task: - """Get the task class object by task_type (case compatible).""" - # only get task class from tasks.__all__ - all_task_types = {type_.capitalize(): type_ for type_ in tasks.__all__} - task_type_cap = task_type.capitalize() - if task_type_cap not in all_task_types: - raise PyDSTaskNoFoundException("cant not find task %s" % task_type) - - standard_name = all_task_types[task_type_cap] - return getattr(tasks, standard_name) - - -class YamlProcess(YamlParser): - """Yaml parser for create process. - - :param yaml_file: yaml file path. - - examples1 :: - - parser = YamlParser(yaml_file=...) 
- parser.create_process_definition() - - examples2 :: - - YamlParser(yaml_file=...).create_process_definition() - - """ - - _parse_rules = [ - ParseTool.parse_string_param_if_file, - ParseTool.parse_string_param_if_env, - ParseTool.parse_string_param_if_config, - ] - - def __init__(self, yaml_file: str): - with open(yaml_file, "r") as f: - content = f.read() - - self._base_folder = Path(yaml_file).parent - content = self.prepare_refer_process(content) - super().__init__(content) - - def create_process_definition(self): - """Create process main function.""" - # get process parameters with key "workflow" - process_params = self[KEY_PROCESS] - - # pop "run" parameter, used at the end - is_run = process_params.pop("run", False) - - # use YamlProcess._parse_rules to parse special value of yaml file - process_params = self.parse_params(process_params) - - process_name = process_params["name"] - logger.info(f"Create Process: {process_name}") - with ProcessDefinition(**process_params) as pd: - - # save dependencies between tasks - dependencies = {} - - # save name and task mapping - name2task = {} - - # get task datas with key "tasks" - for task_data in self[KEY_TASK]: - task = self.parse_task(task_data, name2task) - - deps = task_data.get(KEY_DEPS, []) - if deps: - dependencies[task.name] = deps - name2task[task.name] = task - - # build dependencies between task - for downstream_task_name, deps in dependencies.items(): - downstream_task = name2task[downstream_task_name] - for upstream_task_name in deps: - upstream_task = name2task[upstream_task_name] - upstream_task >> downstream_task - - pd.submit() - # if set is_run, run the process after submit - if is_run: - logger.info(f"run workflow: {pd}") - pd.run() - - return process_name - - def parse_params(self, params: Any): - """Recursively resolves the parameter values. - - The function operates params only when it encounters a string; other types continue recursively. 
- """ - if isinstance(params, str): - for parse_rule in self._parse_rules: - params_ = params - params = parse_rule(params, base_folder=self._base_folder) - if params_ != params: - logger.info(f"parse {params_} -> {params}") - - elif isinstance(params, list): - for index in range(len(params)): - params[index] = self.parse_params(params[index]) - - elif isinstance(params, dict): - for key, value in params.items(): - params[key] = self.parse_params(value) - - return params - - @classmethod - def parse(cls, yaml_file: str): - """Recursively resolves the parameter values. - - The function operates params only when it encounters a string; other types continue recursively. - """ - process_name = cls(yaml_file).create_process_definition() - return process_name - - def prepare_refer_process(self, content): - """Allow YAML files to reference process derived from other YAML files.""" - process_paths = re.findall(r"\$WORKFLOW\{\"(.*?)\"\}", content) - for process_path in process_paths: - logger.info( - f"find special token {process_path}, load process form {process_path}" - ) - possible_path = ParseTool.get_possible_path(process_path, self._base_folder) - process_name = YamlProcess.parse(possible_path) - content = content.replace('$WORKFLOW{"%s"}' % process_path, process_name) - - return content - - def parse_task(self, task_data: dict, name2task: Dict[str, Task]): - """Parse various types of tasks. - - :param task_data: dict. - { - "task_type": "Shell", - "params": {"name": "shell_task", "command":"ehco hellp"} - } - - :param name2task: Dict[str, Task]), mapping of task_name and task - - - Some task type have special parse func: - if task type is Switch, use parse_switch; - if task type is Condition, use parse_condition; - if task type is Dependent, use parse_dependent; - other, we pass all task_params as input to task class, like "task_cls(**task_params)". 
- """ - task_type = task_data["task_type"] - # get params without special key - task_params = {k: v for k, v in task_data.items() if k not in TASK_SPECIAL_KEYS} - - task_cls = get_task_cls(task_type) - - # use YamlProcess._parse_rules to parse special value of yaml file - task_params = self.parse_params(task_params) - - if task_cls == tasks.Switch: - task = self.parse_switch(task_params, name2task) - - elif task_cls == tasks.Condition: - task = self.parse_condition(task_params, name2task) - - elif task_cls == tasks.Dependent: - task = self.parse_dependent(task_params, name2task) - - else: - task = task_cls(**task_params) - logger.info(task_type, task) - return task - - def parse_switch(self, task_params, name2task): - """Parse Switch Task. - - This is an example Yaml fragment of task_params - - name: switch - condition: - - ["${var} > 1", switch_child_1] - - switch_child_2 - """ - from pydolphinscheduler.tasks.switch import ( - Branch, - Default, - Switch, - SwitchCondition, - ) - - condition_datas = task_params["condition"] - conditions = [] - for condition_data in condition_datas: - assert "task" in condition_data, "task must be in %s" % condition_data - task_name = condition_data["task"] - condition_string = condition_data.get("condition", None) - - # if condition_string is None, for example: {"task": "switch_child_2"}, set it to Default branch - if condition_string is None: - conditions.append(Default(task=name2task.get(task_name))) - - # if condition_string is not None, for example: - # {"task": "switch_child_2", "condition": "${var} > 1"} set it to Branch - else: - conditions.append( - Branch(condition_string, task=name2task.get(task_name)) - ) - - switch = Switch( - name=task_params["name"], condition=SwitchCondition(*conditions) - ) - return switch - - def parse_condition(self, task_params, name2task): - """Parse Condition Task. 
- - This is an example Yaml fragment of task_params - - name: condition - success_task: success_branch - failed_task: fail_branch - OP: AND - groups: - - - OP: AND - groups: - - [pre_task_1, true] - - [pre_task_2, true] - - [pre_task_3, false] - - - OP: AND - groups: - - [pre_task_1, false] - - [pre_task_2, true] - - [pre_task_3, true] - - """ - from pydolphinscheduler.tasks.condition import ( - FAILURE, - SUCCESS, - And, - Condition, - Or, - ) - - def get_op_cls(op): - cls = None - if op.lower() == "and": - cls = And - elif op.lower() == "or": - cls = Or - else: - raise Exception("OP must be in And or Or, but get: %s" % op) - return cls - - second_cond_ops = [] - for first_group in task_params["groups"]: - second_op = first_group["op"] - task_ops = [] - for condition_data in first_group["groups"]: - assert "task" in condition_data, "task must be in %s" % condition_data - assert "flag" in condition_data, "flag must be in %s" % condition_data - task_name = condition_data["task"] - flag = condition_data["flag"] - task = name2task[task_name] - - # for example: task = pre_task_1, flag = true - if flag: - task_ops.append(SUCCESS(task)) - else: - task_ops.append(FAILURE(task)) - - second_cond_ops.append(get_op_cls(second_op)(*task_ops)) - - first_op = task_params["op"] - cond_operator = get_op_cls(first_op)(*second_cond_ops) - - condition = Condition( - name=task_params["name"], - condition=cond_operator, - success_task=name2task[task_params["success_task"]], - failed_task=name2task[task_params["failed_task"]], - ) - return condition - - def parse_dependent(self, task_params, name2task): - """Parse Dependent Task. 
- - This is an example Yaml fragment of task_params - - name: dependent - denpendence: - OP: AND - groups: - - - OP: Or - groups: - - [pydolphin, task_dependent_external, task_1] - - [pydolphin, task_dependent_external, task_2] - - - OP: And - groups: - - [pydolphin, task_dependent_external, task_1, LAST_WEDNESDAY] - - [pydolphin, task_dependent_external, task_2, last24Hours] - - """ - from pydolphinscheduler.tasks.dependent import ( - And, - Dependent, - DependentDate, - DependentItem, - Or, - ) - - def process_dependent_date(dependent_date): - """Parse dependent date (Compatible with key and value of DependentDate).""" - dependent_date_upper = dependent_date.upper() - if hasattr(DependentDate, dependent_date_upper): - dependent_date = getattr(DependentDate, dependent_date_upper) - return dependent_date - - def get_op_cls(op): - cls = None - if op.lower() == "and": - cls = And - elif op.lower() == "or": - cls = Or - else: - raise Exception("OP must be in And or Or, but get: %s" % op) - return cls - - def create_dependent_item(source_items): - """Parse dependent item. 
- - project_name: pydolphin - process_definition_name: task_dependent_external - dependent_task_name: task_1 - dependent_date: LAST_WEDNESDAY - """ - project_name = source_items["project_name"] - process_definition_name = source_items["process_definition_name"] - dependent_task_name = source_items["dependent_task_name"] - dependent_date = source_items.get("dependent_date", DependentDate.TODAY) - dependent_item = DependentItem( - project_name=project_name, - process_definition_name=process_definition_name, - dependent_task_name=dependent_task_name, - dependent_date=process_dependent_date(dependent_date), - ) - - return dependent_item - - second_dependences = [] - for first_group in task_params["groups"]: - second_op = first_group[KEY_OP] - dependence_items = [] - for source_items in first_group["groups"]: - dependence_items.append(create_dependent_item(source_items)) - - second_dependences.append(get_op_cls(second_op)(*dependence_items)) - - first_op = task_params[KEY_OP] - dependence = get_op_cls(first_op)(*second_dependences) - - task = Dependent( - name=task_params["name"], - dependence=dependence, - ) - return task - - -def create_process_definition(yaml_file): - """CLI.""" - YamlProcess.parse(yaml_file) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/default_config.yaml b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/default_config.yaml deleted file mode 100644 index 98d7b99fdc..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/default_config.yaml +++ /dev/null @@ -1,58 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Setting about Java gateway server -java_gateway: - # The address of Python gateway server start. Set its value to `0.0.0.0` if your Python API run in different - # between Python gateway server. It could be be specific to other address like `127.0.0.1` or `localhost` - address: 127.0.0.1 - - # The port of Python gateway server start. Define which port you could connect to Python gateway server from - # Python API models. - port: 25333 - - # Whether automatically convert Python objects to Java Objects. Default value is ``True``. There is some - # performance lost when set to ``True`` but for now pydolphinscheduler do not handle the convert issue between - # java and Python, mark it as TODO item in the future. 
- auto_convert: true - -# Setting about dolphinscheduler default value, will use the value set below if property do not set, which -# including ``user``, ``workflow`` -default: - # Default value for dolphinscheduler's user object - user: - name: userPythonGateway - password: userPythonGateway - email: userPythonGateway@dolphinscheduler.com - tenant: tenant_pydolphin - phone: 11111111111 - state: 1 - # Default value for dolphinscheduler's workflow object - workflow: - project: project-pydolphin - tenant: tenant_pydolphin - user: userPythonGateway - queue: queuePythonGateway - worker_group: default - # Release state of workflow, default value is ``online`` which mean setting workflow online when it submits - # to Java gateway, if you want to set workflow offline set its value to ``offline`` - release_state: online - time_zone: Asia/Shanghai - # Warning type of the workflow, default value is ``NONE`` mean do not warn user in any cases of workflow state, - # change to ``FAILURE`` if you want to warn users when workflow failed. All available enum value are - # ``NONE``, ``SUCCESS``, ``FAILURE``, ``ALL`` - warning_type: NONE diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/__init__.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/__init__.py deleted file mode 100644 index 37b2e5b61c..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Init examples package which provides users with pydolphinscheduler examples.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/bulk_create_example.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/bulk_create_example.py deleted file mode 100644 index 72bdb02243..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/bulk_create_example.py +++ /dev/null @@ -1,55 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -This example show you how to create workflows in batch mode. - -After this example run, we will create 10 workflows named `workflow:`, and with 3 tasks -named `task:-workflow:` in each workflow. 
Task shape as below - -task:1-workflow:1 -> task:2-workflow:1 -> task:3-workflow:1 - -Each workflow is linear since we set `IS_CHAIN=True`, you could change task to parallel by set it to `False`. -""" - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.shell import Shell - -NUM_WORKFLOWS = 10 -NUM_TASKS = 5 -# Make sure your tenant exists in your operator system -TENANT = "exists_tenant" -# Whether task should dependent on pre one or not -# False will create workflow with independent task, while True task will dependent on pre-task and dependence -# link like `pre_task -> current_task -> next_task`, default True -IS_CHAIN = True - -for wf in range(0, NUM_WORKFLOWS): - workflow_name = f"workflow:{wf}" - - with ProcessDefinition(name=workflow_name, tenant=TENANT) as pd: - for t in range(0, NUM_TASKS): - task_name = f"task:{t}-{workflow_name}" - command = f"echo This is task {task_name}" - task = Shell(name=task_name, command=command) - - if IS_CHAIN and t > 0: - pre_task_name = f"task:{t-1}-{workflow_name}" - pd.get_one_task_by_name(pre_task_name) >> task - - # We just submit workflow and task definition without set schedule time or run it manually - pd.submit() diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_condition_example.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_condition_example.py deleted file mode 100644 index 2d73df4b40..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_condition_example.py +++ /dev/null @@ -1,59 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# [start workflow_declare] -r""" -A example workflow for task condition. - -This example will create five task in single workflow, with four shell task and one condition task. Task -condition have one upstream which we declare explicit with syntax `parent >> condition`, and three downstream -automatically set dependence by condition task by passing parameter `condition`. The graph of this workflow -like: -pre_task_1 -> -> success_branch - \ / -pre_task_2 -> -> conditions -> - / \ -pre_task_3 -> -> fail_branch -. 
-""" - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.condition import FAILURE, SUCCESS, And, Condition -from pydolphinscheduler.tasks.shell import Shell - -with ProcessDefinition(name="task_condition_example", tenant="tenant_exists") as pd: - pre_task_1 = Shell(name="pre_task_1", command="echo pre_task_1") - pre_task_2 = Shell(name="pre_task_2", command="echo pre_task_2") - pre_task_3 = Shell(name="pre_task_3", command="echo pre_task_3") - cond_operator = And( - And( - SUCCESS(pre_task_1, pre_task_2), - FAILURE(pre_task_3), - ), - ) - - success_branch = Shell(name="success_branch", command="echo success_branch") - fail_branch = Shell(name="fail_branch", command="echo fail_branch") - - condition = Condition( - name="condition", - condition=cond_operator, - success_task=success_branch, - failed_task=fail_branch, - ) - pd.submit() -# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_datax_example.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_datax_example.py deleted file mode 100644 index 94bd449cf7..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_datax_example.py +++ /dev/null @@ -1,95 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# [start workflow_declare] -""" -A example workflow for task datax. - -This example will create a workflow named `task_datax`. -`task_datax` is true workflow define and run task task_datax. -You can create data sources `first_mysql` and `first_mysql` through UI. -It creates a task to synchronize datax from the source database to the target database. -""" - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.datax import CustomDataX, DataX - -# datax json template -JSON_TEMPLATE = { - "job": { - "content": [ - { - "reader": { - "name": "mysqlreader", - "parameter": { - "username": "usr", - "password": "pwd", - "column": ["id", "name", "code", "description"], - "splitPk": "id", - "connection": [ - { - "table": ["source_table"], - "jdbcUrl": ["jdbc:mysql://127.0.0.1:3306/source_db"], - } - ], - }, - }, - "writer": { - "name": "mysqlwriter", - "parameter": { - "writeMode": "insert", - "username": "usr", - "password": "pwd", - "column": ["id", "name"], - "connection": [ - { - "jdbcUrl": "jdbc:mysql://127.0.0.1:3306/target_db", - "table": ["target_table"], - } - ], - }, - }, - } - ], - "setting": { - "errorLimit": {"percentage": 0, "record": 0}, - "speed": {"channel": 1, "record": 1000}, - }, - } -} - -with ProcessDefinition( - name="task_datax_example", - tenant="tenant_exists", -) as pd: - # This task synchronizes the data in `t_ds_project` - # of `first_mysql` database to `target_project` of `second_mysql` database. 
- # You have to make sure data source named `first_mysql` and `second_mysql` exists - # in your environment. - task1 = DataX( - name="task_datax", - datasource_name="first_mysql", - datatarget_name="second_mysql", - sql="select id, name, code, description from source_table", - target_table="target_table", - ) - - # You can custom json_template of datax to sync data. This task create a new - # datax job same as task1, transfer record from `first_mysql` to `second_mysql` - task2 = CustomDataX(name="task_custom_datax", json=str(JSON_TEMPLATE)) - pd.run() -# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_dependent_example.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_dependent_example.py deleted file mode 100644 index db53bcc9f3..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_dependent_example.py +++ /dev/null @@ -1,74 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# [start workflow_declare] -r""" -A example workflow for task dependent. - -This example will create two workflows named `task_dependent` and `task_dependent_external`. 
-`task_dependent` is true workflow define and run task dependent, while `task_dependent_external` -define outside workflow and task from dependent. - -After this script submit, we would get workflow as below: - -task_dependent_external: - -task_1 -task_2 -task_3 - -task_dependent: - -task_dependent(this task dependent on task_dependent_external.task_1 and task_dependent_external.task_2). -""" -from pydolphinscheduler import configuration -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.dependent import And, Dependent, DependentItem, Or -from pydolphinscheduler.tasks.shell import Shell - -with ProcessDefinition( - name="task_dependent_external", - tenant="tenant_exists", -) as pd: - task_1 = Shell(name="task_1", command="echo task 1") - task_2 = Shell(name="task_2", command="echo task 2") - task_3 = Shell(name="task_3", command="echo task 3") - pd.submit() - -with ProcessDefinition( - name="task_dependent_example", - tenant="tenant_exists", -) as pd: - task = Dependent( - name="task_dependent", - dependence=And( - Or( - DependentItem( - project_name=configuration.WORKFLOW_PROJECT, - process_definition_name="task_dependent_external", - dependent_task_name="task_1", - ), - DependentItem( - project_name=configuration.WORKFLOW_PROJECT, - process_definition_name="task_dependent_external", - dependent_task_name="task_2", - ), - ) - ), - ) - pd.submit() -# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_dvc_example.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_dvc_example.py deleted file mode 100644 index 2b93cd14b7..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_dvc_example.py +++ /dev/null @@ -1,52 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# [start workflow_declare] -"""A example workflow for task dvc.""" - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks import DVCDownload, DVCInit, DVCUpload - -repository = "git@github.com:/dvc-data-repository-example.git" - -with ProcessDefinition( - name="task_dvc_example", - tenant="tenant_exists", -) as pd: - init_task = DVCInit(name="init_dvc", repository=repository, store_url="~/dvc_data") - upload_task = DVCUpload( - name="upload_data", - repository=repository, - data_path_in_dvc_repository="iris", - data_path_in_worker="~/source/iris", - version="v1", - message="upload iris data v1", - ) - - download_task = DVCDownload( - name="download_data", - repository=repository, - data_path_in_dvc_repository="iris", - data_path_in_worker="~/target/iris", - version="v1", - ) - - init_task >> upload_task >> download_task - - pd.run() - -# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_flink_example.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_flink_example.py deleted file mode 100644 index 1e8a040c65..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_flink_example.py +++ /dev/null 
@@ -1,33 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# [start workflow_declare] -"""A example workflow for task flink.""" - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.flink import DeployMode, Flink, ProgramType - -with ProcessDefinition(name="task_flink_example", tenant="tenant_exists") as pd: - task = Flink( - name="task_flink", - main_class="org.apache.flink.streaming.examples.wordcount.WordCount", - main_package="WordCount.jar", - program_type=ProgramType.JAVA, - deploy_mode=DeployMode.LOCAL, - ) - pd.run() -# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_map_reduce_example.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_map_reduce_example.py deleted file mode 100644 index 39b204f82a..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_map_reduce_example.py +++ /dev/null @@ -1,34 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# [start workflow_declare] -"""A example workflow for task mr.""" - -from pydolphinscheduler.core.engine import ProgramType -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.map_reduce import MR - -with ProcessDefinition(name="task_map_reduce_example", tenant="tenant_exists") as pd: - task = MR( - name="task_mr", - main_class="wordcount", - main_package="hadoop-mapreduce-examples-3.3.1.jar", - program_type=ProgramType.JAVA, - main_args="/dolphinscheduler/tenant_exists/resources/file.txt /output/ds", - ) - pd.run() -# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_mlflow_example.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_mlflow_example.py deleted file mode 100644 index c2734bcf81..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_mlflow_example.py +++ /dev/null @@ -1,93 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# [start workflow_declare] -"""A example workflow for task mlflow.""" - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.mlflow import ( - MLflowDeployType, - MLflowModels, - MLFlowProjectsAutoML, - MLFlowProjectsBasicAlgorithm, - MLFlowProjectsCustom, -) - -mlflow_tracking_uri = "http://127.0.0.1:5000" - -with ProcessDefinition( - name="task_mlflow_example", - tenant="tenant_exists", -) as pd: - - # run custom mlflow project to train model - train_custom = MLFlowProjectsCustom( - name="train_xgboost_native", - repository="https://github.com/mlflow/mlflow#examples/xgboost/xgboost_native", - mlflow_tracking_uri=mlflow_tracking_uri, - parameters="-P learning_rate=0.2 -P colsample_bytree=0.8 -P subsample=0.9", - experiment_name="xgboost", - ) - - # run automl to train model - train_automl = MLFlowProjectsAutoML( - name="train_automl", - mlflow_tracking_uri=mlflow_tracking_uri, - parameters="time_budget=30;estimator_list=['lgbm']", - experiment_name="automl_iris", - model_name="iris_A", - automl_tool="flaml", - data_path="/data/examples/iris", - ) - - # Using DOCKER to deploy model from train_automl - deploy_docker = MLflowModels( - name="deploy_docker", - model_uri="models:/iris_A/Production", - mlflow_tracking_uri=mlflow_tracking_uri, - deploy_mode=MLflowDeployType.DOCKER, - port=7002, - ) - - train_automl >> deploy_docker - - # run lightgbm to train model - train_basic_algorithm = MLFlowProjectsBasicAlgorithm( - name="train_basic_algorithm", - mlflow_tracking_uri=mlflow_tracking_uri, - 
parameters="n_estimators=200;learning_rate=0.2", - experiment_name="basic_algorithm_iris", - model_name="iris_B", - algorithm="lightgbm", - data_path="/data/examples/iris", - search_params="max_depth=[5, 10];n_estimators=[100, 200]", - ) - - # Using MLFLOW to deploy model from training lightgbm project - deploy_mlflow = MLflowModels( - name="deploy_mlflow", - model_uri="models:/iris_B/Production", - mlflow_tracking_uri=mlflow_tracking_uri, - deploy_mode=MLflowDeployType.MLFLOW, - port=7001, - ) - - train_basic_algorithm >> deploy_mlflow - - pd.submit() - -# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_openmldb_example.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_openmldb_example.py deleted file mode 100644 index 5b90091ecf..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_openmldb_example.py +++ /dev/null @@ -1,43 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# [start workflow_declare] -"""A example workflow for task openmldb.""" - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.openmldb import OpenMLDB - -sql = """USE demo_db; -set @@job_timeout=200000; -LOAD DATA INFILE 'file:///tmp/train_sample.csv' -INTO TABLE talkingdata OPTIONS(mode='overwrite'); -""" - -with ProcessDefinition( - name="task_openmldb_example", - tenant="tenant_exists", -) as pd: - task_openmldb = OpenMLDB( - name="task_openmldb", - zookeeper="127.0.0.1:2181", - zookeeper_path="/openmldb", - execute_mode="offline", - sql=sql, - ) - - pd.run() -# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_pytorch_example.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_pytorch_example.py deleted file mode 100644 index 6559c9ac65..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_pytorch_example.py +++ /dev/null @@ -1,62 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# [start workflow_declare] -"""A example workflow for task pytorch.""" - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.pytorch import Pytorch - -with ProcessDefinition( - name="task_pytorch_example", - tenant="tenant_exists", -) as pd: - - # run project with existing environment - task_existing_env = Pytorch( - name="task_existing_env", - script="main.py", - script_params="--dry-run --no-cuda", - project_path="https://github.com/pytorch/examples#mnist", - python_command="/home/anaconda3/envs/pytorch/bin/python3", - ) - - # run project with creating conda environment - task_conda_env = Pytorch( - name="task_conda_env", - script="main.py", - script_params="--dry-run --no-cuda", - project_path="https://github.com/pytorch/examples#mnist", - is_create_environment=True, - python_env_tool="conda", - requirements="requirements.txt", - conda_python_version="3.7", - ) - - # run project with creating virtualenv environment - task_virtualenv_env = Pytorch( - name="task_virtualenv_env", - script="main.py", - script_params="--dry-run --no-cuda", - project_path="https://github.com/pytorch/examples#mnist", - is_create_environment=True, - python_env_tool="virtualenv", - requirements="requirements.txt", - ) - - pd.submit() -# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_sagemaker_example.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_sagemaker_example.py deleted file mode 100644 index b056f61a63..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_sagemaker_example.py +++ /dev/null @@ -1,46 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# [start workflow_declare] -"""A example workflow for task sagemaker.""" -import json - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.sagemaker import SageMaker - -sagemaker_request_data = { - "ParallelismConfiguration": {"MaxParallelExecutionSteps": 1}, - "PipelineExecutionDescription": "test Pipeline", - "PipelineExecutionDisplayName": "AbalonePipeline", - "PipelineName": "AbalonePipeline", - "PipelineParameters": [ - {"Name": "ProcessingInstanceType", "Value": "ml.m4.xlarge"}, - {"Name": "ProcessingInstanceCount", "Value": "2"}, - ], -} - -with ProcessDefinition( - name="task_sagemaker_example", - tenant="tenant_exists", -) as pd: - task_sagemaker = SageMaker( - name="task_sagemaker", - sagemaker_request_json=json.dumps(sagemaker_request_data, indent=2), - ) - - pd.run() -# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_spark_example.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_spark_example.py deleted file mode 100644 index 594d95f55a..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_spark_example.py +++ /dev/null @@ -1,33 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# [start workflow_declare] -"""A example workflow for task spark.""" - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.spark import DeployMode, ProgramType, Spark - -with ProcessDefinition(name="task_spark_example", tenant="tenant_exists") as pd: - task = Spark( - name="task_spark", - main_class="org.apache.spark.examples.SparkPi", - main_package="spark-examples_2.12-3.2.0.jar", - program_type=ProgramType.JAVA, - deploy_mode=DeployMode.LOCAL, - ) - pd.run() -# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_switch_example.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_switch_example.py deleted file mode 100644 index 7966af320e..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_switch_example.py +++ /dev/null @@ -1,51 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# [start workflow_declare] -r""" -A example workflow for task switch. - -This example will create four task in single workflow, with three shell task and one switch task. Task switch -have one upstream which we declare explicit with syntax `parent >> switch`, and two downstream automatically -set dependence by switch task by passing parameter `condition`. The graph of this workflow like: - --> switch_child_1 - / -parent -> switch -> - \ - --> switch_child_2 -. 
-""" - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.shell import Shell -from pydolphinscheduler.tasks.switch import Branch, Default, Switch, SwitchCondition - -with ProcessDefinition( - name="task_switch_example", tenant="tenant_exists", param={"var": "1"} -) as pd: - parent = Shell(name="parent", command="echo parent") - switch_child_1 = Shell(name="switch_child_1", command="echo switch_child_1") - switch_child_2 = Shell(name="switch_child_2", command="echo switch_child_2") - switch_condition = SwitchCondition( - Branch(condition="${var} > 1", task=switch_child_1), - Default(task=switch_child_2), - ) - - switch = Switch(name="switch", condition=switch_condition) - parent >> switch - pd.submit() -# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial.py deleted file mode 100644 index 0478e68519..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial.py +++ /dev/null @@ -1,68 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -r""" -A tutorial example take you to experience pydolphinscheduler. - -After tutorial.py file submit to Apache DolphinScheduler server a DAG would be create, -and workflow DAG graph as below: - - --> task_child_one - / \ -task_parent --> --> task_union - \ / - --> task_child_two - -it will instantiate and run all the task it have. -""" - -# [start tutorial] -# [start package_import] -# Import ProcessDefinition object to define your workflow attributes -from pydolphinscheduler.core.process_definition import ProcessDefinition - -# Import task Shell object cause we would create some shell tasks later -from pydolphinscheduler.tasks.shell import Shell - -# [end package_import] - -# [start workflow_declare] -with ProcessDefinition( - name="tutorial", - schedule="0 0 0 * * ? *", - start_time="2021-01-01", - tenant="tenant_exists", -) as pd: - # [end workflow_declare] - # [start task_declare] - task_parent = Shell(name="task_parent", command="echo hello pydolphinscheduler") - task_child_one = Shell(name="task_child_one", command="echo 'child one'") - task_child_two = Shell(name="task_child_two", command="echo 'child two'") - task_union = Shell(name="task_union", command="echo union") - # [end task_declare] - - # [start task_relation_declare] - task_group = [task_child_one, task_child_two] - task_parent.set_downstream(task_group) - - task_union << task_group - # [end task_relation_declare] - - # [start submit_or_run] - pd.run() - # [end submit_or_run] -# [end tutorial] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial_decorator.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial_decorator.py deleted file mode 100644 index 986c1bbb6e..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial_decorator.py +++ /dev/null @@ -1,91 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -r""" -A tutorial example take you to experience pydolphinscheduler. - -After tutorial.py file submit to Apache DolphinScheduler server a DAG would be create, -and workflow DAG graph as below: - - --> task_child_one - / \ -task_parent --> --> task_union - \ / - --> task_child_two - -it will instantiate and run all the task it have. 
-""" - -# [start tutorial] -# [start package_import] -# Import ProcessDefinition object to define your workflow attributes -from pydolphinscheduler.core.process_definition import ProcessDefinition - -# Import task Shell object cause we would create some shell tasks later -from pydolphinscheduler.tasks.func_wrap import task - -# [end package_import] - - -# [start task_declare] -@task -def task_parent(): - """First task in this workflow.""" - print("echo hello pydolphinscheduler") - - -@task -def task_child_one(): - """Child task will be run parallel after task ``task_parent`` finished.""" - print("echo 'child one'") - - -@task -def task_child_two(): - """Child task will be run parallel after task ``task_parent`` finished.""" - print("echo 'child two'") - - -@task -def task_union(): - """Last task in this workflow.""" - print("echo union") - - -# [end task_declare] - - -# [start workflow_declare] -with ProcessDefinition( - name="tutorial_decorator", - schedule="0 0 0 * * ? *", - start_time="2021-01-01", - tenant="tenant_exists", -) as pd: - # [end workflow_declare] - - # [start task_relation_declare] - task_group = [task_child_one(), task_child_two()] - task_parent().set_downstream(task_group) - - task_union() << task_group - # [end task_relation_declare] - - # [start submit_or_run] - pd.run() - # [end submit_or_run] -# [end tutorial] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial_resource_plugin.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial_resource_plugin.py deleted file mode 100644 index 5b02022ee9..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial_resource_plugin.py +++ /dev/null @@ -1,64 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -r""" -A tutorial example take you to experience pydolphinscheduler resource plugin. - -Resource plug-ins can be defined in workflows and tasks - -it will instantiate and run all the task it have. -""" -import os -from pathlib import Path - -# [start tutorial_resource_plugin] -# [start package_import] -# Import ProcessDefinition object to define your workflow attributes -from pydolphinscheduler.core.process_definition import ProcessDefinition - -# Import task Shell object cause we would create some shell tasks later -from pydolphinscheduler.resources_plugin.local import Local -from pydolphinscheduler.tasks.shell import Shell - -# [end package_import] - -# [start workflow_declare] -with ProcessDefinition( - name="tutorial_resource_plugin", - schedule="0 0 0 * * ? 
*", - start_time="2021-01-01", - tenant="tenant_exists", - resource_plugin=Local("/tmp"), -) as process_definition: - # [end workflow_declare] - # [start task_declare] - file = "resource.sh" - path = Path("/tmp").joinpath(file) - with open(str(path), "w") as f: - f.write("echo tutorial resource plugin") - task_parent = Shell( - name="local-resource-example", - command=file, - ) - print(task_parent.task_params) - os.remove(path) - # [end task_declare] - - # [start submit_or_run] - process_definition.run() - # [end submit_or_run] -# [end tutorial_resource_plugin] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/exceptions.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/exceptions.py deleted file mode 100644 index 5b0d1bb61f..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/exceptions.py +++ /dev/null @@ -1,46 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Exceptions for pydolphinscheduler.""" - - -class PyDSBaseException(Exception): - """Base exception for pydolphinscheduler.""" - - -class PyDSParamException(PyDSBaseException): - """Exception for pydolphinscheduler parameter verify error.""" - - -class PyDSTaskNoFoundException(PyDSBaseException): - """Exception for pydolphinscheduler workflow task no found error.""" - - -class PyDSJavaGatewayException(PyDSBaseException): - """Exception for pydolphinscheduler Java gateway error.""" - - -class PyDSProcessDefinitionNotAssignException(PyDSBaseException): - """Exception for pydolphinscheduler process definition not assign error.""" - - -class PyDSConfException(PyDSBaseException): - """Exception for pydolphinscheduler configuration error.""" - - -class PyResPluginException(PyDSBaseException): - """Exception for pydolphinscheduler resource plugin error.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/java_gateway.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/java_gateway.py deleted file mode 100644 index 54bb0a38b2..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/java_gateway.py +++ /dev/null @@ -1,308 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Module java gateway, contain gateway behavior.""" - -import contextlib -from logging import getLogger -from typing import Any, Optional - -from py4j.java_collections import JavaMap -from py4j.java_gateway import GatewayParameters, JavaGateway -from py4j.protocol import Py4JError - -from pydolphinscheduler import __version__, configuration -from pydolphinscheduler.constants import JavaGatewayDefault -from pydolphinscheduler.exceptions import PyDSJavaGatewayException - -logger = getLogger(__name__) - - -def launch_gateway( - address: Optional[str] = None, - port: Optional[int] = None, - auto_convert: Optional[bool] = True, -) -> JavaGateway: - """Launch java gateway to pydolphinscheduler. - - TODO Note that automatic conversion makes calling Java methods slightly less efficient because - in the worst case, Py4J needs to go through all registered converters for all parameters. - This is why automatic conversion is disabled by default. 
- """ - gateway_parameters = GatewayParameters( - address=address or configuration.JAVA_GATEWAY_ADDRESS, - port=port or configuration.JAVA_GATEWAY_PORT, - auto_convert=auto_convert or configuration.JAVA_GATEWAY_AUTO_CONVERT, - ) - gateway = JavaGateway(gateway_parameters=gateway_parameters) - return gateway - - -def gateway_result_checker( - result: JavaMap, - msg_check: Optional[str] = JavaGatewayDefault.RESULT_MESSAGE_SUCCESS, -) -> Any: - """Check weather java gateway result success or not.""" - if ( - result[JavaGatewayDefault.RESULT_STATUS_KEYWORD].toString() - != JavaGatewayDefault.RESULT_STATUS_SUCCESS - ): - raise PyDSJavaGatewayException("Failed when try to got result for java gateway") - if ( - msg_check is not None - and result[JavaGatewayDefault.RESULT_MESSAGE_KEYWORD] != msg_check - ): - raise PyDSJavaGatewayException("Get result state not success.") - return result - - -class JavaGate: - """Launch java gateway to pydolphin scheduler.""" - - def __init__( - self, - address: Optional[str] = None, - port: Optional[int] = None, - auto_convert: Optional[bool] = True, - ): - self.java_gateway = launch_gateway(address, port, auto_convert) - gateway_version = "unknown" - with contextlib.suppress(Py4JError): - # 1. Java gateway version is too old: doesn't have method 'getGatewayVersion()' - # 2. Error connecting to Java gateway - gateway_version = self.get_gateway_version() - if gateway_version != __version__: - logger.warning( - f"Using unmatched version of pydolphinscheduler (version {__version__}) " - f"and Java gateway (version {gateway_version}) may cause errors. 
" - "We strongly recommend you to find the matched version " - "(check: https://pypi.org/project/apache-dolphinscheduler)" - ) - - def get_gateway_version(self): - """Get the java gateway version, expected to be equal with pydolphinscheduler.""" - return self.java_gateway.entry_point.getGatewayVersion() - - def get_datasource_info(self, name: str): - """Get datasource info through java gateway.""" - return self.java_gateway.entry_point.getDatasourceInfo(name) - - def get_resources_file_info(self, program_type: str, main_package: str): - """Get resources file info through java gateway.""" - return self.java_gateway.entry_point.getResourcesFileInfo( - program_type, main_package - ) - - def create_or_update_resource( - self, user_name: str, name: str, content: str, description: Optional[str] = None - ): - """Create or update resource through java gateway.""" - return self.java_gateway.entry_point.createOrUpdateResource( - user_name, name, description, content - ) - - def query_resources_file_info(self, user_name: str, name: str): - """Get resources file info through java gateway.""" - return self.java_gateway.entry_point.queryResourcesFileInfo(user_name, name) - - def query_environment_info(self, name: str): - """Get environment info through java gateway.""" - return self.java_gateway.entry_point.getEnvironmentInfo(name) - - def get_code_and_version( - self, project_name: str, process_definition_name: str, task_name: str - ): - """Get code and version through java gateway.""" - return self.java_gateway.entry_point.getCodeAndVersion( - project_name, process_definition_name, task_name - ) - - def create_or_grant_project( - self, user: str, name: str, description: Optional[str] = None - ): - """Create or grant project through java gateway.""" - return self.java_gateway.entry_point.createOrGrantProject( - user, name, description - ) - - def query_project_by_name(self, user: str, name: str): - """Query project through java gateway.""" - return 
self.java_gateway.entry_point.queryProjectByName(user, name) - - def update_project( - self, user: str, project_code: int, project_name: str, description: str - ): - """Update project through java gateway.""" - return self.java_gateway.entry_point.updateProject( - user, project_code, project_name, description - ) - - def delete_project(self, user: str, code: int): - """Delete project through java gateway.""" - return self.java_gateway.entry_point.deleteProject(user, code) - - def create_tenant( - self, tenant_name: str, queue_name: str, description: Optional[str] = None - ): - """Create tenant through java gateway.""" - return self.java_gateway.entry_point.createTenant( - tenant_name, description, queue_name - ) - - def query_tenant(self, tenant_code: str): - """Query tenant through java gateway.""" - return self.java_gateway.entry_point.queryTenantByCode(tenant_code) - - def grant_tenant_to_user(self, user_name: str, tenant_code: str): - """Grant tenant to user through java gateway.""" - return self.java_gateway.entry_point.grantTenantToUser(user_name, tenant_code) - - def update_tenant( - self, - user: str, - tenant_id: int, - code: str, - queue_id: int, - description: Optional[str] = None, - ): - """Update tenant through java gateway.""" - return self.java_gateway.entry_point.updateTenant( - user, tenant_id, code, queue_id, description - ) - - def delete_tenant(self, user: str, tenant_id: int): - """Delete tenant through java gateway.""" - return self.java_gateway.entry_point.deleteTenantById(user, tenant_id) - - def create_user( - self, - name: str, - password: str, - email: str, - phone: str, - tenant: str, - queue: str, - status: int, - ): - """Create user through java gateway.""" - return self.java_gateway.entry_point.createUser( - name, password, email, phone, tenant, queue, status - ) - - def query_user(self, user_id: int): - """Query user through java gateway.""" - return self.java_gateway.queryUser(user_id) - - def update_user( - self, - name: str, - 
password: str, - email: str, - phone: str, - tenant: str, - queue: str, - status: int, - ): - """Update user through java gateway.""" - return self.java_gateway.entry_point.updateUser( - name, password, email, phone, tenant, queue, status - ) - - def delete_user(self, name: str, user_id: int): - """Delete user through java gateway.""" - return self.java_gateway.entry_point.deleteUser(name, user_id) - - def get_dependent_info( - self, - project_name: str, - process_definition_name: str, - task_name: Optional[str] = None, - ): - """Get dependent info through java gateway.""" - return self.java_gateway.entry_point.getDependentInfo( - project_name, process_definition_name, task_name - ) - - def get_process_definition_info( - self, user_name: str, project_name: str, process_definition_name: str - ): - """Get process definition info through java gateway.""" - return self.java_gateway.entry_point.getProcessDefinitionInfo( - user_name, project_name, process_definition_name - ) - - def create_or_update_process_definition( - self, - user_name: str, - project_name: str, - name: str, - description: str, - global_params: str, - warning_type: str, - warning_group_id: int, - timeout: int, - worker_group: str, - tenant_code: str, - release_state: int, - task_relation_json: str, - task_definition_json: str, - schedule: Optional[str] = None, - other_params_json: Optional[str] = None, - execution_type: Optional[str] = None, - ): - """Create or update process definition through java gateway.""" - return self.java_gateway.entry_point.createOrUpdateProcessDefinition( - user_name, - project_name, - name, - description, - global_params, - schedule, - warning_type, - warning_group_id, - timeout, - worker_group, - tenant_code, - release_state, - task_relation_json, - task_definition_json, - other_params_json, - execution_type, - ) - - def exec_process_instance( - self, - user_name: str, - project_name: str, - process_definition_name: str, - cron_time: str, - worker_group: str, - 
warning_type: str, - warning_group_id: int, - timeout: int, - ): - """Exec process instance through java gateway.""" - return self.java_gateway.entry_point.execProcessInstance( - user_name, - project_name, - process_definition_name, - cron_time, - worker_group, - warning_type, - warning_group_id, - timeout, - ) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/__init__.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/__init__.py deleted file mode 100644 index b289954caa..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/__init__.py +++ /dev/null @@ -1,36 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Init Models package, keeping object related to DolphinScheduler covert from Java Gateway Service.""" - -from pydolphinscheduler.models.base import Base -from pydolphinscheduler.models.base_side import BaseSide -from pydolphinscheduler.models.project import Project -from pydolphinscheduler.models.queue import Queue -from pydolphinscheduler.models.tenant import Tenant -from pydolphinscheduler.models.user import User -from pydolphinscheduler.models.worker_group import WorkerGroup - -__all__ = [ - "Base", - "BaseSide", - "Project", - "Tenant", - "User", - "Queue", - "WorkerGroup", -] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/base.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/base.py deleted file mode 100644 index 2647714af0..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/base.py +++ /dev/null @@ -1,74 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""DolphinScheduler Base object.""" - -from typing import Dict, Optional - -# from pydolphinscheduler.models.user import User -from pydolphinscheduler.utils.string import attr2camel - - -class Base: - """DolphinScheduler Base object.""" - - # Object key attribute, to test whether object equals and so on. - _KEY_ATTR: set = {"name", "description"} - - # Object defines attribute, use when needs to communicate with Java gateway server. - _DEFINE_ATTR: set = set() - - # Object default attribute, will add those attribute to `_DEFINE_ATTR` when init assign missing. - _DEFAULT_ATTR: Dict = {} - - def __init__(self, name: str, description: Optional[str] = None): - self.name = name - self.description = description - - def __repr__(self) -> str: - return f'<{type(self).__name__}: name="{self.name}">' - - def __eq__(self, other): - return type(self) == type(other) and all( - getattr(self, a, None) == getattr(other, a, None) for a in self._KEY_ATTR - ) - - def get_define_custom( - self, camel_attr: bool = True, custom_attr: set = None - ) -> Dict: - """Get object definition attribute by given attr set.""" - content = {} - for attr in custom_attr: - val = getattr(self, attr, None) - if camel_attr: - content[attr2camel(attr)] = val - else: - content[attr] = val - return content - - def get_define(self, camel_attr: bool = True) -> Dict: - """Get object definition attribute communicate to Java gateway server. - - use attribute `self._DEFINE_ATTR` to determine which attributes should including when - object tries to communicate with Java gateway server. 
- """ - content = self.get_define_custom(camel_attr, self._DEFINE_ATTR) - update_default = { - k: self._DEFAULT_ATTR.get(k) for k in self._DEFAULT_ATTR if k not in content - } - content.update(update_default) - return content diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/base_side.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/base_side.py deleted file mode 100644 index 99b4007a85..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/base_side.py +++ /dev/null @@ -1,48 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Module for models object.""" - -from typing import Optional - -from pydolphinscheduler import configuration -from pydolphinscheduler.models import Base - - -class BaseSide(Base): - """Base class for models object, it declare base behavior for them.""" - - def __init__(self, name: str, description: Optional[str] = None): - super().__init__(name, description) - - @classmethod - def create_if_not_exists( - cls, - # TODO comment for avoiding cycle import - # user: Optional[User] = ProcessDefinitionDefault.USER - user=configuration.WORKFLOW_USER, - ): - """Create Base if not exists.""" - raise NotImplementedError - - def delete_all(self): - """Delete all method.""" - if not self: - return - list_pro = [key for key in self.__dict__.keys()] - for key in list_pro: - self.__delattr__(key) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/project.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/project.py deleted file mode 100644 index 678332ba3b..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/project.py +++ /dev/null @@ -1,72 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""DolphinScheduler Project object.""" - -from typing import Optional - -from pydolphinscheduler import configuration -from pydolphinscheduler.java_gateway import JavaGate -from pydolphinscheduler.models import BaseSide - - -class Project(BaseSide): - """DolphinScheduler Project object.""" - - def __init__( - self, - name: str = configuration.WORKFLOW_PROJECT, - description: Optional[str] = None, - code: Optional[int] = None, - ): - super().__init__(name, description) - self.code = code - - def create_if_not_exists(self, user=configuration.USER_NAME) -> None: - """Create Project if not exists.""" - JavaGate().create_or_grant_project(user, self.name, self.description) - # TODO recover result checker - # gateway_result_checker(result, None) - - @classmethod - def get_project_by_name(cls, user=configuration.USER_NAME, name=None) -> "Project": - """Get Project by name.""" - project = JavaGate().query_project_by_name(user, name) - if project is None: - return cls() - return cls( - name=project.getName(), - description=project.getDescription(), - code=project.getCode(), - ) - - def update( - self, - user=configuration.USER_NAME, - project_code=None, - project_name=None, - description=None, - ) -> None: - """Update Project.""" - JavaGate().update_project(user, project_code, project_name, description) - self.name = project_name - self.description = description - - def delete(self, user=configuration.USER_NAME) -> None: - """Delete Project.""" - JavaGate().delete_project(user, self.code) - self.delete_all() diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/queue.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/queue.py deleted file mode 100644 index e6da2594c8..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/queue.py +++ /dev/null @@ -1,34 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""DolphinScheduler User object.""" - -from typing import Optional - -from pydolphinscheduler import configuration -from pydolphinscheduler.models import BaseSide - - -class Queue(BaseSide): - """DolphinScheduler Queue object.""" - - def __init__( - self, - name: str = configuration.WORKFLOW_QUEUE, - description: Optional[str] = "", - ): - super().__init__(name, description) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/tenant.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/tenant.py deleted file mode 100644 index 09b00ccf3a..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/tenant.py +++ /dev/null @@ -1,80 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""DolphinScheduler Tenant object.""" - -from typing import Optional - -from pydolphinscheduler import configuration -from pydolphinscheduler.java_gateway import JavaGate -from pydolphinscheduler.models import BaseSide - - -class Tenant(BaseSide): - """DolphinScheduler Tenant object.""" - - def __init__( - self, - name: str = configuration.WORKFLOW_TENANT, - queue: str = configuration.WORKFLOW_QUEUE, - description: Optional[str] = None, - tenant_id: Optional[int] = None, - code: Optional[str] = None, - user_name: Optional[str] = None, - ): - super().__init__(name, description) - self.tenant_id = tenant_id - self.queue = queue - self.code = code - self.user_name = user_name - - def create_if_not_exists( - self, queue_name: str, user=configuration.USER_NAME - ) -> None: - """Create Tenant if not exists.""" - tenant = JavaGate().create_tenant(self.name, self.description, queue_name) - self.tenant_id = tenant.getId() - self.code = tenant.getTenantCode() - # gateway_result_checker(result, None) - - @classmethod - def get_tenant(cls, code: str) -> "Tenant": - """Get Tenant list.""" - tenant = JavaGate().query_tenant(code) - if tenant is None: - return cls() - return cls( - description=tenant.getDescription(), - tenant_id=tenant.getId(), - code=tenant.getTenantCode(), - queue=tenant.getQueueId(), - ) - - def update( - self, user=configuration.USER_NAME, code=None, queue_id=None, description=None - ) -> None: - """Update Tenant.""" - JavaGate().update_tenant(user, self.tenant_id, code, queue_id, description) - # TODO: check queue_id and queue_name - self.queue = 
str(queue_id) - self.code = code - self.description = description - - def delete(self) -> None: - """Delete Tenant.""" - JavaGate().delete_tenant(self.user_name, self.tenant_id) - self.delete_all() diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/user.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/user.py deleted file mode 100644 index 57c6af647f..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/user.py +++ /dev/null @@ -1,130 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""DolphinScheduler User object.""" - -from typing import Optional - -from pydolphinscheduler import configuration -from pydolphinscheduler.java_gateway import JavaGate -from pydolphinscheduler.models import BaseSide, Tenant - - -class User(BaseSide): - """DolphinScheduler User object.""" - - _KEY_ATTR = { - "name", - "password", - "email", - "phone", - "tenant", - "queue", - "status", - } - - def __init__( - self, - name: str, - password: Optional[str] = configuration.USER_PASSWORD, - email: Optional[str] = configuration.USER_EMAIL, - phone: Optional[str] = configuration.USER_PHONE, - tenant: Optional[str] = configuration.WORKFLOW_TENANT, - queue: Optional[str] = configuration.WORKFLOW_QUEUE, - status: Optional[int] = configuration.USER_STATE, - ): - super().__init__(name) - self.user_id: Optional[int] = None - self.password = password - self.email = email - self.phone = phone - self.tenant = tenant - self.queue = queue - self.status = status - - def create_tenant_if_not_exists(self) -> None: - """Create tenant object.""" - tenant = Tenant(name=self.tenant, queue=self.queue) - tenant.create_if_not_exists(self.queue) - - def create_if_not_exists(self, **kwargs): - """Create User if not exists.""" - # Should make sure queue already exists. 
- self.create_tenant_if_not_exists() - user = JavaGate().create_user( - self.name, - self.password, - self.email, - self.phone, - self.tenant, - self.queue, - self.status, - ) - self.user_id = user.getId() - # TODO recover result checker - # gateway_result_checker(result, None) - - @classmethod - def get_user(cls, user_id) -> "User": - """Get User.""" - user = JavaGate().query_user(user_id) - if user is None: - return cls("") - user_id = user.getId() - user = cls( - name=user.getUserName(), - password=user.getUserPassword(), - email=user.getEmail(), - phone=user.getPhone(), - tenant=user.getTenantCode(), - queue=user.getQueueName(), - status=user.getState(), - ) - user.user_id = user_id - return user - - def update( - self, - password=None, - email=None, - phone=None, - tenant=None, - queue=None, - status=None, - ) -> None: - """Update User.""" - user = JavaGate().update_user( - self.name, - password, - email, - phone, - tenant, - queue, - status, - ) - self.user_id = user.getId() - self.name = user.getUserName() - self.password = user.getUserPassword() - self.email = user.getEmail() - self.phone = user.getPhone() - self.queue = user.getQueueName() - self.status = user.getState() - - def delete(self) -> None: - """Delete User.""" - JavaGate().delete_user(self.name, self.user_id) - self.delete_all() diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/worker_group.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/worker_group.py deleted file mode 100644 index bc55eafc34..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/worker_group.py +++ /dev/null @@ -1,30 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""DolphinScheduler Worker Group object.""" - -from typing import Optional - -from pydolphinscheduler.models import BaseSide - - -class WorkerGroup(BaseSide): - """DolphinScheduler Worker Group object.""" - - def __init__(self, name: str, address: str, description: Optional[str] = None): - super().__init__(name, description) - self.address = address diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/__init__.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/__init__.py deleted file mode 100644 index 1e24e1eb87..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Init resources_plugin package.""" -from pydolphinscheduler.resources_plugin.github import GitHub -from pydolphinscheduler.resources_plugin.gitlab import GitLab -from pydolphinscheduler.resources_plugin.local import Local -from pydolphinscheduler.resources_plugin.oss import OSS -from pydolphinscheduler.resources_plugin.s3 import S3 - -__all__ = ["Local", "GitHub", "GitLab", "OSS", "S3"] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/base/__init__.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/base/__init__.py deleted file mode 100644 index 4253cda64d..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/base/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Init base package.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/base/bucket.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/base/bucket.py deleted file mode 100644 index bae4366c81..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/base/bucket.py +++ /dev/null @@ -1,86 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""DolphinScheduler BucketFileInfo and Bucket object.""" -from abc import ABCMeta, abstractmethod -from typing import Optional - - -class BucketFileInfo: - """A class that defines the details of BUCKET files. - - :param bucket: A string representing the bucket to which the bucket file belongs. - :param file_path: A string representing the bucket file path. - """ - - def __init__( - self, - bucket: Optional[str] = None, - file_path: Optional[str] = None, - *args, - **kwargs - ): - self.bucket = bucket - self.file_path = file_path - - -class OSSFileInfo(BucketFileInfo): - """A class that defines the details of OSS files. 
- - :param endpoint: A string representing the OSS file endpoint. - :param bucket: A string representing the bucket to which the OSS file belongs. - :param file_path: A string representing the OSS file path. - """ - - def __init__( - self, - endpoint: Optional[str] = None, - bucket: Optional[str] = None, - file_path: Optional[str] = None, - *args, - **kwargs - ): - super().__init__(bucket=bucket, file_path=file_path, *args, **kwargs) - self.endpoint = endpoint - - -class S3FileInfo(BucketFileInfo): - """A class that defines the details of S3 files. - - :param bucket: A string representing the bucket to which the S3 file belongs. - :param file_path: A string representing the S3 file path. - """ - - def __init__( - self, - bucket: Optional[str] = None, - file_path: Optional[str] = None, - *args, - **kwargs - ): - super().__init__(bucket=bucket, file_path=file_path, *args, **kwargs) - - -class Bucket(object, metaclass=ABCMeta): - """An abstract class of online code repository based on git implementation.""" - - _bucket_file_info: Optional = None - - @abstractmethod - def get_bucket_file_info(self, path: str): - """Get the detailed information of BUCKET file according to the file URL.""" - raise NotImplementedError diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/base/git.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/base/git.py deleted file mode 100644 index 4fc2a17ccb..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/base/git.py +++ /dev/null @@ -1,115 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""DolphinScheduler GitFileInfo and Git object.""" - -from abc import ABCMeta, abstractmethod -from typing import Optional - - -class GitFileInfo: - """A class that defines the details of GIT files. - - :param user: A string representing the user the git file belongs to. - :param repo_name: A string representing the repository to which the git file belongs. - :param branch: A string representing the branch to which the git file belongs. - :param file_path: A string representing the git file path. - """ - - def __init__( - self, - user: Optional[str] = None, - repo_name: Optional[str] = None, - branch: Optional[str] = None, - file_path: Optional[str] = None, - *args, - **kwargs - ): - self.user = user - self.repo_name = repo_name - self.branch = branch - self.file_path = file_path - - -class GitHubFileInfo(GitFileInfo): - """A class that defines the details of GitHub files. - - :param user: A string representing the user the GitHub file belongs to. - :param repo_name: A string representing the repository to which the GitHub file belongs. - :param branch: A string representing the branch to which the GitHub file belongs. - :param file_path: A string representing the GitHub file path. 
- """ - - def __init__( - self, - user: Optional[str] = None, - repo_name: Optional[str] = None, - branch: Optional[str] = None, - file_path: Optional[str] = None, - *args, - **kwargs - ): - super().__init__( - user=user, - repo_name=repo_name, - branch=branch, - file_path=file_path, - *args, - **kwargs - ) - - -class GitLabFileInfo(GitFileInfo): - """A class that defines the details of GitLab files. - - :param host: A string representing the domain name the GitLab file belongs to. - :param user: A string representing the user the GitLab file belongs to. - :param repo_name: A string representing the repository to which the GitLab file belongs. - :param branch: A string representing the branch to which the GitHub file belongs. - :param file_path: A string representing the GitHub file path. - """ - - def __init__( - self, - host: Optional[str] = None, - user: Optional[str] = None, - repo_name: Optional[str] = None, - branch: Optional[str] = None, - file_path: Optional[str] = None, - *args, - **kwargs - ): - super().__init__( - user=user, - repo_name=repo_name, - branch=branch, - file_path=file_path, - *args, - **kwargs - ) - self.host = host - - -class Git(object, metaclass=ABCMeta): - """An abstract class of online code repository based on git implementation.""" - - _git_file_info: Optional = None - - @abstractmethod - def get_git_file_info(self, path: str): - """Get the detailed information of GIT file according to the file URL.""" - raise NotImplementedError diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/github.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/github.py deleted file mode 100644 index 45648647c6..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/github.py +++ /dev/null @@ -1,106 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""DolphinScheduler github resource plugin.""" -import base64 -from typing import Optional -from urllib.parse import urljoin - -import requests - -from pydolphinscheduler.constants import Symbol -from pydolphinscheduler.core.resource_plugin import ResourcePlugin -from pydolphinscheduler.resources_plugin.base.git import Git, GitHubFileInfo - - -class GitHub(ResourcePlugin, Git): - """GitHub resource plugin, a plugin for task and workflow to dolphinscheduler to read resource. - - :param prefix: A string representing the prefix of GitHub. - :param access_token: A string used for identity authentication of GitHub private repository. 
- """ - - def __init__( - self, prefix: str, access_token: Optional[str] = None, *args, **kwargs - ): - super().__init__(prefix, *args, **kwargs) - self.access_token = access_token - - _git_file_info: Optional[GitHubFileInfo] = None - - def build_req_api( - self, - user: str, - repo_name: str, - file_path: str, - api: str, - ): - """Build request file content API.""" - api = api.replace("{user}", user) - api = api.replace("{repo_name}", repo_name) - api = api.replace("{file_path}", file_path) - return api - - def get_git_file_info(self, path: str): - """Get file information from the file url, like repository name, user, branch, and file path.""" - elements = path.split(Symbol.SLASH) - index = self.get_index(path, Symbol.SLASH, 7) - index = index + 1 - file_info = GitHubFileInfo( - user=elements[3], - repo_name=elements[4], - branch=elements[6], - file_path=path[index:], - ) - self._git_file_info = file_info - - def get_req_url(self): - """Build request URL according to file information.""" - return self.build_req_api( - user=self._git_file_info.user, - repo_name=self._git_file_info.repo_name, - file_path=self._git_file_info.file_path, - api="https://api.github.com/repos/{user}/{repo_name}/contents/{file_path}", - ) - - def read_file(self, suf: str): - """Get the content of the file. - - The address of the file is the prefix of the resource plugin plus the parameter suf. 
- """ - path = urljoin(self.prefix, suf) - return self.req(path) - - def req(self, path: str): - """Send HTTP request, parse response data, and get file content.""" - headers = { - "Content-Type": "application/json; charset=utf-8", - } - if self.access_token is not None: - headers.setdefault("Authorization", "Bearer %s" % self.access_token) - self.get_git_file_info(path) - response = requests.get( - headers=headers, - url=self.get_req_url(), - params={"ref": self._git_file_info.branch}, - ) - if response.status_code == requests.codes.ok: - json_response = response.json() - content = base64.b64decode(json_response["content"]) - return content.decode("utf-8") - else: - raise Exception(response.json()) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/gitlab.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/gitlab.py deleted file mode 100644 index f035ecaeff..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/gitlab.py +++ /dev/null @@ -1,112 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""DolphinScheduler gitlab resource plugin.""" -from typing import Optional -from urllib.parse import urljoin, urlparse - -import gitlab -import requests - -from pydolphinscheduler.constants import Symbol -from pydolphinscheduler.core.resource_plugin import ResourcePlugin -from pydolphinscheduler.resources_plugin.base.git import Git, GitLabFileInfo - - -class GitLab(ResourcePlugin, Git): - """GitLab object, declare GitLab resource plugin for task and workflow to dolphinscheduler. - - :param prefix: A string representing the prefix of GitLab. - :param private_token: A string used for identity authentication of GitLab private or Internal repository. - :param oauth_token: A string used for identity authentication of GitLab private or Internal repository. - :param username: A string representing the user of the repository. - :param password: A string representing the user password. - """ - - def __init__( - self, - prefix: str, - private_token: Optional[str] = None, - oauth_token: Optional[str] = None, - username: Optional[str] = None, - password: Optional[str] = None, - *args, - **kwargs, - ): - super().__init__(prefix, *args, **kwargs) - self.private_token = private_token - self.oauth_token = oauth_token - self.username = username - self.password = password - - def get_git_file_info(self, path: str): - """Get file information from the file url, like repository name, user, branch, and file path.""" - self.get_index(path, Symbol.SLASH, 8) - result = urlparse(path) - elements = result.path.split(Symbol.SLASH) - self._git_file_info = GitLabFileInfo( - host=f"{result.scheme}://{result.hostname}", - repo_name=elements[2], - branch=elements[5], - file_path=Symbol.SLASH.join( - str(elements[i]) for i in range(6, len(elements)) - ), - user=elements[1], - ) - - def authentication(self): - """Gitlab authentication.""" - host = self._git_file_info.host - if self.private_token is not None: - return gitlab.Gitlab(host, private_token=self.private_token) - if self.oauth_token is 
not None: - return gitlab.Gitlab(host, oauth_token=self.oauth_token) - if self.username is not None and self.password is not None: - oauth_token = self.OAuth_token() - return gitlab.Gitlab(host, oauth_token=oauth_token) - return gitlab.Gitlab(host) - - def OAuth_token(self): - """Obtain OAuth Token.""" - data = { - "grant_type": "password", - "username": self.username, - "password": self.password, - } - host = self._git_file_info.host - resp = requests.post("%s/oauth/token" % host, data=data) - oauth_token = resp.json()["access_token"] - return oauth_token - - def read_file(self, suf: str): - """Get the content of the file. - - The address of the file is the prefix of the resource plugin plus the parameter suf. - """ - path = urljoin(self.prefix, suf) - self.get_git_file_info(path) - gl = self.authentication() - project = gl.projects.get( - "%s/%s" % (self._git_file_info.user, self._git_file_info.repo_name) - ) - return ( - project.files.get( - file_path=self._git_file_info.file_path, ref=self._git_file_info.branch - ) - .decode() - .decode() - ) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/local.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/local.py deleted file mode 100644 index c1fc56d3d3..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/local.py +++ /dev/null @@ -1,56 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""DolphinScheduler local resource plugin.""" - -import os -from pathlib import Path - -from pydolphinscheduler.core.resource_plugin import ResourcePlugin -from pydolphinscheduler.exceptions import PyResPluginException - - -class Local(ResourcePlugin): - """Local object, declare local resource plugin for task and workflow to dolphinscheduler. - - :param prefix: A string representing the prefix of Local. - """ - - # [start init_method] - def __init__(self, prefix: str, *args, **kwargs): - super().__init__(prefix, *args, **kwargs) - - # [end init_method] - - # [start read_file_method] - def read_file(self, suf: str): - """Get the content of the file. - - The address of the file is the prefix of the resource plugin plus the parameter suf. 
- """ - path = Path(self.prefix).joinpath(suf) - if not path.exists(): - raise PyResPluginException("{} is not found".format(str(path))) - if not os.access(str(path), os.R_OK): - raise PyResPluginException( - "You don't have permission to access {}".format(self.prefix + suf) - ) - with open(path, "r") as f: - content = f.read() - return content - - # [end read_file_method] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/oss.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/oss.py deleted file mode 100644 index 1a9acbb9ca..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/oss.py +++ /dev/null @@ -1,76 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""DolphinScheduler oss resource plugin.""" -from typing import Optional -from urllib.parse import urljoin, urlparse - -import oss2 - -from pydolphinscheduler.constants import Symbol -from pydolphinscheduler.core.resource_plugin import ResourcePlugin -from pydolphinscheduler.resources_plugin.base.bucket import Bucket, OSSFileInfo - - -class OSS(ResourcePlugin, Bucket): - """OSS object, declare OSS resource plugin for task and workflow to dolphinscheduler. - - :param prefix: A string representing the prefix of OSS. - :param access_key_id: A string representing the ID of AccessKey for AliCloud OSS. - :param access_key_secret: A string representing the secret of AccessKey for AliCloud OSS. - """ - - def __init__( - self, - prefix: str, - access_key_id: Optional[str] = None, - access_key_secret: Optional[str] = None, - *args, - **kwargs, - ): - super().__init__(prefix, *args, **kwargs) - self.access_key_id = access_key_id - self.access_key_secret = access_key_secret - - _bucket_file_info: Optional[OSSFileInfo] = None - - def get_bucket_file_info(self, path: str): - """Get file information from the file url, like repository name, user, branch, and file path.""" - self.get_index(path, Symbol.SLASH, 3) - result = urlparse(path) - hostname = result.hostname - elements = hostname.split(Symbol.POINT) - self._bucket_file_info = OSSFileInfo( - endpoint=f"{result.scheme}://" - f"{Symbol.POINT.join(str(elements[i]) for i in range(1, len(elements)))}", - bucket=hostname.split(Symbol.POINT)[0], - file_path=result.path[1:], - ) - - def read_file(self, suf: str): - """Get the content of the file. - - The address of the file is the prefix of the resource plugin plus the parameter suf. 
- """ - path = urljoin(self.prefix, suf) - self.get_bucket_file_info(path) - auth = oss2.Auth(self.access_key_id, self.access_key_secret) - bucket = oss2.Bucket( - auth, self._bucket_file_info.endpoint, self._bucket_file_info.bucket - ) - result = bucket.get_object(self._bucket_file_info.file_path).read().decode() - return result.read().decode() diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/s3.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/s3.py deleted file mode 100644 index da1fe83fd1..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/s3.py +++ /dev/null @@ -1,74 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""DolphinScheduler S3 resource plugin.""" - -from typing import Optional -from urllib.parse import urljoin - -import boto3 - -from pydolphinscheduler.constants import Symbol -from pydolphinscheduler.core.resource_plugin import ResourcePlugin -from pydolphinscheduler.resources_plugin.base.bucket import Bucket, S3FileInfo - - -class S3(ResourcePlugin, Bucket): - """S3 object, declare S3 resource plugin for task and workflow to dolphinscheduler. 
- - :param prefix: A string representing the prefix of S3. - :param access_key_id: A string representing the ID of AccessKey for Amazon S3. - :param access_key_secret: A string representing the secret of AccessKey for Amazon S3. - """ - - def __init__( - self, - prefix: str, - access_key_id: Optional[str] = None, - access_key_secret: Optional[str] = None, - *args, - **kwargs - ): - super().__init__(prefix, *args, **kwargs) - self.access_key_id = access_key_id - self.access_key_secret = access_key_secret - - _bucket_file_info: Optional[S3FileInfo] = None - - def get_bucket_file_info(self, path: str): - """Get file information from the file url, like repository name, user, branch, and file path.""" - elements = path.split(Symbol.SLASH) - self.get_index(path, Symbol.SLASH, 3) - self._bucket_file_info = S3FileInfo( - bucket=elements[2].split(Symbol.POINT)[0], - file_path=Symbol.SLASH.join( - str(elements[i]) for i in range(3, len(elements)) - ), - ) - - def read_file(self, suf: str): - """Get the content of the file. - - The address of the file is the prefix of the resource plugin plus the parameter suf. - """ - path = urljoin(self.prefix, suf) - self.get_bucket_file_info(path) - bucket = self._bucket_file_info.bucket - key = self._bucket_file_info.file_path - s3_resource = boto3.resource("s3") - s3_object = s3_resource.Object(bucket, key) - return s3_object.get()["Body"].read().decode("utf-8") diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/__init__.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/__init__.py deleted file mode 100644 index 972b1b76dd..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/__init__.py +++ /dev/null @@ -1,69 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Init pydolphinscheduler.tasks package.""" - -from pydolphinscheduler.tasks.condition import FAILURE, SUCCESS, And, Condition, Or -from pydolphinscheduler.tasks.datax import CustomDataX, DataX -from pydolphinscheduler.tasks.dependent import Dependent -from pydolphinscheduler.tasks.dvc import DVCDownload, DVCInit, DVCUpload -from pydolphinscheduler.tasks.flink import Flink -from pydolphinscheduler.tasks.http import Http -from pydolphinscheduler.tasks.map_reduce import MR -from pydolphinscheduler.tasks.mlflow import ( - MLflowModels, - MLFlowProjectsAutoML, - MLFlowProjectsBasicAlgorithm, - MLFlowProjectsCustom, -) -from pydolphinscheduler.tasks.openmldb import OpenMLDB -from pydolphinscheduler.tasks.procedure import Procedure -from pydolphinscheduler.tasks.python import Python -from pydolphinscheduler.tasks.pytorch import Pytorch -from pydolphinscheduler.tasks.sagemaker import SageMaker -from pydolphinscheduler.tasks.shell import Shell -from pydolphinscheduler.tasks.spark import Spark -from pydolphinscheduler.tasks.sql import Sql -from pydolphinscheduler.tasks.sub_process import SubProcess -from pydolphinscheduler.tasks.switch import Branch, Default, Switch, SwitchCondition - -__all__ = [ - "Condition", - "DataX", - "CustomDataX", - "Dependent", - "DVCInit", - "DVCUpload", - "DVCDownload", - "Flink", - "Http", - "MR", - "OpenMLDB", - "MLFlowProjectsBasicAlgorithm", - "MLFlowProjectsCustom", - 
"MLFlowProjectsAutoML", - "MLflowModels", - "Procedure", - "Python", - "Pytorch", - "Shell", - "Spark", - "Sql", - "SubProcess", - "Switch", - "SageMaker", -] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/condition.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/condition.py deleted file mode 100644 index cb139f1587..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/condition.py +++ /dev/null @@ -1,204 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task Conditions.""" - -from typing import Dict, List - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.task import Task -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.models.base import Base - - -class Status(Base): - """Base class of Condition task status. - - It a parent class for :class:`SUCCESS` and :class:`FAILURE`. Provider status name - and :func:`get_define` to sub class. 
- """ - - def __init__(self, *tasks): - super().__init__(f"Condition.{self.status_name()}") - self.tasks = tasks - - def __repr__(self) -> str: - return "depend_item_list" - - @classmethod - def status_name(cls) -> str: - """Get name for Status or its sub class.""" - return cls.__name__.upper() - - def get_define(self, camel_attr: bool = True) -> List: - """Get status definition attribute communicate to Java gateway server.""" - content = [] - for task in self.tasks: - if not isinstance(task, Task): - raise PyDSParamException( - "%s only accept class Task or sub class Task, but get %s", - (self.status_name(), type(task)), - ) - content.append({"depTaskCode": task.code, "status": self.status_name()}) - return content - - -class SUCCESS(Status): - """Class SUCCESS to task condition, sub class of :class:`Status`.""" - - def __init__(self, *tasks): - super().__init__(*tasks) - - -class FAILURE(Status): - """Class FAILURE to task condition, sub class of :class:`Status`.""" - - def __init__(self, *tasks): - super().__init__(*tasks) - - -class ConditionOperator(Base): - """Set ConditionTask or ConditionOperator with specific operator.""" - - _DEFINE_ATTR = { - "relation", - } - - def __init__(self, *args): - super().__init__(self.__class__.__name__) - self.args = args - - def __repr__(self) -> str: - return "depend_task_list" - - @classmethod - def operator_name(cls) -> str: - """Get operator name in different class.""" - return cls.__name__.upper() - - @property - def relation(self) -> str: - """Get operator name in different class, for function :func:`get_define`.""" - return self.operator_name() - - def set_define_attr(self) -> str: - """Set attribute to function :func:`get_define`. - - It is a wrapper for both `And` and `Or` operator. 
- """ - result = [] - attr = None - for condition in self.args: - if isinstance(condition, (Status, ConditionOperator)): - if attr is None: - attr = repr(condition) - elif repr(condition) != attr: - raise PyDSParamException( - "Condition %s operator parameter only support same type.", - self.relation, - ) - else: - raise PyDSParamException( - "Condition %s operator parameter support ConditionTask and ConditionOperator but got %s.", - (self.relation, type(condition)), - ) - if attr == "depend_item_list": - result.extend(condition.get_define()) - else: - result.append(condition.get_define()) - setattr(self, attr, result) - return attr - - def get_define(self, camel_attr=True) -> Dict: - """Overwrite Base.get_define to get task Condition specific get define.""" - attr = self.set_define_attr() - dependent_define_attr = self._DEFINE_ATTR.union({attr}) - return super().get_define_custom( - camel_attr=True, custom_attr=dependent_define_attr - ) - - -class And(ConditionOperator): - """Operator And for task condition. - - It could accept both :class:`Task` and children of :class:`ConditionOperator`, - and set AND condition to those args. - """ - - def __init__(self, *args): - super().__init__(*args) - - -class Or(ConditionOperator): - """Operator Or for task condition. - - It could accept both :class:`Task` and children of :class:`ConditionOperator`, - and set OR condition to those args. 
- """ - - def __init__(self, *args): - super().__init__(*args) - - -class Condition(Task): - """Task condition object, declare behavior for condition task to dolphinscheduler.""" - - def __init__( - self, - name: str, - condition: ConditionOperator, - success_task: Task, - failed_task: Task, - *args, - **kwargs, - ): - super().__init__(name, TaskType.CONDITIONS, *args, **kwargs) - self.condition = condition - self.success_task = success_task - self.failed_task = failed_task - # Set condition tasks as current task downstream - self._set_dep() - - def _set_dep(self) -> None: - """Set upstream according to parameter `condition`.""" - upstream = [] - for cond in self.condition.args: - if isinstance(cond, ConditionOperator): - for status in cond.args: - upstream.extend(list(status.tasks)) - self.set_upstream(upstream) - self.set_downstream([self.success_task, self.failed_task]) - - @property - def condition_result(self) -> Dict: - """Get condition result define for java gateway.""" - return { - "successNode": [self.success_task.code], - "failedNode": [self.failed_task.code], - } - - @property - def task_params(self, camel_attr: bool = True, custom_attr: set = None) -> Dict: - """Override Task.task_params for Condition task. - - Condition task have some specials attribute `dependence`, and in most of the task - this attribute is None and use empty dict `{}` as default value. We do not use class - attribute `_task_custom_attr` due to avoid attribute cover. 
- """ - params = super().task_params - params["dependence"] = self.condition.get_define() - return params diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/datax.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/datax.py deleted file mode 100644 index 945f7824e4..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/datax.py +++ /dev/null @@ -1,127 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task datax.""" - -from typing import Dict, List, Optional - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.database import Database -from pydolphinscheduler.core.task import Task - - -class CustomDataX(Task): - """Task CustomDatax object, declare behavior for custom DataX task to dolphinscheduler. - - You provider json template for DataX, it can synchronize data according to the template you provided. 
- """ - - CUSTOM_CONFIG = 1 - - _task_custom_attr = {"custom_config", "json", "xms", "xmx"} - - ext: set = {".json"} - ext_attr: str = "_json" - - def __init__( - self, - name: str, - json: str, - xms: Optional[int] = 1, - xmx: Optional[int] = 1, - *args, - **kwargs - ): - self._json = json - super().__init__(name, TaskType.DATAX, *args, **kwargs) - self.custom_config = self.CUSTOM_CONFIG - self.xms = xms - self.xmx = xmx - - -class DataX(Task): - """Task DataX object, declare behavior for DataX task to dolphinscheduler. - - It should run database datax job in multiply sql link engine, such as: - - MySQL - - Oracle - - Postgresql - - SQLServer - You provider datasource_name and datatarget_name contain connection information, it decisions which - database type and database instance would synchronous data. - """ - - CUSTOM_CONFIG = 0 - - _task_custom_attr = { - "custom_config", - "sql", - "target_table", - "job_speed_byte", - "job_speed_record", - "pre_statements", - "post_statements", - "xms", - "xmx", - } - - ext: set = {".sql"} - ext_attr: str = "_sql" - - def __init__( - self, - name: str, - datasource_name: str, - datatarget_name: str, - sql: str, - target_table: str, - job_speed_byte: Optional[int] = 0, - job_speed_record: Optional[int] = 1000, - pre_statements: Optional[List[str]] = None, - post_statements: Optional[List[str]] = None, - xms: Optional[int] = 1, - xmx: Optional[int] = 1, - *args, - **kwargs - ): - self._sql = sql - super().__init__(name, TaskType.DATAX, *args, **kwargs) - self.custom_config = self.CUSTOM_CONFIG - self.datasource_name = datasource_name - self.datatarget_name = datatarget_name - self.target_table = target_table - self.job_speed_byte = job_speed_byte - self.job_speed_record = job_speed_record - self.pre_statements = pre_statements or [] - self.post_statements = post_statements or [] - self.xms = xms - self.xmx = xmx - - @property - def task_params(self, camel_attr: bool = True, custom_attr: set = None) -> Dict: - """Override 
Task.task_params for datax task. - - datax task have some specials attribute for task_params, and is odd if we - directly set as python property, so we Override Task.task_params here. - """ - params = super().task_params - datasource = Database(self.datasource_name, "dsType", "dataSource") - params.update(datasource) - - datatarget = Database(self.datatarget_name, "dtType", "dataTarget") - params.update(datatarget) - return params diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/dependent.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/dependent.py deleted file mode 100644 index 12ec6ba91d..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/dependent.py +++ /dev/null @@ -1,273 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Task dependent.""" - -from typing import Dict, Optional, Tuple - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.task import Task -from pydolphinscheduler.exceptions import PyDSJavaGatewayException, PyDSParamException -from pydolphinscheduler.java_gateway import JavaGate -from pydolphinscheduler.models.base import Base - -DEPENDENT_ALL_TASK_IN_WORKFLOW = "0" - - -class DependentDate(str): - """Constant of Dependent date value. - - These values set according to Java server models, if you want to add and change it, - please change Java server models first. - """ - - # TODO Maybe we should add parent level to DependentDate for easy to use, such as - # DependentDate.MONTH.THIS_MONTH - - # Hour - CURRENT_HOUR = "currentHour" - LAST_ONE_HOUR = "last1Hour" - LAST_TWO_HOURS = "last2Hours" - LAST_THREE_HOURS = "last3Hours" - LAST_TWENTY_FOUR_HOURS = "last24Hours" - - # Day - TODAY = "today" - LAST_ONE_DAYS = "last1Days" - LAST_TWO_DAYS = "last2Days" - LAST_THREE_DAYS = "last3Days" - LAST_SEVEN_DAYS = "last7Days" - - # Week - THIS_WEEK = "thisWeek" - LAST_WEEK = "lastWeek" - LAST_MONDAY = "lastMonday" - LAST_TUESDAY = "lastTuesday" - LAST_WEDNESDAY = "lastWednesday" - LAST_THURSDAY = "lastThursday" - LAST_FRIDAY = "lastFriday" - LAST_SATURDAY = "lastSaturday" - LAST_SUNDAY = "lastSunday" - - # Month - THIS_MONTH = "thisMonth" - LAST_MONTH = "lastMonth" - LAST_MONTH_BEGIN = "lastMonthBegin" - LAST_MONTH_END = "lastMonthEnd" - - -class DependentItem(Base): - """Dependent item object, minimal unit for task dependent. - - It declare which project, process_definition, task are dependent to this task. 
- """ - - _DEFINE_ATTR = { - "project_code", - "definition_code", - "dep_task_code", - "cycle", - "date_value", - } - - # TODO maybe we should conside overwrite operator `and` and `or` for DependentItem to - # support more easy way to set relation - def __init__( - self, - project_name: str, - process_definition_name: str, - dependent_task_name: Optional[str] = DEPENDENT_ALL_TASK_IN_WORKFLOW, - dependent_date: Optional[DependentDate] = DependentDate.TODAY, - ): - obj_name = f"{project_name}.{process_definition_name}.{dependent_task_name}.{dependent_date}" - super().__init__(obj_name) - self.project_name = project_name - self.process_definition_name = process_definition_name - self.dependent_task_name = dependent_task_name - if dependent_date is None: - raise PyDSParamException( - "Parameter dependent_date must provider by got None." - ) - else: - self.dependent_date = dependent_date - self._code = {} - - def __repr__(self) -> str: - return "depend_item_list" - - @property - def project_code(self) -> str: - """Get dependent project code.""" - return self.get_code_from_gateway().get("projectCode") - - @property - def definition_code(self) -> str: - """Get dependent definition code.""" - return self.get_code_from_gateway().get("processDefinitionCode") - - @property - def dep_task_code(self) -> str: - """Get dependent tasks code list.""" - if self.is_all_task: - return DEPENDENT_ALL_TASK_IN_WORKFLOW - else: - return self.get_code_from_gateway().get("taskDefinitionCode") - - # TODO Maybe we should get cycle from dependent date class. 
- @property - def cycle(self) -> str: - """Get dependent cycle.""" - if "Hour" in self.dependent_date: - return "hour" - elif self.dependent_date == "today" or "Days" in self.dependent_date: - return "day" - elif "Month" in self.dependent_date: - return "month" - else: - return "week" - - @property - def date_value(self) -> str: - """Get dependent date.""" - return self.dependent_date - - @property - def is_all_task(self) -> bool: - """Check whether dependent all tasks or not.""" - return self.dependent_task_name == DEPENDENT_ALL_TASK_IN_WORKFLOW - - @property - def code_parameter(self) -> Tuple: - """Get name info parameter to query code.""" - param = ( - self.project_name, - self.process_definition_name, - self.dependent_task_name if not self.is_all_task else None, - ) - return param - - def get_code_from_gateway(self) -> Dict: - """Get project, definition, task code from given parameter.""" - if self._code: - return self._code - else: - try: - self._code = JavaGate().get_dependent_info(*self.code_parameter) - return self._code - except Exception: - raise PyDSJavaGatewayException("Function get_code_from_gateway error.") - - -class DependentOperator(Base): - """Set DependentItem or dependItemList with specific operator.""" - - _DEFINE_ATTR = { - "relation", - } - - def __init__(self, *args): - super().__init__(self.__class__.__name__) - self.args = args - - def __repr__(self) -> str: - return "depend_task_list" - - @classmethod - def operator_name(cls) -> str: - """Get operator name in different class.""" - return cls.__name__.upper() - - @property - def relation(self) -> str: - """Get operator name in different class, for function :func:`get_define`.""" - return self.operator_name() - - def set_define_attr(self) -> str: - """Set attribute to function :func:`get_define`. - - It is a wrapper for both `And` and `Or` operator. 
- """ - result = [] - attr = None - for dependent in self.args: - if isinstance(dependent, (DependentItem, DependentOperator)): - if attr is None: - attr = repr(dependent) - elif repr(dependent) != attr: - raise PyDSParamException( - "Dependent %s operator parameter only support same type.", - self.relation, - ) - else: - raise PyDSParamException( - "Dependent %s operator parameter support DependentItem and " - "DependentOperator but got %s.", - (self.relation, type(dependent)), - ) - result.append(dependent.get_define()) - setattr(self, attr, result) - return attr - - def get_define(self, camel_attr=True) -> Dict: - """Overwrite Base.get_define to get task dependent specific get define.""" - attr = self.set_define_attr() - dependent_define_attr = self._DEFINE_ATTR.union({attr}) - return super().get_define_custom( - camel_attr=True, custom_attr=dependent_define_attr - ) - - -class And(DependentOperator): - """Operator And for task dependent. - - It could accept both :class:`DependentItem` and children of :class:`DependentOperator`, - and set AND condition to those args. - """ - - def __init__(self, *args): - super().__init__(*args) - - -class Or(DependentOperator): - """Operator Or for task dependent. - - It could accept both :class:`DependentItem` and children of :class:`DependentOperator`, - and set OR condition to those args. - """ - - def __init__(self, *args): - super().__init__(*args) - - -class Dependent(Task): - """Task dependent object, declare behavior for dependent task to dolphinscheduler.""" - - def __init__(self, name: str, dependence: DependentOperator, *args, **kwargs): - super().__init__(name, TaskType.DEPENDENT, *args, **kwargs) - self.dependence = dependence - - @property - def task_params(self, camel_attr: bool = True, custom_attr: set = None) -> Dict: - """Override Task.task_params for dependent task. 
- - Dependent task have some specials attribute `dependence`, and in most of the task - this attribute is None and use empty dict `{}` as default value. We do not use class - attribute `_task_custom_attr` due to avoid attribute cover. - """ - params = super().task_params - params["dependence"] = self.dependence.get_define() - return params diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/dvc.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/dvc.py deleted file mode 100644 index c5b5cd5c91..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/dvc.py +++ /dev/null @@ -1,124 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Task dvc.""" -from copy import deepcopy -from typing import Dict - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.task import Task - - -class DvcTaskType(str): - """Constants for dvc task type.""" - - INIT = "Init DVC" - DOWNLOAD = "Download" - UPLOAD = "Upload" - - -class BaseDVC(Task): - """Base class for dvc task.""" - - dvc_task_type = None - - _task_custom_attr = { - "dvc_task_type", - "dvc_repository", - } - - _child_task_dvc_attr = set() - - def __init__(self, name: str, repository: str, *args, **kwargs): - super().__init__(name, TaskType.DVC, *args, **kwargs) - self.dvc_repository = repository - - @property - def task_params(self) -> Dict: - """Return task params.""" - self._task_custom_attr = deepcopy(self._task_custom_attr) - self._task_custom_attr.update(self._child_task_dvc_attr) - return super().task_params - - -class DVCInit(BaseDVC): - """Task DVC Init object, declare behavior for DVC Init task to dolphinscheduler.""" - - dvc_task_type = DvcTaskType.INIT - - _child_task_dvc_attr = {"dvc_store_url"} - - def __init__(self, name: str, repository: str, store_url: str, *args, **kwargs): - super().__init__(name, repository, *args, **kwargs) - self.dvc_store_url = store_url - - -class DVCDownload(BaseDVC): - """Task DVC Download object, declare behavior for DVC Download task to dolphinscheduler.""" - - dvc_task_type = DvcTaskType.DOWNLOAD - - _child_task_dvc_attr = { - "dvc_load_save_data_path", - "dvc_data_location", - "dvc_version", - } - - def __init__( - self, - name: str, - repository: str, - data_path_in_dvc_repository: str, - data_path_in_worker: str, - version: str, - *args, - **kwargs - ): - super().__init__(name, repository, *args, **kwargs) - self.dvc_data_location = data_path_in_dvc_repository - self.dvc_load_save_data_path = data_path_in_worker - self.dvc_version = version - - -class DVCUpload(BaseDVC): - """Task DVC Upload object, declare behavior for DVC Upload task to dolphinscheduler.""" - - 
dvc_task_type = DvcTaskType.UPLOAD - - _child_task_dvc_attr = { - "dvc_load_save_data_path", - "dvc_data_location", - "dvc_version", - "dvc_message", - } - - def __init__( - self, - name: str, - repository: str, - data_path_in_worker: str, - data_path_in_dvc_repository: str, - version: str, - message: str, - *args, - **kwargs - ): - super().__init__(name, repository, *args, **kwargs) - self.dvc_data_location = data_path_in_dvc_repository - self.dvc_load_save_data_path = data_path_in_worker - self.dvc_version = version - self.dvc_message = message diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/flink.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/flink.py deleted file mode 100644 index 83cae956a5..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/flink.py +++ /dev/null @@ -1,93 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Task Flink.""" - -from typing import Optional - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.engine import Engine, ProgramType - - -class FlinkVersion(str): - """Flink version, for now it just contain `HIGHT` and `LOW`.""" - - LOW_VERSION = "<1.10" - HIGHT_VERSION = ">=1.10" - - -class DeployMode(str): - """Flink deploy mode, for now it just contain `LOCAL` and `CLUSTER`.""" - - LOCAL = "local" - CLUSTER = "cluster" - - -class Flink(Engine): - """Task flink object, declare behavior for flink task to dolphinscheduler.""" - - _task_custom_attr = { - "deploy_mode", - "flink_version", - "slot", - "task_manager", - "job_manager_memory", - "task_manager_memory", - "app_name", - "parallelism", - "main_args", - "others", - } - - def __init__( - self, - name: str, - main_class: str, - main_package: str, - program_type: Optional[ProgramType] = ProgramType.SCALA, - deploy_mode: Optional[DeployMode] = DeployMode.CLUSTER, - flink_version: Optional[FlinkVersion] = FlinkVersion.LOW_VERSION, - app_name: Optional[str] = None, - job_manager_memory: Optional[str] = "1G", - task_manager_memory: Optional[str] = "2G", - slot: Optional[int] = 1, - task_manager: Optional[int] = 2, - parallelism: Optional[int] = 1, - main_args: Optional[str] = None, - others: Optional[str] = None, - *args, - **kwargs - ): - super().__init__( - name, - TaskType.FLINK, - main_class, - main_package, - program_type, - *args, - **kwargs - ) - self.deploy_mode = deploy_mode - self.flink_version = flink_version - self.app_name = app_name - self.job_manager_memory = job_manager_memory - self.task_manager_memory = task_manager_memory - self.slot = slot - self.task_manager = task_manager - self.parallelism = parallelism - self.main_args = main_args - self.others = others diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/func_wrap.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/func_wrap.py deleted file mode 
100644 index c0b73a1fc2..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/func_wrap.py +++ /dev/null @@ -1,61 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task function wrapper allows using decorator to create a task.""" - -import functools -import inspect -import itertools -import types - -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.tasks.python import Python - - -def _get_func_str(func: types.FunctionType) -> str: - """Get Python function string without indent from decorator. - - :param func: The function which wraps by decorator ``@task``. - """ - lines = inspect.getsourcelines(func)[0] - - src_strip = "" - lead_space_num = None - for line in lines: - if lead_space_num is None: - lead_space_num = sum(1 for _ in itertools.takewhile(str.isspace, line)) - if line.strip() == "@task": - continue - elif line.strip().startswith("@"): - raise PyDSParamException( - "Do no support other decorators for function ``task`` decorator." 
- ) - src_strip += line[lead_space_num:] - return src_strip - - -def task(func: types.FunctionType): - """Decorate which covert Python function into pydolphinscheduler task.""" - - @functools.wraps(func) - def wrapper(*args, **kwargs): - func_str = _get_func_str(func) - return Python( - name=kwargs.get("name", func.__name__), definition=func_str, *args, **kwargs - ) - - return wrapper diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/http.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/http.py deleted file mode 100644 index 781333d481..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/http.py +++ /dev/null @@ -1,101 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task shell.""" - -from typing import Optional - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.task import Task -from pydolphinscheduler.exceptions import PyDSParamException - - -class HttpMethod: - """Constant of HTTP method.""" - - GET = "GET" - POST = "POST" - HEAD = "HEAD" - PUT = "PUT" - DELETE = "DELETE" - - -class HttpCheckCondition: - """Constant of HTTP check condition. 
- - For now it contain four value: - - STATUS_CODE_DEFAULT: when http response code equal to 200, mark as success. - - STATUS_CODE_CUSTOM: when http response code equal to the code user define, mark as success. - - BODY_CONTAINS: when http response body contain text user define, mark as success. - - BODY_NOT_CONTAINS: when http response body do not contain text user define, mark as success. - """ - - STATUS_CODE_DEFAULT = "STATUS_CODE_DEFAULT" - STATUS_CODE_CUSTOM = "STATUS_CODE_CUSTOM" - BODY_CONTAINS = "BODY_CONTAINS" - BODY_NOT_CONTAINS = "BODY_NOT_CONTAINS" - - -class Http(Task): - """Task HTTP object, declare behavior for HTTP task to dolphinscheduler.""" - - _task_custom_attr = { - "url", - "http_method", - "http_params", - "http_check_condition", - "condition", - "connect_timeout", - "socket_timeout", - } - - def __init__( - self, - name: str, - url: str, - http_method: Optional[str] = HttpMethod.GET, - http_params: Optional[str] = None, - http_check_condition: Optional[str] = HttpCheckCondition.STATUS_CODE_DEFAULT, - condition: Optional[str] = None, - connect_timeout: Optional[int] = 60000, - socket_timeout: Optional[int] = 60000, - *args, - **kwargs - ): - super().__init__(name, TaskType.HTTP, *args, **kwargs) - self.url = url - if not hasattr(HttpMethod, http_method): - raise PyDSParamException( - "Parameter http_method %s not support.", http_method - ) - self.http_method = http_method - self.http_params = http_params or [] - if not hasattr(HttpCheckCondition, http_check_condition): - raise PyDSParamException( - "Parameter http_check_condition %s not support.", http_check_condition - ) - self.http_check_condition = http_check_condition - if ( - http_check_condition != HttpCheckCondition.STATUS_CODE_DEFAULT - and condition is None - ): - raise PyDSParamException( - "Parameter condition must provider if http_check_condition not equal to STATUS_CODE_DEFAULT" - ) - self.condition = condition - self.connect_timeout = connect_timeout - self.socket_timeout = 
socket_timeout diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/map_reduce.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/map_reduce.py deleted file mode 100644 index 5050bd3cf1..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/map_reduce.py +++ /dev/null @@ -1,52 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Task MR.""" - -from typing import Optional - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.engine import Engine, ProgramType - - -class MR(Engine): - """Task mr object, declare behavior for mr task to dolphinscheduler.""" - - _task_custom_attr = { - "app_name", - "main_args", - "others", - } - - def __init__( - self, - name: str, - main_class: str, - main_package: str, - program_type: Optional[ProgramType] = ProgramType.SCALA, - app_name: Optional[str] = None, - main_args: Optional[str] = None, - others: Optional[str] = None, - *args, - **kwargs - ): - super().__init__( - name, TaskType.MR, main_class, main_package, program_type, *args, **kwargs - ) - self.app_name = app_name - self.main_args = main_args - self.others = others diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/mlflow.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/mlflow.py deleted file mode 100644 index e86797aadf..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/mlflow.py +++ /dev/null @@ -1,256 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Task mlflow.""" -from copy import deepcopy -from typing import Dict, Optional - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.task import Task - - -class MLflowTaskType(str): - """MLflow task type.""" - - MLFLOW_PROJECTS = "MLflow Projects" - MLFLOW_MODELS = "MLflow Models" - - -class MLflowJobType(str): - """MLflow job type.""" - - AUTOML = "AutoML" - BASIC_ALGORITHM = "BasicAlgorithm" - CUSTOM_PROJECT = "CustomProject" - - -class MLflowDeployType(str): - """MLflow deploy type.""" - - MLFLOW = "MLFLOW" - DOCKER = "DOCKER" - - -DEFAULT_MLFLOW_TRACKING_URI = "http://127.0.0.1:5000" -DEFAULT_VERSION = "master" - - -class BaseMLflow(Task): - """Base MLflow task.""" - - mlflow_task_type = None - - _task_custom_attr = { - "mlflow_tracking_uri", - "mlflow_task_type", - } - - _child_task_mlflow_attr = set() - - def __init__(self, name: str, mlflow_tracking_uri: str, *args, **kwargs): - super().__init__(name, TaskType.MLFLOW, *args, **kwargs) - self.mlflow_tracking_uri = mlflow_tracking_uri - - @property - def task_params(self) -> Dict: - """Return task params.""" - self._task_custom_attr = deepcopy(self._task_custom_attr) - self._task_custom_attr.update(self._child_task_mlflow_attr) - return super().task_params - - -class MLflowModels(BaseMLflow): - """Task MLflow models object, declare behavior for MLflow models task to dolphinscheduler. - - Deploy machine learning models in diverse serving environments. - - :param name: task name - :param model_uri: Model-URI of MLflow , support models://suffix format and runs:/ format. 
- See https://mlflow.org/docs/latest/tracking.html#artifact-stores - :param mlflow_tracking_uri: MLflow tracking server uri, default is http://127.0.0.1:5000 - :param deploy_mode: MLflow deploy mode, support MLFLOW, DOCKER, default is DOCKER - :param port: deploy port, default is 7000 - """ - - mlflow_task_type = MLflowTaskType.MLFLOW_MODELS - - _child_task_mlflow_attr = { - "deploy_type", - "deploy_model_key", - "deploy_port", - } - - def __init__( - self, - name: str, - model_uri: str, - mlflow_tracking_uri: Optional[str] = DEFAULT_MLFLOW_TRACKING_URI, - deploy_mode: Optional[str] = MLflowDeployType.DOCKER, - port: Optional[int] = 7000, - *args, - **kwargs - ): - """Init mlflow models task.""" - super().__init__(name, mlflow_tracking_uri, *args, **kwargs) - self.deploy_type = deploy_mode.upper() - self.deploy_model_key = model_uri - self.deploy_port = port - - -class MLFlowProjectsCustom(BaseMLflow): - """Task MLflow projects object, declare behavior for MLflow Custom projects task to dolphinscheduler. - - :param name: task name - :param repository: Repository url of MLflow Project, Support git address and directory on worker. - If it's in a subdirectory, We add # to support this (same as mlflow run) , - for example https://github.com/mlflow/mlflow#examples/xgboost/xgboost_native. 
- :param mlflow_tracking_uri: MLflow tracking server uri, default is http://127.0.0.1:5000 - :param experiment_name: MLflow experiment name, default is empty - :param parameters: MLflow project parameters, default is empty - :param version: MLflow project version, default is master - - """ - - mlflow_task_type = MLflowTaskType.MLFLOW_PROJECTS - mlflow_job_type = MLflowJobType.CUSTOM_PROJECT - - _child_task_mlflow_attr = { - "mlflow_job_type", - "experiment_name", - "params", - "mlflow_project_repository", - "mlflow_project_version", - } - - def __init__( - self, - name: str, - repository: str, - mlflow_tracking_uri: Optional[str] = DEFAULT_MLFLOW_TRACKING_URI, - experiment_name: Optional[str] = "", - parameters: Optional[str] = "", - version: Optional[str] = "master", - *args, - **kwargs - ): - """Init mlflow projects task.""" - super().__init__(name, mlflow_tracking_uri, *args, **kwargs) - self.mlflow_project_repository = repository - self.experiment_name = experiment_name - self.params = parameters - self.mlflow_project_version = version - - -class MLFlowProjectsAutoML(BaseMLflow): - """Task MLflow projects object, declare behavior for AutoML task to dolphinscheduler. - - :param name: task name - :param data_path: data path of MLflow Project, Support git address and directory on worker. - :param automl_tool: The AutoML tool used, currently supports autosklearn and flaml. 
- :param mlflow_tracking_uri: MLflow tracking server uri, default is http://127.0.0.1:5000 - :param experiment_name: MLflow experiment name, default is empty - :param model_name: MLflow model name, default is empty - :param parameters: MLflow project parameters, default is empty - - """ - - mlflow_task_type = MLflowTaskType.MLFLOW_PROJECTS - mlflow_job_type = MLflowJobType.AUTOML - - _child_task_mlflow_attr = { - "mlflow_job_type", - "experiment_name", - "model_name", - "register_model", - "data_path", - "params", - "automl_tool", - } - - def __init__( - self, - name: str, - data_path: str, - automl_tool: Optional[str] = "flaml", - mlflow_tracking_uri: Optional[str] = DEFAULT_MLFLOW_TRACKING_URI, - experiment_name: Optional[str] = "", - model_name: Optional[str] = "", - parameters: Optional[str] = "", - *args, - **kwargs - ): - """Init mlflow projects task.""" - super().__init__(name, mlflow_tracking_uri, *args, **kwargs) - self.data_path = data_path - self.experiment_name = experiment_name - self.model_name = model_name - self.params = parameters - self.automl_tool = automl_tool.lower() - self.register_model = bool(model_name) - - -class MLFlowProjectsBasicAlgorithm(BaseMLflow): - """Task MLflow projects object, declare behavior for BasicAlgorithm task to dolphinscheduler. - - :param name: task name - :param data_path: data path of MLflow Project, Support git address and directory on worker. - :param algorithm: The selected algorithm currently supports LR, SVM, LightGBM and XGboost - based on scikit-learn form. 
- :param mlflow_tracking_uri: MLflow tracking server uri, default is http://127.0.0.1:5000 - :param experiment_name: MLflow experiment name, default is empty - :param model_name: MLflow model name, default is empty - :param parameters: MLflow project parameters, default is empty - :param search_params: Whether to search the parameters, default is empty - - """ - - mlflow_job_type = MLflowJobType.BASIC_ALGORITHM - mlflow_task_type = MLflowTaskType.MLFLOW_PROJECTS - - _child_task_mlflow_attr = { - "mlflow_job_type", - "experiment_name", - "model_name", - "register_model", - "data_path", - "params", - "algorithm", - "search_params", - } - - def __init__( - self, - name: str, - data_path: str, - algorithm: Optional[str] = "lightgbm", - mlflow_tracking_uri: Optional[str] = DEFAULT_MLFLOW_TRACKING_URI, - experiment_name: Optional[str] = "", - model_name: Optional[str] = "", - parameters: Optional[str] = "", - search_params: Optional[str] = "", - *args, - **kwargs - ): - """Init mlflow projects task.""" - super().__init__(name, mlflow_tracking_uri, *args, **kwargs) - self.data_path = data_path - self.experiment_name = experiment_name - self.model_name = model_name - self.params = parameters - self.algorithm = algorithm.lower() - self.search_params = search_params - self.register_model = bool(model_name) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/openmldb.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/openmldb.py deleted file mode 100644 index 5dad36ec11..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/openmldb.py +++ /dev/null @@ -1,48 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task OpenMLDB.""" - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.task import Task - - -class OpenMLDB(Task): - """Task OpenMLDB object, declare behavior for OpenMLDB task to dolphinscheduler. - - :param name: task name - :param zookeeper: OpenMLDB cluster zookeeper address, e.g. 127.0.0.1:2181. - :param zookeeper_path: OpenMLDB cluster zookeeper path, e.g. /openmldb. - :param execute_mode: Determine the init mode, offline or online. You can switch it in sql statementself. - :param sql: SQL statement. - """ - - _task_custom_attr = { - "zk", - "zk_path", - "execute_mode", - "sql", - } - - def __init__( - self, name, zookeeper, zookeeper_path, execute_mode, sql, *args, **kwargs - ): - super().__init__(name, TaskType.OPENMLDB, *args, **kwargs) - self.zk = zookeeper - self.zk_path = zookeeper_path - self.execute_mode = execute_mode - self.sql = sql diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/procedure.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/procedure.py deleted file mode 100644 index 6383e075ab..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/procedure.py +++ /dev/null @@ -1,60 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task procedure.""" - -from typing import Dict - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.database import Database -from pydolphinscheduler.core.task import Task - - -class Procedure(Task): - """Task Procedure object, declare behavior for Procedure task to dolphinscheduler. - - It should run database procedure job in multiply sql lik engine, such as: - - ClickHouse - - DB2 - - HIVE - - MySQL - - Oracle - - Postgresql - - Presto - - SQLServer - You provider datasource_name contain connection information, it decisions which - database type and database instance would run this sql. - """ - - _task_custom_attr = {"method"} - - def __init__(self, name: str, datasource_name: str, method: str, *args, **kwargs): - super().__init__(name, TaskType.PROCEDURE, *args, **kwargs) - self.datasource_name = datasource_name - self.method = method - - @property - def task_params(self, camel_attr: bool = True, custom_attr: set = None) -> Dict: - """Override Task.task_params for produce task. - - produce task have some specials attribute for task_params, and is odd if we - directly set as python property, so we Override Task.task_params here. 
- """ - params = super().task_params - datasource = Database(self.datasource_name, "type", "datasource") - params.update(datasource) - return params diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/python.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/python.py deleted file mode 100644 index 593cc52cc2..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/python.py +++ /dev/null @@ -1,105 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task Python.""" - -import inspect -import logging -import re -import types -from typing import Union - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.task import Task -from pydolphinscheduler.exceptions import PyDSParamException - -log = logging.getLogger(__file__) - - -class Python(Task): - """Task Python object, declare behavior for Python task to dolphinscheduler. - - Python task support two types of parameters for :param:``definition``, and here is an example: - - Using str type of :param:``definition`` - - .. 
code-block:: python - - python_task = Python(name="str_type", definition="print('Hello Python task.')") - - Or using Python callable type of :param:``definition`` - - .. code-block:: python - - def foo(): - print("Hello Python task.") - - python_task = Python(name="str_type", definition=foo) - - :param name: The name for Python task. It define the task name. - :param definition: String format of Python script you want to execute or Python callable you - want to execute. - """ - - _task_custom_attr = {"raw_script", "definition"} - - ext: set = {".py"} - ext_attr: Union[str, types.FunctionType] = "_definition" - - def __init__( - self, name: str, definition: Union[str, types.FunctionType], *args, **kwargs - ): - self._definition = definition - super().__init__(name, TaskType.PYTHON, *args, **kwargs) - - def _build_exe_str(self) -> str: - """Build executable string from given definition. - - Attribute ``self.definition`` almost is a function, we need to call this function after parsing it - to string. The easier way to call a function is using syntax ``func()`` and we use it to call it too. 
- """ - definition = getattr(self, "definition") - if isinstance(definition, types.FunctionType): - py_function = inspect.getsource(definition) - func_str = f"{py_function}{definition.__name__}()" - else: - pattern = re.compile("^def (\\w+)\\(") - find = pattern.findall(definition) - if not find: - log.warning( - "Python definition is simple script instead of function, with value %s", - definition, - ) - return definition - # Keep function str and function callable always have one blank line - func_str = ( - f"{definition}{find[0]}()" - if definition.endswith("\n") - else f"{definition}\n{find[0]}()" - ) - return func_str - - @property - def raw_script(self) -> str: - """Get python task define attribute `raw_script`.""" - if isinstance(getattr(self, "definition"), (str, types.FunctionType)): - return self._build_exe_str() - else: - raise PyDSParamException( - "Parameter definition do not support % for now.", - type(getattr(self, "definition")), - ) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/pytorch.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/pytorch.py deleted file mode 100644 index 4767f7ecee..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/pytorch.py +++ /dev/null @@ -1,95 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task Pytorch.""" -from typing import Optional - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.task import Task - - -class DEFAULT: - """Default values for Pytorch.""" - - is_create_environment = False - project_path = "." - python_command = "${PYTHON_HOME}" - - -class Pytorch(Task): - """Task Pytorch object, declare behavior for Pytorch task to dolphinscheduler. - - See also: `DolphinScheduler Pytorch Task Plugin - `_ - - :param name: task name - :param script: Entry to the Python script file that you want to run. - :param script_params: Input parameters at run time. - :param project_path: The path to the project. Default "." . - :param is_create_environment: is create environment. Default False. - :param python_command: The path to the python command. Default "${PYTHON_HOME}". - :param python_env_tool: The python environment tool. Default "conda". - :param requirements: The path to the requirements.txt file. Default "requirements.txt". - :param conda_python_version: The python version of conda environment. Default "3.7". 
- """ - - _task_custom_attr = { - "script", - "script_params", - "other_params", - "python_path", - "is_create_environment", - "python_command", - "python_env_tool", - "requirements", - "conda_python_version", - } - - def __init__( - self, - name: str, - script: str, - script_params: str = "", - project_path: Optional[str] = DEFAULT.project_path, - is_create_environment: Optional[bool] = DEFAULT.is_create_environment, - python_command: Optional[str] = DEFAULT.python_command, - python_env_tool: Optional[str] = "conda", - requirements: Optional[str] = "requirements.txt", - conda_python_version: Optional[str] = "3.7", - *args, - **kwargs, - ): - """Init Pytorch task.""" - super().__init__(name, TaskType.PYTORCH, *args, **kwargs) - self.script = script - self.script_params = script_params - self.is_create_environment = is_create_environment - self.python_path = project_path - self.python_command = python_command - self.python_env_tool = python_env_tool - self.requirements = requirements - self.conda_python_version = conda_python_version - - @property - def other_params(self): - """Return other params.""" - conds = [ - self.is_create_environment != DEFAULT.is_create_environment, - self.python_path != DEFAULT.project_path, - self.python_command != DEFAULT.python_command, - ] - return any(conds) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sagemaker.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sagemaker.py deleted file mode 100644 index 30b128d172..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sagemaker.py +++ /dev/null @@ -1,40 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task SageMaker.""" - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.task import Task - - -class SageMaker(Task): - """Task SageMaker object, declare behavior for SageMaker task to dolphinscheduler. - - :param name: A unique, meaningful string for the SageMaker task. - :param sagemaker_request_json: Request parameters of StartPipelineExecution, - see also `AWS API - `_ - - """ - - _task_custom_attr = { - "sagemaker_request_json", - } - - def __init__(self, name: str, sagemaker_request_json: str, *args, **kwargs): - super().__init__(name, TaskType.SAGEMAKER, *args, **kwargs) - self.sagemaker_request_json = sagemaker_request_json diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/shell.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/shell.py deleted file mode 100644 index 36ec4e87d0..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/shell.py +++ /dev/null @@ -1,58 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task shell.""" - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.task import Task - - -class Shell(Task): - """Task shell object, declare behavior for shell task to dolphinscheduler. - - :param name: A unique, meaningful string for the shell task. - :param command: One or more command want to run in this task. - - It could be simply command:: - - Shell(name=..., command="echo task shell") - - or maybe same commands trying to do complex task:: - - command = '''echo task shell step 1; - echo task shell step 2; - echo task shell step 3 - ''' - - Shell(name=..., command=command) - - """ - - # TODO maybe we could use instance name to replace attribute `name` - # which is simplify as `task_shell = Shell(command = "echo 1")` and - # task.name assign to `task_shell` - - _task_custom_attr = { - "raw_script", - } - - ext: set = {".sh", ".zsh"} - ext_attr: str = "_raw_script" - - def __init__(self, name: str, command: str, *args, **kwargs): - self._raw_script = command - super().__init__(name, TaskType.SHELL, *args, **kwargs) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/spark.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/spark.py deleted file mode 100644 index eb9c621043..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/spark.py +++ /dev/null @@ -1,84 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task Spark.""" - -from typing import Optional - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.engine import Engine, ProgramType - - -class DeployMode(str): - """SPARK deploy mode, for now it just contain `LOCAL`, `CLIENT` and `CLUSTER`.""" - - LOCAL = "local" - CLIENT = "client" - CLUSTER = "cluster" - - -class Spark(Engine): - """Task spark object, declare behavior for spark task to dolphinscheduler.""" - - _task_custom_attr = { - "deploy_mode", - "driver_cores", - "driver_memory", - "num_executors", - "executor_memory", - "executor_cores", - "app_name", - "main_args", - "others", - } - - def __init__( - self, - name: str, - main_class: str, - main_package: str, - program_type: Optional[ProgramType] = ProgramType.SCALA, - deploy_mode: Optional[DeployMode] = DeployMode.CLUSTER, - app_name: Optional[str] = None, - driver_cores: Optional[int] = 1, - driver_memory: Optional[str] = "512M", - num_executors: Optional[int] = 2, - executor_memory: Optional[str] = "2G", - executor_cores: Optional[int] = 2, - main_args: Optional[str] = None, - others: Optional[str] = None, - *args, - **kwargs - ): - super().__init__( - name, - TaskType.SPARK, - main_class, - main_package, - program_type, - *args, - **kwargs - ) - self.deploy_mode = deploy_mode - 
self.app_name = app_name - self.driver_cores = driver_cores - self.driver_memory = driver_memory - self.num_executors = num_executors - self.executor_memory = executor_memory - self.executor_cores = executor_cores - self.main_args = main_args - self.others = others diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sql.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sql.py deleted file mode 100644 index 4bebf8379d..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sql.py +++ /dev/null @@ -1,122 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task sql.""" - -import logging -import re -from typing import Dict, Optional - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.database import Database -from pydolphinscheduler.core.task import Task - -log = logging.getLogger(__file__) - - -class SqlType: - """SQL type, for now it just contain `SELECT` and `NO_SELECT`.""" - - SELECT = "0" - NOT_SELECT = "1" - - -class Sql(Task): - """Task SQL object, declare behavior for SQL task to dolphinscheduler. 
- - It should run sql job in multiply sql lik engine, such as: - - ClickHouse - - DB2 - - HIVE - - MySQL - - Oracle - - Postgresql - - Presto - - SQLServer - You provider datasource_name contain connection information, it decisions which - database type and database instance would run this sql. - """ - - _task_custom_attr = { - "sql", - "sql_type", - "pre_statements", - "post_statements", - "display_rows", - } - - ext: set = {".sql"} - ext_attr: str = "_sql" - - def __init__( - self, - name: str, - datasource_name: str, - sql: str, - sql_type: Optional[str] = None, - pre_statements: Optional[str] = None, - post_statements: Optional[str] = None, - display_rows: Optional[int] = 10, - *args, - **kwargs - ): - self._sql = sql - super().__init__(name, TaskType.SQL, *args, **kwargs) - self.param_sql_type = sql_type - self.datasource_name = datasource_name - self.pre_statements = pre_statements or [] - self.post_statements = post_statements or [] - self.display_rows = display_rows - - @property - def sql_type(self) -> str: - """Judgement sql type, it will return the SQL type for type `SELECT` or `NOT_SELECT`. - - If `param_sql_type` dot not specific, will use regexp to check - which type of the SQL is. But if `param_sql_type` is specific - will use the parameter overwrites the regexp way - """ - if ( - self.param_sql_type == SqlType.SELECT - or self.param_sql_type == SqlType.NOT_SELECT - ): - log.info( - "The sql type is specified by a parameter, with value %s", - self.param_sql_type, - ) - return self.param_sql_type - pattern_select_str = ( - "^(?!(.* |)insert |(.* |)delete |(.* |)drop " - "|(.* |)update |(.* |)truncate |(.* |)alter |(.* |)create ).*" - ) - pattern_select = re.compile(pattern_select_str, re.IGNORECASE) - if pattern_select.match(self._sql) is None: - return SqlType.NOT_SELECT - else: - return SqlType.SELECT - - @property - def task_params(self, camel_attr: bool = True, custom_attr: set = None) -> Dict: - """Override Task.task_params for sql task. 
- - sql task have some specials attribute for task_params, and is odd if we - directly set as python property, so we Override Task.task_params here. - """ - params = super().task_params - datasource = Database(self.datasource_name, "type", "datasource") - params.update(datasource) - return params diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sub_process.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sub_process.py deleted file mode 100644 index c7a9f8bd11..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sub_process.py +++ /dev/null @@ -1,54 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Task sub_process.""" - -from typing import Dict - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.task import Task -from pydolphinscheduler.exceptions import PyDSProcessDefinitionNotAssignException -from pydolphinscheduler.java_gateway import JavaGate - - -class SubProcess(Task): - """Task SubProcess object, declare behavior for SubProcess task to dolphinscheduler.""" - - _task_custom_attr = {"process_definition_code"} - - def __init__(self, name: str, process_definition_name: str, *args, **kwargs): - super().__init__(name, TaskType.SUB_PROCESS, *args, **kwargs) - self.process_definition_name = process_definition_name - - @property - def process_definition_code(self) -> str: - """Get process definition code, a wrapper for :func:`get_process_definition_info`.""" - return self.get_process_definition_info(self.process_definition_name).get( - "code" - ) - - def get_process_definition_info(self, process_definition_name: str) -> Dict: - """Get process definition info from java gateway, contains process definition id, name, code.""" - if not self.process_definition: - raise PyDSProcessDefinitionNotAssignException( - "ProcessDefinition must be provider for task SubProcess." - ) - return JavaGate().get_process_definition_info( - self.process_definition.user.name, - self.process_definition.project.name, - process_definition_name, - ) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/switch.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/switch.py deleted file mode 100644 index 45edaa9aac..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/switch.py +++ /dev/null @@ -1,166 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task Switch.""" - -from typing import Dict, Optional - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.task import Task -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.models.base import Base - - -class SwitchBranch(Base): - """Base class of ConditionBranch of task switch. - - It a parent class for :class:`Branch` and :class:`Default`. - """ - - _DEFINE_ATTR = { - "next_node", - } - - def __init__(self, task: Task, exp: Optional[str] = None): - super().__init__(f"Switch.{self.__class__.__name__.upper()}") - self.task = task - self.exp = exp - - @property - def next_node(self) -> str: - """Get task switch property next_node, it return task code when init class switch.""" - return self.task.code - - @property - def condition(self) -> Optional[str]: - """Get task switch property condition.""" - return self.exp - - def get_define(self, camel_attr: bool = True) -> Dict: - """Get :class:`ConditionBranch` definition attribute communicate to Java gateway server.""" - if self.condition: - self._DEFINE_ATTR.add("condition") - return super().get_define() - - -class Branch(SwitchBranch): - """Common condition branch for switch task. - - If any condition in :class:`Branch` match, would set this :class:`Branch`'s task as downstream of task - switch. 
If all condition branch do not match would set :class:`Default`'s task as task switch downstream. - """ - - def __init__(self, condition: str, task: Task): - super().__init__(task, condition) - - -class Default(SwitchBranch): - """Class default branch for switch task. - - If all condition of :class:`Branch` do not match, task switch would run the tasks in :class:`Default` - and set :class:`Default`'s task as switch downstream. Please notice that each switch condition - could only have one single :class:`Default`. - """ - - def __init__(self, task: Task): - super().__init__(task) - - -class SwitchCondition(Base): - """Set switch condition of given parameter.""" - - _DEFINE_ATTR = { - "depend_task_list", - } - - def __init__(self, *args): - super().__init__(self.__class__.__name__) - self.args = args - - def set_define_attr(self) -> None: - """Set attribute to function :func:`get_define`. - - It is a wrapper for both `And` and `Or` operator. - """ - result = [] - num_branch_default = 0 - for condition in self.args: - if not isinstance(condition, SwitchBranch): - raise PyDSParamException( - "Task Switch's parameter only support SwitchBranch but got %s.", - type(condition), - ) - # Default number branch checker - if num_branch_default >= 1 and isinstance(condition, Default): - raise PyDSParamException( - "Task Switch's parameter only support exactly one default branch." - ) - if isinstance(condition, Default): - self._DEFINE_ATTR.add("next_node") - setattr(self, "next_node", condition.next_node) - num_branch_default += 1 - elif isinstance(condition, Branch): - result.append(condition.get_define()) - # Handle switch default branch, default value is `""` if not provide. 
- if num_branch_default == 0: - self._DEFINE_ATTR.add("next_node") - setattr(self, "next_node", "") - setattr(self, "depend_task_list", result) - - def get_define(self, camel_attr=True) -> Dict: - """Overwrite Base.get_define to get task Condition specific get define.""" - self.set_define_attr() - return super().get_define() - - -class Switch(Task): - """Task switch object, declare behavior for switch task to dolphinscheduler. - - Param of process definition or at least one local param of task must be set - if task `switch` in this workflow. - """ - - _task_ignore_attr = { - "condition_result", - "dependence", - } - - def __init__(self, name: str, condition: SwitchCondition, *args, **kwargs): - super().__init__(name, TaskType.SWITCH, *args, **kwargs) - self.condition = condition - # Set condition tasks as current task downstream - self._set_dep() - - def _set_dep(self) -> None: - """Set downstream according to parameter `condition`.""" - downstream = [] - for condition in self.condition.args: - if isinstance(condition, SwitchBranch): - downstream.append(condition.task) - self.set_downstream(downstream) - - @property - def task_params(self, camel_attr: bool = True, custom_attr: set = None) -> Dict: - """Override Task.task_params for switch task. - - switch task have some specials attribute `switch`, and in most of the task - this attribute is None and use empty dict `{}` as default value. We do not use class - attribute `_task_custom_attr` due to avoid attribute cover. 
- """ - params = super().task_params - params["switchResult"] = self.condition.get_define() - return params diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/__init__.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/__init__.py deleted file mode 100644 index f8d3fbf62f..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Init utils package.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/date.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/date.py deleted file mode 100644 index 18cf93e318..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/date.py +++ /dev/null @@ -1,82 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Date util function collections.""" - -from datetime import datetime - -from pydolphinscheduler.constants import Delimiter, Time - -LEN_SUPPORT_DATETIME = ( - 15, - 19, -) - -FMT_SHORT = f"{Time.FMT_SHORT_DATE} {Time.FMT_NO_COLON_TIME}" -FMT_DASH = f"{Time.FMT_DASH_DATE} {Time.FMT_STD_TIME}" -FMT_STD = f"{Time.FMT_STD_DATE} {Time.FMT_STD_TIME}" - -MAX_DATETIME = datetime(9999, 12, 31, 23, 59, 59) - - -def conv_to_schedule(src: datetime) -> str: - """Convert given datetime to schedule date string.""" - return datetime.strftime(src, FMT_STD) - - -def conv_from_str(src: str) -> datetime: - """Convert given string to datetime. - - This function give an ability to convert string to datetime, and for now it could handle - format like: - - %Y-%m-%d - - %Y/%m/%d - - %Y%m%d - - %Y-%m-%d %H:%M:%S - - %Y/%m/%d %H:%M:%S - - %Y%m%d %H%M%S - If pattern not like above be given will raise NotImplementedError. 
- """ - len_ = len(src) - if len_ == Time.LEN_SHORT_DATE: - return datetime.strptime(src, Time.FMT_SHORT_DATE) - elif len_ == Time.LEN_STD_DATE: - if Delimiter.BAR in src: - return datetime.strptime(src, Time.FMT_STD_DATE) - elif Delimiter.DASH in src: - return datetime.strptime(src, Time.FMT_DASH_DATE) - else: - raise NotImplementedError( - "%s could not be convert to datetime for now.", src - ) - elif len_ in LEN_SUPPORT_DATETIME: - if Delimiter.BAR in src and Delimiter.COLON in src: - return datetime.strptime(src, FMT_STD) - elif Delimiter.DASH in src and Delimiter.COLON in src: - return datetime.strptime(src, FMT_DASH) - elif ( - Delimiter.DASH not in src - and Delimiter.BAR not in src - and Delimiter.COLON not in src - ): - return datetime.strptime(src, FMT_SHORT) - else: - raise NotImplementedError( - "%s could not be convert to datetime for now.", src - ) - else: - raise NotImplementedError("%s could not be convert to datetime for now.", src) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/file.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/file.py deleted file mode 100644 index 075b9025b2..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/file.py +++ /dev/null @@ -1,57 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -"""File util for pydolphinscheduler.""" - -from pathlib import Path -from typing import Optional - - -def write( - content: str, - to_path: str, - create: Optional[bool] = True, - overwrite: Optional[bool] = False, -) -> None: - """Write configs dict to configuration file. - - :param content: The source string want to write to :param:`to_path`. - :param to_path: The path want to write content. - :param create: Whether create the file parent directory or not if it does not exist. - If set ``True`` will create file with :param:`to_path` if path not exists, otherwise - ``False`` will not create. Default ``True``. - :param overwrite: Whether overwrite the file or not if it exists. If set ``True`` - will overwrite the exists content, otherwise ``False`` will not overwrite it. Default ``True``. - """ - path = Path(to_path) - if not path.parent.exists(): - if create: - path.parent.mkdir(parents=True) - else: - raise ValueError( - "Parent directory do not exists and set param `create` to `False`." - ) - if not path.exists(): - with path.open(mode="w") as f: - f.write(content) - elif overwrite: - with path.open(mode="w") as f: - f.write(content) - else: - raise FileExistsError( - "File %s already exists and you choose not overwrite mode.", to_path - ) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/string.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/string.py deleted file mode 100644 index e7e781c4d6..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/string.py +++ /dev/null @@ -1,39 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""String util function collections.""" - -from pydolphinscheduler.constants import Delimiter - - -def attr2camel(attr: str, include_private=True): - """Covert class attribute name to camel case.""" - if include_private: - attr = attr.lstrip(Delimiter.UNDERSCORE) - return snake2camel(attr) - - -def snake2camel(snake: str): - """Covert snake case to camel case.""" - components = snake.split(Delimiter.UNDERSCORE) - return components[0] + "".join(x.title() for x in components[1:]) - - -def class_name2camel(class_name: str): - """Covert class name string to camel case.""" - class_name = class_name.lstrip(Delimiter.UNDERSCORE) - return class_name[0].lower() + snake2camel(class_name[1:]) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/yaml_parser.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/yaml_parser.py deleted file mode 100644 index 46ee08cec8..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/yaml_parser.py +++ /dev/null @@ -1,159 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""YAML parser utils, parser yaml string to ``ruamel.yaml`` object and nested key dict.""" - -import copy -import io -from typing import Any, Dict, Optional - -from ruamel.yaml import YAML -from ruamel.yaml.comments import CommentedMap - - -class YamlParser: - """A parser to parse Yaml file and provider easier way to access or change value. - - This parser provider delimiter string key to get or set :class:`ruamel.yaml.YAML` object - - For example, yaml config named ``test.yaml`` and its content as below: - - .. code-block:: yaml - - one: - two1: - three: value1 - two2: value2 - - you could get ``value1`` and ``value2`` by nested path - - .. code-block:: python - - yaml_parser = YamlParser("test.yaml") - - # Use function ``get`` to get value - value1 = yaml_parser.get("one.two1.three") - # Or use build-in ``__getitem__`` to get value - value2 = yaml_parser["one.two2"] - - or you could change ``value1`` to ``value3``, also change ``value2`` to ``value4`` by nested path assigned - - .. 
code-block:: python - - yaml_parser["one.two1.three"] = "value3" - yaml_parser["one.two2"] = "value4" - """ - - def __init__(self, content: str, delimiter: Optional[str] = "."): - self._content = content - self.src_parser = content - self._delimiter = delimiter - - @property - def src_parser(self) -> CommentedMap: - """Get src_parser property.""" - return self._src_parser - - @src_parser.setter - def src_parser(self, content: str) -> None: - """Set src_parser property.""" - self._yaml = YAML() - self._src_parser = self._yaml.load(content) - - def parse_nested_dict( - self, result: Dict, commented_map: CommentedMap, key: str - ) -> None: - """Parse :class:`ruamel.yaml.comments.CommentedMap` to nested dict using :param:`delimiter`.""" - if not isinstance(commented_map, CommentedMap): - return - for sub_key in set(commented_map.keys()): - next_key = f"{key}{self._delimiter}{sub_key}" - result[next_key] = commented_map[sub_key] - self.parse_nested_dict(result, commented_map[sub_key], next_key) - - @property - def dict_parser(self) -> Dict: - """Get :class:`CommentedMap` to nested dict using :param:`delimiter` as key delimiter. - - Use Depth-First-Search get all nested key and value, and all key connect by :param:`delimiter`. - It make users could easier access or change :class:`CommentedMap` object. - - For example, yaml config named ``test.yaml`` and its content as below: - - .. code-block:: yaml - - one: - two1: - three: value1 - two2: value2 - - It could parser to nested dict as - - .. 
code-block:: python - - { - "one": ordereddict([('two1', ordereddict([('three', 'value1')])), ('two2', 'value2')]), - "one.two1": ordereddict([('three', 'value1')]), - "one.two1.three": "value1", - "one.two2": "value2", - } - """ - res = dict() - src_parser_copy = copy.deepcopy(self.src_parser) - - base_keys = set(src_parser_copy.keys()) - if not base_keys: - return res - else: - for key in base_keys: - res[key] = src_parser_copy[key] - self.parse_nested_dict(res, src_parser_copy[key], key) - return res - - def __contains__(self, key) -> bool: - return key in self.dict_parser - - def __getitem__(self, key: str) -> Any: - return self.dict_parser[key] - - def __setitem__(self, key: str, val: Any) -> None: - if key not in self.dict_parser: - raise KeyError("Key %s do not exists.", key) - - mid = None - keys = key.split(self._delimiter) - for idx, k in enumerate(keys, 1): - if idx == len(keys): - mid[k] = val - else: - mid = mid[k] if mid else self.src_parser[k] - - def get(self, key: str) -> Any: - """Get value by key, is call ``__getitem__``.""" - return self[key] - - def __str__(self) -> str: - """Transfer :class:`YamlParser` to string object. - - It is useful when users want to output the :class:`YamlParser` object they change just now. - """ - buf = io.StringIO() - self._yaml.dump(self.src_parser, buf) - return buf.getvalue() - - def __repr__(self) -> str: - return f"YamlParser({str(self)})" diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/__init__.py b/dolphinscheduler-python/pydolphinscheduler/tests/__init__.py deleted file mode 100644 index 5ce1f82a1a..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Init tests package.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/cli/__init__.py b/dolphinscheduler-python/pydolphinscheduler/tests/cli/__init__.py deleted file mode 100644 index f1a4396af6..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/cli/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Init command line interface tests.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/cli/test_config.py b/dolphinscheduler-python/pydolphinscheduler/tests/cli/test_config.py deleted file mode 100644 index 516ad754a2..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/cli/test_config.py +++ /dev/null @@ -1,198 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test command line interface subcommand `config`.""" - -import os -from pathlib import Path - -import pytest - -from pydolphinscheduler.cli.commands import cli -from pydolphinscheduler.configuration import BUILD_IN_CONFIG_PATH, config_path -from tests.testing.cli import CliTestWrapper -from tests.testing.constants import DEV_MODE, ENV_PYDS_HOME -from tests.testing.file import get_file_content - -config_file = "config.yaml" - - -@pytest.fixture -def teardown_file_env(): - """Util for deleting temp configuration file and pop env var after test finish.""" - yield - config_file_path = config_path() - if config_file_path.exists(): - config_file_path.unlink() - # pop environment variable to keep test cases dependent - os.environ.pop(ENV_PYDS_HOME, None) - assert ENV_PYDS_HOME not in os.environ - - -@pytest.mark.parametrize( - "home", - [ - None, - "/tmp/pydolphinscheduler", - "/tmp/test_abc", - ], -) -def test_config_init(teardown_file_env, home): - """Test command line interface `config --init`.""" - if home: - os.environ[ENV_PYDS_HOME] = home - elif DEV_MODE: - pytest.skip( - "Avoid delete ~/pydolphinscheduler/config.yaml by accident when test locally." 
- ) - - config_file_path = config_path() - assert not config_file_path.exists() - - cli_test = CliTestWrapper(cli, ["config", "--init"]) - cli_test.assert_success() - - assert config_file_path.exists() - assert get_file_content(config_file_path) == get_file_content(BUILD_IN_CONFIG_PATH) - - -@pytest.mark.parametrize( - "key, expect", - [ - # We test each key in one single section - ("java_gateway.address", "127.0.0.1"), - ("default.user.name", "userPythonGateway"), - ("default.workflow.project", "project-pydolphin"), - ], -) -def test_config_get(teardown_file_env, key: str, expect: str): - """Test command line interface `config --get XXX`.""" - os.environ[ENV_PYDS_HOME] = "/tmp/pydolphinscheduler" - cli_test = CliTestWrapper(cli, ["config", "--init"]) - cli_test.assert_success() - - cli_test = CliTestWrapper(cli, ["config", "--get", key]) - cli_test.assert_success(output=f"{key} = {expect}", fuzzy=True) - - -@pytest.mark.parametrize( - "keys, expects", - [ - # We test mix section keys - (("java_gateway.address", "java_gateway.port"), ("127.0.0.1", "25333")), - ( - ("java_gateway.auto_convert", "default.user.tenant"), - ("True", "tenant_pydolphin"), - ), - ( - ( - "java_gateway.port", - "default.user.state", - "default.workflow.worker_group", - ), - ("25333", "1", "default"), - ), - ], -) -def test_config_get_multiple(teardown_file_env, keys: str, expects: str): - """Test command line interface `config --get KEY1 --get KEY2 ...`.""" - os.environ[ENV_PYDS_HOME] = "/tmp/pydolphinscheduler" - cli_test = CliTestWrapper(cli, ["config", "--init"]) - cli_test.assert_success() - - get_args = ["config"] - for key in keys: - get_args.append("--get") - get_args.append(key) - cli_test = CliTestWrapper(cli, get_args) - - for idx, expect in enumerate(expects): - cli_test.assert_success(output=f"{keys[idx]} = {expect}", fuzzy=True) - - -@pytest.mark.parametrize( - "key, value", - [ - # We test each key in one single section - ("java_gateway.address", "127.1.1.1"), - 
("default.user.name", "editUserPythonGateway"), - ("default.workflow.project", "edit-project-pydolphin"), - ], -) -def test_config_set(teardown_file_env, key: str, value: str): - """Test command line interface `config --set KEY VALUE`.""" - path = "/tmp/pydolphinscheduler" - assert not Path(path).joinpath(config_file).exists() - os.environ[ENV_PYDS_HOME] = path - cli_test = CliTestWrapper(cli, ["config", "--init"]) - cli_test.assert_success() - - # Make sure value do not exists first - cli_test = CliTestWrapper(cli, ["config", "--get", key]) - assert f"{key} = {value}" not in cli_test.result.output - - cli_test = CliTestWrapper(cli, ["config", "--set", key, value]) - cli_test.assert_success() - - cli_test = CliTestWrapper(cli, ["config", "--get", key]) - assert f"{key} = {value}" in cli_test.result.output - - -@pytest.mark.parametrize( - "keys, values", - [ - # We test each key in mixture section - (("java_gateway.address", "java_gateway.port"), ("127.1.1.1", "25444")), - ( - ("java_gateway.auto_convert", "default.user.tenant"), - ("False", "edit_tenant_pydolphin"), - ), - ( - ( - "java_gateway.port", - "default.user.state", - "default.workflow.worker_group", - ), - ("25555", "0", "not-default"), - ), - ], -) -def test_config_set_multiple(teardown_file_env, keys: str, values: str): - """Test command line interface `config --set KEY1 VAL1 --set KEY2 VAL2`.""" - path = "/tmp/pydolphinscheduler" - assert not Path(path).joinpath(config_file).exists() - os.environ[ENV_PYDS_HOME] = path - cli_test = CliTestWrapper(cli, ["config", "--init"]) - cli_test.assert_success() - - set_args = ["config"] - for idx, key in enumerate(keys): - # Make sure values do not exists first - cli_test = CliTestWrapper(cli, ["config", "--get", key]) - assert f"{key} = {values[idx]}" not in cli_test.result.output - - set_args.append("--set") - set_args.append(key) - set_args.append(values[idx]) - - cli_test = CliTestWrapper(cli, set_args) - cli_test.assert_success() - - for idx, key in 
enumerate(keys): - # Make sure values exists after `config --set` run - cli_test = CliTestWrapper(cli, ["config", "--get", key]) - assert f"{key} = {values[idx]}" in cli_test.result.output diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/cli/test_version.py b/dolphinscheduler-python/pydolphinscheduler/tests/cli/test_version.py deleted file mode 100644 index b61d26da02..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/cli/test_version.py +++ /dev/null @@ -1,67 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test command line interface subcommand `version`.""" - -from unittest.mock import patch - -import pytest - -import pydolphinscheduler -from pydolphinscheduler.cli.commands import cli -from tests.testing.cli import CliTestWrapper - - -def test_version(): - """Test whether subcommand `version` correct.""" - cli_test = CliTestWrapper(cli, ["version"]) - cli_test.assert_success(output=f"{pydolphinscheduler.__version__}") - - -@pytest.mark.parametrize( - "version, part, idx", - [ - ("1.2.3", "major", 0), - ("0.1.3", "minor", 1), - ("3.1.0", "micro", 2), - ("1.2.3-beta-1", "micro", 2), - ("1.2.3-alpha", "micro", 2), - ("1.2.3a2", "micro", 2), - ("1.2.3b1", "micro", 2), - ], -) -@patch("pydolphinscheduler.__version__") -def test_version_part(mock_version, version: str, part: str, idx: int): - """Test subcommand `version` option `--part`.""" - mock_version.return_value = version - cli_test = CliTestWrapper(cli, ["version", "--part", part]) - cli_test.assert_success(output=f"{pydolphinscheduler.__version__.split('.')[idx]}") - - -@pytest.mark.parametrize( - "option, output", - [ - # not support option - (["version", "--not-support"], "No such option"), - # not support option value - (["version", "--part", "abc"], "Invalid value for '--part'"), - ], -) -def test_version_not_support_option(option, output): - """Test subcommand `version` not support option or option value.""" - cli_test = CliTestWrapper(cli, option) - cli_test.assert_fail(ret_code=2, output=output, fuzzy=True) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/core/__init__.py b/dolphinscheduler-python/pydolphinscheduler/tests/core/__init__.py deleted file mode 100644 index 62ce0ea4ee..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/core/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Init core package tests.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_configuration.py b/dolphinscheduler-python/pydolphinscheduler/tests/core/test_configuration.py deleted file mode 100644 index b9dc8cb656..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_configuration.py +++ /dev/null @@ -1,272 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test class :mod:`pydolphinscheduler.core.configuration`' method.""" - -import importlib -import os -from pathlib import Path -from typing import Any - -import pytest - -from pydolphinscheduler import configuration -from pydolphinscheduler.configuration import ( - BUILD_IN_CONFIG_PATH, - config_path, - get_single_config, - set_single_config, -) -from pydolphinscheduler.exceptions import PyDSConfException -from pydolphinscheduler.utils.yaml_parser import YamlParser -from tests.testing.constants import DEV_MODE, ENV_PYDS_HOME -from tests.testing.file import get_file_content - - -@pytest.fixture -def teardown_file_env(): - """Util for deleting temp configuration file and pop env var after test finish.""" - yield - config_file_path = config_path() - if config_file_path.exists(): - config_file_path.unlink() - os.environ.pop(ENV_PYDS_HOME, None) - - -@pytest.mark.parametrize( - "val, expect", - [ - ("1", 1), - ("123", 123), - ("4567", 4567), - (b"1234", 1234), - ], -) -def test_get_int(val: Any, expect: int): - """Test function :func:`configuration.get_int`.""" - assert configuration.get_int(val) == expect - - -@pytest.mark.parametrize( - "val", - [ - "a", - "1a", - "1d2", - "1723-", - ], -) -def test_get_int_error(val: Any): - """Test function :func:`configuration.get_int`.""" - with pytest.raises(ValueError): - configuration.get_int(val) - - -@pytest.mark.parametrize( - "val, expect", - [ - ("t", True), - ("true", True), - (1, True), - (True, True), - ("f", False), - ("false", False), - (0, False), - (123, False), - ("abc", False), - ("abc1", False), - (False, False), - ], -) -def test_get_bool(val: Any, expect: bool): - """Test function :func:`configuration.get_bool`.""" - assert configuration.get_bool(val) == expect - - -@pytest.mark.parametrize( - "home, expect", - [ - (None, "~/pydolphinscheduler/config.yaml"), - ("/tmp/pydolphinscheduler", "/tmp/pydolphinscheduler/config.yaml"), - ("/tmp/test_abc", "/tmp/test_abc/config.yaml"), - ], -) -def 
test_config_path(home: Any, expect: str): - """Test function :func:`config_path`.""" - if home: - os.environ[ENV_PYDS_HOME] = home - assert Path(expect).expanduser() == configuration.config_path() - - -@pytest.mark.parametrize( - "home", - [ - None, - "/tmp/pydolphinscheduler", - "/tmp/test_abc", - ], -) -def test_init_config_file(teardown_file_env, home: Any): - """Test init config file.""" - if home: - os.environ[ENV_PYDS_HOME] = home - elif DEV_MODE: - pytest.skip( - "Avoid delete ~/pydolphinscheduler/config.yaml by accident when test locally." - ) - assert not config_path().exists() - configuration.init_config_file() - assert config_path().exists() - - assert get_file_content(config_path()) == get_file_content(BUILD_IN_CONFIG_PATH) - - -@pytest.mark.parametrize( - "home", - [ - None, - "/tmp/pydolphinscheduler", - "/tmp/test_abc", - ], -) -def test_init_config_file_duplicate(teardown_file_env, home: Any): - """Test raise error with init config file which already exists.""" - if home: - os.environ[ENV_PYDS_HOME] = home - elif DEV_MODE: - pytest.skip( - "Avoid delete ~/pydolphinscheduler/config.yaml by accident when test locally." 
- ) - assert not config_path().exists() - configuration.init_config_file() - assert config_path().exists() - - with pytest.raises(PyDSConfException, match=".*file already exists.*"): - configuration.init_config_file() - - -def test_get_configs_build_in(): - """Test function :func:`get_configs` with build-in config file.""" - content = get_file_content(BUILD_IN_CONFIG_PATH) - assert YamlParser(content).src_parser == configuration.get_configs().src_parser - assert YamlParser(content).dict_parser == configuration.get_configs().dict_parser - - -@pytest.mark.parametrize( - "key, val, new_val", - [ - ("java_gateway.address", "127.0.0.1", "127.1.1.1"), - ("java_gateway.port", 25333, 25555), - ("java_gateway.auto_convert", True, False), - ("default.user.name", "userPythonGateway", "editUserPythonGateway"), - ("default.user.password", "userPythonGateway", "editUserPythonGateway"), - ( - "default.user.email", - "userPythonGateway@dolphinscheduler.com", - "userPythonGateway@edit.com", - ), - ("default.user.phone", 11111111111, 22222222222), - ("default.user.state", 1, 0), - ("default.workflow.project", "project-pydolphin", "eidt-project-pydolphin"), - ("default.workflow.tenant", "tenant_pydolphin", "edit_tenant_pydolphin"), - ("default.workflow.user", "userPythonGateway", "editUserPythonGateway"), - ("default.workflow.queue", "queuePythonGateway", "editQueuePythonGateway"), - ("default.workflow.worker_group", "default", "specific"), - ("default.workflow.time_zone", "Asia/Shanghai", "Asia/Beijing"), - ("default.workflow.warning_type", "NONE", "ALL"), - ], -) -def test_single_config_get_set(teardown_file_env, key: str, val: Any, new_val: Any): - """Test function :func:`get_single_config` and :func:`set_single_config`.""" - assert val == get_single_config(key) - set_single_config(key, new_val) - assert new_val == get_single_config(key) - - -def test_single_config_get_set_not_exists_key(): - """Test function :func:`get_single_config` and :func:`set_single_config` error while key 
not exists.""" - not_exists_key = "i_am_not_exists_key" - with pytest.raises(PyDSConfException, match=".*do not exists.*"): - get_single_config(not_exists_key) - with pytest.raises(PyDSConfException, match=".*do not exists.*"): - set_single_config(not_exists_key, not_exists_key) - - -@pytest.mark.parametrize( - "config_name, expect", - [ - ("JAVA_GATEWAY_ADDRESS", "127.0.0.1"), - ("JAVA_GATEWAY_PORT", 25333), - ("JAVA_GATEWAY_AUTO_CONVERT", True), - ("USER_NAME", "userPythonGateway"), - ("USER_PASSWORD", "userPythonGateway"), - ("USER_EMAIL", "userPythonGateway@dolphinscheduler.com"), - ("USER_PHONE", "11111111111"), - ("USER_STATE", 1), - ("WORKFLOW_PROJECT", "project-pydolphin"), - ("WORKFLOW_TENANT", "tenant_pydolphin"), - ("WORKFLOW_USER", "userPythonGateway"), - ("WORKFLOW_QUEUE", "queuePythonGateway"), - ("WORKFLOW_WORKER_GROUP", "default"), - ("WORKFLOW_TIME_ZONE", "Asia/Shanghai"), - ("WORKFLOW_WARNING_TYPE", "NONE"), - ], -) -def test_get_configuration(config_name: str, expect: Any): - """Test get exists attribute in :mod:`configuration`.""" - assert expect == getattr(configuration, config_name) - - -@pytest.mark.parametrize( - "config_name, src, dest", - [ - ("JAVA_GATEWAY_ADDRESS", "127.0.0.1", "192.168.1.1"), - ("JAVA_GATEWAY_PORT", 25333, 25334), - ("JAVA_GATEWAY_AUTO_CONVERT", True, False), - ("USER_NAME", "userPythonGateway", "envUserPythonGateway"), - ("USER_PASSWORD", "userPythonGateway", "envUserPythonGateway"), - ( - "USER_EMAIL", - "userPythonGateway@dolphinscheduler.com", - "userPythonGateway@dolphinscheduler.com", - ), - ("USER_PHONE", "11111111111", "22222222222"), - ("USER_STATE", 1, 0), - ("WORKFLOW_PROJECT", "project-pydolphin", "env-project-pydolphin"), - ("WORKFLOW_TENANT", "tenant_pydolphin", "env-tenant_pydolphin"), - ("WORKFLOW_USER", "userPythonGateway", "envUserPythonGateway"), - ("WORKFLOW_QUEUE", "queuePythonGateway", "envQueuePythonGateway"), - ("WORKFLOW_WORKER_GROUP", "default", "custom"), - ("WORKFLOW_TIME_ZONE", 
"Asia/Shanghai", "America/Los_Angeles"), - ("WORKFLOW_WARNING_TYPE", "NONE", "ALL"), - ], -) -def test_get_configuration_env(config_name: str, src: Any, dest: Any): - """Test get exists attribute from environment variable in :mod:`configuration`.""" - assert getattr(configuration, config_name) == src - - env_name = f"PYDS_{config_name}" - os.environ[env_name] = str(dest) - # reload module configuration to re-get config from environment. - importlib.reload(configuration) - assert getattr(configuration, config_name) == dest - - # pop and reload configuration to test whether this config equal to `src` value - os.environ.pop(env_name, None) - importlib.reload(configuration) - assert getattr(configuration, config_name) == src - assert env_name not in os.environ diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_database.py b/dolphinscheduler-python/pydolphinscheduler/tests/core/test_database.py deleted file mode 100644 index 1286a4a7f8..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_database.py +++ /dev/null @@ -1,54 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Database.""" - - -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.core.database import Database - -TEST_DATABASE_DATASOURCE_NAME = "test_datasource" -TEST_DATABASE_TYPE_KEY = "type" -TEST_DATABASE_KEY = "datasource" - - -@pytest.mark.parametrize( - "expect", - [ - { - TEST_DATABASE_TYPE_KEY: "mock_type", - TEST_DATABASE_KEY: 1, - } - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch( - "pydolphinscheduler.core.database.Database.get_database_info", - return_value=({"id": 1, "type": "mock_type"}), -) -def test_get_datasource_detail(mock_datasource, mock_code_version, expect): - """Test :func:`get_database_type` and :func:`get_database_id` can return expect value.""" - database_info = Database( - TEST_DATABASE_DATASOURCE_NAME, TEST_DATABASE_TYPE_KEY, TEST_DATABASE_KEY - ) - assert expect == database_info diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_default_config_yaml.py b/dolphinscheduler-python/pydolphinscheduler/tests/core/test_default_config_yaml.py deleted file mode 100644 index b4d5e07c7a..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_default_config_yaml.py +++ /dev/null @@ -1,39 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test default config file.""" - -from ruamel.yaml import YAML -from ruamel.yaml.comments import CommentedMap - -from tests.testing.path import path_default_config_yaml - - -def nested_key_check(comment_map: CommentedMap) -> None: - """Test whether default configuration file exists specific character.""" - for key, val in comment_map.items(): - assert "." not in key, f"There is not allowed special character in key `{key}`." - if isinstance(val, CommentedMap): - nested_key_check(val) - - -def test_key_without_dot_delimiter(): - """Test wrapper of whether default configuration file exists specific character.""" - yaml = YAML() - with open(path_default_config_yaml, "r") as f: - comment_map = yaml.load(f.read()) - nested_key_check(comment_map) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_engine.py b/dolphinscheduler-python/pydolphinscheduler/tests/core/test_engine.py deleted file mode 100644 index ba44fad669..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_engine.py +++ /dev/null @@ -1,148 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task Engine.""" - - -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.core.engine import Engine, ProgramType - -TEST_ENGINE_TASK_TYPE = "ENGINE" -TEST_MAIN_CLASS = "org.apache.examples.mock.Mock" -TEST_MAIN_PACKAGE = "Mock.jar" -TEST_PROGRAM_TYPE = ProgramType.JAVA - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch( - "pydolphinscheduler.core.engine.Engine.get_resource_info", - return_value=({"id": 1, "name": "mock_name"}), -) -def test_get_jar_detail(mock_resource, mock_code_version): - """Test :func:`get_jar_id` can return expect value.""" - name = "test_get_jar_detail" - task = Engine( - name, - TEST_ENGINE_TASK_TYPE, - TEST_MAIN_CLASS, - TEST_MAIN_PACKAGE, - TEST_PROGRAM_TYPE, - ) - assert 1 == task.get_jar_id() - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - { - "name": "test-task-params", - "task_type": "test-engine", - "main_class": "org.apache.examples.mock.Mock", - "main_package": "TestMock.jar", - "program_type": ProgramType.JAVA, - }, - { - "mainClass": "org.apache.examples.mock.Mock", - "mainJar": { - "id": 1, - }, - "programType": ProgramType.JAVA, - "localParams": [], - "resourceList": [], - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - ) - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch( - "pydolphinscheduler.core.engine.Engine.get_resource_info", - return_value=({"id": 1, "name": "mock_name"}), -) -def test_property_task_params(mock_resource, mock_code_version, attr, expect): - """Test task engine task property.""" - task = Engine(**attr) - assert expect == task.task_params - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - { - "name": "test-task-test_engine_get_define", - "task_type": "test-engine", - "main_class": "org.apache.examples.mock.Mock", - "main_package": "TestMock.jar", - "program_type": 
ProgramType.JAVA, - }, - { - "code": 123, - "name": "test-task-test_engine_get_define", - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "test-engine", - "taskParams": { - "mainClass": "org.apache.examples.mock.Mock", - "mainJar": { - "id": 1, - }, - "programType": ProgramType.JAVA, - "localParams": [], - "resourceList": [], - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - }, - ) - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch( - "pydolphinscheduler.core.engine.Engine.get_resource_info", - return_value=({"id": 1, "name": "mock_name"}), -) -def test_engine_get_define(mock_resource, mock_code_version, attr, expect): - """Test task engine function get_define.""" - task = Engine(**attr) - assert task.get_define() == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_process_definition.py b/dolphinscheduler-python/pydolphinscheduler/tests/core/test_process_definition.py deleted file mode 100644 index 30445bfbf3..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_process_definition.py +++ /dev/null @@ -1,502 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test process definition.""" - -from datetime import datetime -from typing import Any, List -from unittest.mock import patch - -import pytest -from freezegun import freeze_time - -from pydolphinscheduler import configuration -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.core.resource import Resource -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.models import Project, Tenant, User -from pydolphinscheduler.tasks.switch import Branch, Default, Switch, SwitchCondition -from pydolphinscheduler.utils.date import conv_to_schedule -from tests.testing.task import Task - -TEST_PROCESS_DEFINITION_NAME = "simple-test-process-definition" -TEST_TASK_TYPE = "test-task-type" - - -@pytest.mark.parametrize("func", ["run", "submit", "start"]) -def test_process_definition_key_attr(func): - """Test process definition have specific functions or attributes.""" - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd: - assert hasattr( - pd, func - ), f"ProcessDefinition instance don't have attribute `{func}`" - - -@pytest.mark.parametrize( - "name,value", - [ - ("timezone", configuration.WORKFLOW_TIME_ZONE), - ("project", Project(configuration.WORKFLOW_PROJECT)), - ("tenant", Tenant(configuration.WORKFLOW_TENANT)), - ( - "user", - User( - configuration.USER_NAME, - configuration.USER_PASSWORD, - configuration.USER_EMAIL, - configuration.USER_PHONE, - configuration.WORKFLOW_TENANT, - configuration.WORKFLOW_QUEUE, - configuration.USER_STATE, - ), - ), - ("worker_group", 
configuration.WORKFLOW_WORKER_GROUP), - ("warning_type", configuration.WORKFLOW_WARNING_TYPE), - ("warning_group_id", 0), - ("release_state", 1), - ], -) -def test_process_definition_default_value(name, value): - """Test process definition default attributes.""" - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd: - assert getattr(pd, name) == value, ( - f"ProcessDefinition instance attribute `{name}` not with " - f"except default value `{getattr(pd, name)}`" - ) - - -@pytest.mark.parametrize( - "name,cls,expect", - [ - ("name", str, "name"), - ("description", str, "description"), - ("schedule", str, "schedule"), - ("timezone", str, "timezone"), - ("worker_group", str, "worker_group"), - ("warning_type", str, "FAILURE"), - ("warning_group_id", int, 1), - ("timeout", int, 1), - ("param", dict, {"key": "value"}), - ( - "resource_list", - List, - [Resource(name="/dev/test.py", content="hello world", description="desc")], - ), - ], -) -def test_set_attr(name, cls, expect): - """Test process definition set attributes which get with same type.""" - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd: - setattr(pd, name, expect) - assert ( - getattr(pd, name) == expect - ), f"ProcessDefinition set attribute `{name}` do not work expect" - - -@pytest.mark.parametrize( - "value,expect", - [ - ("online", 1), - ("offline", 0), - ], -) -def test_set_release_state(value, expect): - """Test process definition set release_state attributes.""" - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME, release_state=value) as pd: - assert ( - getattr(pd, "release_state") == expect - ), "ProcessDefinition set attribute release_state do not return expect value." 
- - -@pytest.mark.parametrize( - "value", - [ - "oneline", - "offeline", - 1, - 0, - None, - ], -) -def test_set_release_state_error(value): - """Test process definition set release_state attributes with error.""" - pd = ProcessDefinition(TEST_PROCESS_DEFINITION_NAME, release_state=value) - with pytest.raises( - PyDSParamException, - match="Parameter release_state only support `online` or `offline` but get.*", - ): - pd.release_state - - -@pytest.mark.parametrize( - "set_attr,set_val,get_attr,get_val", - [ - ("_project", "project", "project", Project("project")), - ("_tenant", "tenant", "tenant", Tenant("tenant")), - ("_start_time", "2021-01-01", "start_time", datetime(2021, 1, 1)), - ("_end_time", "2021-01-01", "end_time", datetime(2021, 1, 1)), - ], -) -def test_set_attr_return_special_object(set_attr, set_val, get_attr, get_val): - """Test process definition set attributes which get with different type.""" - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd: - setattr(pd, set_attr, set_val) - assert get_val == getattr( - pd, get_attr - ), f"Set attribute {set_attr} can not get back with {get_val}." - - -@pytest.mark.parametrize( - "val,expect", - [ - (datetime(2021, 1, 1), datetime(2021, 1, 1)), - (None, None), - ("2021-01-01", datetime(2021, 1, 1)), - ("2021-01-01 01:01:01", datetime(2021, 1, 1, 1, 1, 1)), - ], -) -def test__parse_datetime(val, expect): - """Test process definition function _parse_datetime. - - Only two datetime test cases here because we have more test cases in tests/utils/test_date.py file. - """ - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd: - assert expect == pd._parse_datetime( - val - ), f"Function _parse_datetime with unexpect value by {val}." 
- - -@pytest.mark.parametrize( - "val", - [ - 20210101, - (2021, 1, 1), - {"year": "2021", "month": "1", "day": 1}, - ], -) -def test__parse_datetime_not_support_type(val: Any): - """Test process definition function _parse_datetime not support type error.""" - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd: - with pytest.raises(PyDSParamException, match="Do not support value type.*?"): - pd._parse_datetime(val) - - -@pytest.mark.parametrize( - "val", - [ - "ALLL", - "nonee", - ], -) -def test_warn_type_not_support_type(val: str): - """Test process definition param warning_type not support type error.""" - with pytest.raises( - PyDSParamException, match="Parameter `warning_type` with unexpect value.*?" - ): - ProcessDefinition(TEST_PROCESS_DEFINITION_NAME, warning_type=val) - - -@pytest.mark.parametrize( - "param, expect", - [ - ( - None, - [], - ), - ( - {}, - [], - ), - ( - {"key1": "val1"}, - [ - { - "prop": "key1", - "direct": "IN", - "type": "VARCHAR", - "value": "val1", - } - ], - ), - ( - { - "key1": "val1", - "key2": "val2", - }, - [ - { - "prop": "key1", - "direct": "IN", - "type": "VARCHAR", - "value": "val1", - }, - { - "prop": "key2", - "direct": "IN", - "type": "VARCHAR", - "value": "val2", - }, - ], - ), - ], -) -def test_property_param_json(param, expect): - """Test ProcessDefinition's property param_json.""" - pd = ProcessDefinition(TEST_PROCESS_DEFINITION_NAME, param=param) - assert pd.param_json == expect - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test__pre_submit_check_switch_without_param(mock_code_version): - """Test :func:`_pre_submit_check` if process definition with switch but without attribute param.""" - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd: - parent = Task(name="parent", task_type=TEST_TASK_TYPE) - switch_child_1 = Task(name="switch_child_1", task_type=TEST_TASK_TYPE) - switch_child_2 = Task(name="switch_child_2", task_type=TEST_TASK_TYPE) - 
switch_condition = SwitchCondition( - Branch(condition="${var} > 1", task=switch_child_1), - Default(task=switch_child_2), - ) - - switch = Switch(name="switch", condition=switch_condition) - parent >> switch - with pytest.raises( - PyDSParamException, - match="Parameter param or at least one local_param of task must " - "be provider if task Switch in process definition.", - ): - pd._pre_submit_check() - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test__pre_submit_check_switch_with_local_params(mock_code_version): - """Test :func:`_pre_submit_check` if process definition with switch with local params of task.""" - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd: - parent = Task( - name="parent", - task_type=TEST_TASK_TYPE, - local_params=[ - {"prop": "var", "direct": "OUT", "type": "VARCHAR", "value": ""} - ], - ) - switch_child_1 = Task(name="switch_child_1", task_type=TEST_TASK_TYPE) - switch_child_2 = Task(name="switch_child_2", task_type=TEST_TASK_TYPE) - switch_condition = SwitchCondition( - Branch(condition="${var} > 1", task=switch_child_1), - Default(task=switch_child_2), - ) - - switch = Switch(name="switch", condition=switch_condition) - parent >> switch - pd._pre_submit_check() - - -def test_process_definition_get_define_without_task(): - """Test process definition function get_define without task.""" - expect = { - "name": TEST_PROCESS_DEFINITION_NAME, - "description": None, - "project": configuration.WORKFLOW_PROJECT, - "tenant": configuration.WORKFLOW_TENANT, - "workerGroup": configuration.WORKFLOW_WORKER_GROUP, - "warningType": configuration.WORKFLOW_WARNING_TYPE, - "warningGroupId": 0, - "timeout": 0, - "releaseState": 1, - "param": None, - "tasks": {}, - "taskDefinitionJson": [{}], - "taskRelationJson": [{}], - "resourceList": [], - } - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd: - assert pd.get_define() == expect - - -def 
test_process_definition_simple_context_manager(): - """Test simple create workflow in process definition context manager mode.""" - expect_tasks_num = 5 - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd: - for i in range(expect_tasks_num): - curr_task = Task(name=f"task-{i}", task_type=f"type-{i}") - # Set deps task i as i-1 parent - if i > 0: - pre_task = pd.get_one_task_by_name(f"task-{i - 1}") - curr_task.set_upstream(pre_task) - assert len(pd.tasks) == expect_tasks_num - - # Test if task process_definition same as origin one - task: Task = pd.get_one_task_by_name("task-0") - assert pd is task.process_definition - - # Test if all tasks with expect deps - for i in range(expect_tasks_num): - task: Task = pd.get_one_task_by_name(f"task-{i}") - if i == 0: - assert task._upstream_task_codes == set() - assert task._downstream_task_codes == { - pd.get_one_task_by_name("task-1").code - } - elif i == expect_tasks_num - 1: - assert task._upstream_task_codes == { - pd.get_one_task_by_name(f"task-{i - 1}").code - } - assert task._downstream_task_codes == set() - else: - assert task._upstream_task_codes == { - pd.get_one_task_by_name(f"task-{i - 1}").code - } - assert task._downstream_task_codes == { - pd.get_one_task_by_name(f"task-{i + 1}").code - } - - -def test_process_definition_simple_separate(): - """Test process definition simple create workflow in separate mode. - - This test just test basic information, cause most of test case is duplicate to - test_process_definition_simple_context_manager. 
- """ - expect_tasks_num = 5 - pd = ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) - for i in range(expect_tasks_num): - curr_task = Task( - name=f"task-{i}", - task_type=f"type-{i}", - process_definition=pd, - ) - # Set deps task i as i-1 parent - if i > 0: - pre_task = pd.get_one_task_by_name(f"task-{i - 1}") - curr_task.set_upstream(pre_task) - assert len(pd.tasks) == expect_tasks_num - assert all(["task-" in task.name for task in pd.task_list]) - - -@pytest.mark.parametrize( - "user_attrs", - [ - {"tenant": "tenant_specific"}, - ], -) -def test_set_process_definition_user_attr(user_attrs): - """Test user with correct attributes if we specific assigned to process definition object.""" - default_value = { - "tenant": configuration.WORKFLOW_TENANT, - } - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME, **user_attrs) as pd: - user = pd.user - for attr in default_value: - # Get assigned attribute if we specific, else get default value - except_attr = ( - user_attrs[attr] if attr in user_attrs else default_value[attr] - ) - # Get actually attribute of user object - actual_attr = getattr(user, attr) - assert ( - except_attr == actual_attr - ), f"Except attribute is {except_attr} but get {actual_attr}" - - -def test_schedule_json_none_schedule(): - """Test function schedule_json with None as schedule.""" - with ProcessDefinition( - TEST_PROCESS_DEFINITION_NAME, - schedule=None, - ) as pd: - assert pd.schedule_json is None - - -# We freeze time here, because we test start_time with None, and if will get datetime.datetime.now. If we do -# not freeze time, it will cause flaky test here. 
-@freeze_time("2021-01-01") -@pytest.mark.parametrize( - "start_time,end_time,expect_date", - [ - ( - "20210101", - "20210201", - {"start_time": "2021-01-01 00:00:00", "end_time": "2021-02-01 00:00:00"}, - ), - ( - "2021-01-01", - "2021-02-01", - {"start_time": "2021-01-01 00:00:00", "end_time": "2021-02-01 00:00:00"}, - ), - ( - "2021/01/01", - "2021/02/01", - {"start_time": "2021-01-01 00:00:00", "end_time": "2021-02-01 00:00:00"}, - ), - # Test mix pattern - ( - "2021/01/01 01:01:01", - "2021-02-02 02:02:02", - {"start_time": "2021-01-01 01:01:01", "end_time": "2021-02-02 02:02:02"}, - ), - ( - "2021/01/01 01:01:01", - "20210202 020202", - {"start_time": "2021-01-01 01:01:01", "end_time": "2021-02-02 02:02:02"}, - ), - ( - "20210101 010101", - "2021-02-02 02:02:02", - {"start_time": "2021-01-01 01:01:01", "end_time": "2021-02-02 02:02:02"}, - ), - # Test None value - ( - "2021/01/01 01:02:03", - None, - {"start_time": "2021-01-01 01:02:03", "end_time": "9999-12-31 23:59:59"}, - ), - ( - None, - None, - { - "start_time": conv_to_schedule(datetime(2021, 1, 1)), - "end_time": "9999-12-31 23:59:59", - }, - ), - ], -) -def test_schedule_json_start_and_end_time(start_time, end_time, expect_date): - """Test function schedule_json about handle start_time and end_time. - - Only two datetime test cases here because we have more test cases in tests/utils/test_date.py file. - """ - schedule = "0 0 0 * * ? 
*" - expect = { - "crontab": schedule, - "startTime": expect_date["start_time"], - "endTime": expect_date["end_time"], - "timezoneId": configuration.WORKFLOW_TIME_ZONE, - } - with ProcessDefinition( - TEST_PROCESS_DEFINITION_NAME, - schedule=schedule, - start_time=start_time, - end_time=end_time, - timezone=configuration.WORKFLOW_TIME_ZONE, - ) as pd: - assert pd.schedule_json == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_resource_definition.py b/dolphinscheduler-python/pydolphinscheduler/tests/core/test_resource_definition.py deleted file mode 100644 index 07fcac3547..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_resource_definition.py +++ /dev/null @@ -1,68 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test resource definition.""" -import pytest - -from pydolphinscheduler.core.resource import Resource -from pydolphinscheduler.exceptions import PyDSParamException - - -def test_resource(): - """Test resource set attributes which get with same type.""" - name = "/dev/test.py" - content = """print("hello world")""" - description = "hello world" - user_name = "test_user" - expect = { - "name": name, - "content": content, - "description": description, - "userName": user_name, - } - resourceDefinition = Resource( - name=name, content=content, description=description, user_name=user_name - ) - assert resourceDefinition.get_define() == expect - - -def test_empty_user_name(): - """Tests for the exception get info from database when the user name is null.""" - name = "/dev/test.py" - content = """print("hello world")""" - description = "hello world" - resourceDefinition = Resource(name=name, content=content, description=description) - with pytest.raises( - PyDSParamException, - match="`user_name` is required when querying resources from python gate.", - ): - resourceDefinition.get_info_from_database() - - -def test_empty_content(): - """Tests for the exception create or update resource when the user name or content is empty.""" - name = "/dev/test.py" - user_name = "test_user" - description = "hello world" - resourceDefinition = Resource( - name=name, description=description, user_name=user_name - ) - with pytest.raises( - PyDSParamException, - match="`user_name` and `content` are required when create or update resource from python gate.", - ): - resourceDefinition.create_or_update_resource() diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_task.py b/dolphinscheduler-python/pydolphinscheduler/tests/core/test_task.py deleted file mode 100644 index c6ef7773ae..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_task.py +++ /dev/null @@ -1,470 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more 
contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test Task class function.""" -import logging -import re -from typing import Set -from unittest.mock import PropertyMock, patch - -import pytest - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.core.task import Task, TaskRelation -from pydolphinscheduler.exceptions import PyResPluginException -from pydolphinscheduler.resources_plugin import Local -from tests.testing.task import Task as TestTask -from tests.testing.task import TaskWithCode - -TEST_TASK_RELATION_SET = set() -TEST_TASK_RELATION_SIZE = 0 - - -@pytest.mark.parametrize( - "addition, ignore, expect", - [ - ( - set(), - set(), - { - "local_params", - "resource_list", - "dependence", - "wait_start_timeout", - "condition_result", - }, - ), - ( - set(), - {"dependence", "condition_result", "not_exists"}, - { - "local_params", - "resource_list", - "wait_start_timeout", - }, - ), - ( - { - "not_exists_1", - "not_exists_2", - }, - set(), - { - "not_exists_1", - "not_exists_2", - "local_params", - "resource_list", - "dependence", - "wait_start_timeout", - "condition_result", - }, - ), - # test addition and ignore conflict to see behavior - ( - { - "not_exists", - }, - {"condition_result", "not_exists"}, - { - "not_exists", - "local_params", - 
"resource_list", - "dependence", - "wait_start_timeout", - }, - ), - ], -) -def test__get_attr(addition: Set, ignore: Set, expect: Set): - """Test task function `_get_attr`.""" - task = TestTask( - name="test-get-attr", - task_type="test", - ) - task._task_custom_attr = addition - task._task_ignore_attr = ignore - assert task._get_attr() == expect - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - dict(), - { - "localParams": [], - "resourceList": [], - "dependence": {}, - "waitStartTimeout": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - }, - ), - ( - { - "local_params": ["foo", "bar"], - "resource_list": ["foo", "bar"], - "dependence": {"foo", "bar"}, - "wait_start_timeout": {"foo", "bar"}, - "condition_result": {"foo": ["bar"]}, - }, - { - "localParams": ["foo", "bar"], - "resourceList": [{"id": 1}], - "dependence": {"foo", "bar"}, - "waitStartTimeout": {"foo", "bar"}, - "conditionResult": {"foo": ["bar"]}, - }, - ), - ], -) -@patch( - "pydolphinscheduler.core.resource.Resource.get_id_from_database", - return_value=1, -) -@patch( - "pydolphinscheduler.core.task.Task.user_name", - return_value="test_user", -) -def test_property_task_params(mock_resource, mock_user_name, attr, expect): - """Test class task property.""" - task = TestTask( - "test-property-task-params", - "test-task", - **attr, - ) - assert expect == task.task_params - - -@pytest.mark.parametrize( - "pre_code, post_code, expect", - [ - (123, 456, hash("123 -> 456")), - (12345678, 987654321, hash("12345678 -> 987654321")), - ], -) -def test_task_relation_hash_func(pre_code, post_code, expect): - """Test TaskRelation magic function :func:`__hash__`.""" - task_param = TaskRelation(pre_task_code=pre_code, post_task_code=post_code) - assert hash(task_param) == expect - - -@pytest.mark.parametrize( - "pre_code, post_code, size_add", - [ - (123, 456, 1), - (123, 456, 0), - (456, 456, 1), - (123, 123, 1), - (456, 123, 1), - (0, 456, 1), - (123, 0, 1), - ], -) -def 
test_task_relation_add_to_set(pre_code, post_code, size_add): - """Test TaskRelation with different pre_code and post_code add to set behavior. - - Here we use global variable to keep set of :class:`TaskRelation` instance and the number we expect - of the size when we add a new task relation to exists set. - """ - task_relation = TaskRelation(pre_task_code=pre_code, post_task_code=post_code) - TEST_TASK_RELATION_SET.add(task_relation) - # hint python interpreter use global variable instead of local's - global TEST_TASK_RELATION_SIZE - TEST_TASK_RELATION_SIZE += size_add - assert len(TEST_TASK_RELATION_SET) == TEST_TASK_RELATION_SIZE - - -def test_task_relation_to_dict(): - """Test TaskRelation object function to_dict.""" - pre_task_code = 123 - post_task_code = 456 - expect = { - "name": "", - "preTaskCode": pre_task_code, - "postTaskCode": post_task_code, - "preTaskVersion": 1, - "postTaskVersion": 1, - "conditionType": 0, - "conditionParams": {}, - } - task_relation = TaskRelation( - pre_task_code=pre_task_code, post_task_code=post_task_code - ) - assert task_relation.get_define() == expect - - -def test_task_get_define(): - """Test Task object function get_define.""" - code = 123 - version = 1 - name = "test_task_get_define" - task_type = "test_task_get_define_type" - expect = { - "code": code, - "name": name, - "version": version, - "description": None, - "delayTime": 0, - "taskType": task_type, - "taskParams": { - "resourceList": [], - "localParams": [], - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - task = Task(name=name, task_type=task_type) - assert 
task.get_define() == expect - - -@pytest.mark.parametrize("shift", ["<<", ">>"]) -def test_two_tasks_shift(shift: str): - """Test bit operator between tasks. - - Here we test both `>>` and `<<` bit operator. - """ - upstream = TestTask(name="upstream", task_type=shift) - downstream = TestTask(name="downstream", task_type=shift) - if shift == "<<": - downstream << upstream - elif shift == ">>": - upstream >> downstream - else: - assert False, f"Unexpect bit operator type {shift}." - assert ( - 1 == len(upstream._downstream_task_codes) - and downstream.code in upstream._downstream_task_codes - ), "Task downstream task attributes error, downstream codes size or specific code failed." - assert ( - 1 == len(downstream._upstream_task_codes) - and upstream.code in downstream._upstream_task_codes - ), "Task upstream task attributes error, upstream codes size or upstream code failed." - - -@pytest.mark.parametrize( - "dep_expr, flag", - [ - ("task << tasks", "upstream"), - ("tasks << task", "downstream"), - ("task >> tasks", "downstream"), - ("tasks >> task", "upstream"), - ], -) -def test_tasks_list_shift(dep_expr: str, flag: str): - """Test bit operator between task and sequence of tasks. - - Here we test both `>>` and `<<` bit operator. 
- """ - reverse_dict = { - "upstream": "downstream", - "downstream": "upstream", - } - task_type = "dep_task_and_tasks" - task = TestTask(name="upstream", task_type=task_type) - tasks = [ - TestTask(name="downstream1", task_type=task_type), - TestTask(name="downstream2", task_type=task_type), - ] - - # Use build-in function eval to simply test case and reduce duplicate code - eval(dep_expr) - direction_attr = f"_{flag}_task_codes" - reverse_direction_attr = f"_{reverse_dict[flag]}_task_codes" - assert 2 == len(getattr(task, direction_attr)) - assert [t.code in getattr(task, direction_attr) for t in tasks] - - assert all([1 == len(getattr(t, reverse_direction_attr)) for t in tasks]) - assert all([task.code in getattr(t, reverse_direction_attr) for t in tasks]) - - -def test_add_duplicate(caplog): - """Test add task which code already in process definition.""" - with ProcessDefinition("test_add_duplicate_workflow") as _: - TaskWithCode(name="test_task_1", task_type="test", code=123, version=1) - with caplog.at_level(logging.WARNING): - TaskWithCode( - name="test_task_duplicate_code", task_type="test", code=123, version=2 - ) - assert all( - [ - caplog.text.startswith("WARNING pydolphinscheduler"), - re.findall("already in process definition", caplog.text), - ] - ) - - -@pytest.mark.parametrize( - "val, expected", - [ - ("a.sh", "echo Test task attribute ext_attr"), - ("a.zsh", "echo Test task attribute ext_attr"), - ("echo Test task attribute ext_attr", "echo Test task attribute ext_attr"), - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch( - "pydolphinscheduler.core.task.Task.ext", - new_callable=PropertyMock, - return_value={".sh", ".zsh"}, -) -@patch( - "pydolphinscheduler.core.task.Task.ext_attr", - new_callable=PropertyMock, - return_value="_raw_script", -) -@patch( - "pydolphinscheduler.core.task.Task._raw_script", - create=True, - new_callable=PropertyMock, -) 
-@patch("pydolphinscheduler.core.task.Task.get_plugin") -def test_task_ext_attr( - m_plugin, m_raw_script, m_ext_attr, m_ext, m_code_version, val, expected -): - """Test task attribute ext_attr.""" - m_plugin.return_value.read_file.return_value = expected - m_raw_script.return_value = val - task = Task("test_task_ext_attr", "test_task_ext_attr") - assert expected == getattr(task, "raw_script") - - -@pytest.mark.parametrize( - "attr, expected", - [ - ( - { - "name": "test_task_abtain_res_plugin", - "task_type": "TaskType", - "resource_plugin": Local("prefix"), - "process_definition": ProcessDefinition( - name="process_definition", - resource_plugin=Local("prefix"), - ), - }, - "Local", - ), - ( - { - "name": "test_task_abtain_res_plugin", - "task_type": "TaskType", - "resource_plugin": Local("prefix"), - }, - "Local", - ), - ( - { - "name": "test_task_abtain_res_plugin", - "task_type": "TaskType", - "process_definition": ProcessDefinition( - name="process_definition", - resource_plugin=Local("prefix"), - ), - }, - "Local", - ), - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch("pydolphinscheduler.core.task.Task.get_content") -def test_task_obtain_res_plugin(m_get_content, m_code_version, attr, expected): - """Test task obtaining resource plug-in.""" - task = Task(**attr) - assert expected == task.get_plugin().__class__.__name__ - - -@pytest.mark.parametrize( - "attr", - [ - { - "name": "test_task_abtain_res_plugin", - "task_type": "TaskType", - "process_definition": ProcessDefinition( - name="process_definition", - ), - }, - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch("pydolphinscheduler.core.task.Task.get_content") -def test_task_obtain_res_plugin_exception(m_get_content, m_code_version, attr): - """Test task obtaining resource plug-in exception.""" - with pytest.raises( - PyResPluginException, - match="The execution command of this task 
is a file, but the resource plugin is empty", - ): - task = Task(**attr) - task.get_plugin() - - -@pytest.mark.parametrize( - "resources, expect", - [ - ( - ["/dev/test.py"], - [{"id": 1}], - ), - ( - ["/dev/test.py", {"id": 2}], - [{"id": 1}, {"id": 2}], - ), - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch( - "pydolphinscheduler.core.resource.Resource.get_id_from_database", - return_value=1, -) -@patch( - "pydolphinscheduler.core.task.Task.user_name", - return_value="test_user", -) -def test_python_resource_list( - mock_code_version, mock_resource, mock_user_name, resources, expect -): - """Test python task resource list.""" - task = Task( - name="python_resource_list.", - task_type="PYTHON", - resource_list=resources, - ) - assert task.resource_list == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_yaml_process_define.py b/dolphinscheduler-python/pydolphinscheduler/tests/core/test_yaml_process_define.py deleted file mode 100644 index 99ad179a5f..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_yaml_process_define.py +++ /dev/null @@ -1,191 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test YAML process.""" - -import os -from pathlib import Path -from unittest.mock import patch - -import pytest - -from pydolphinscheduler import configuration, tasks -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.core.yaml_process_define import ( - ParseTool, - create_process_definition, - get_task_cls, -) -from pydolphinscheduler.exceptions import PyDSTaskNoFoundException -from tests.testing.path import path_yaml_example -from tests.testing.task import Task - - -@pytest.mark.parametrize( - "string_param, expect", - [ - ("$ENV{PROJECT_NAME}", "~/pydolphinscheduler"), - ], -) -def test_parse_tool_env_exist(string_param, expect): - """Test parsing the environment variable.""" - os.environ["PROJECT_NAME"] = expect - assert expect == ParseTool.parse_string_param_if_env(string_param) - - -def test_parse_tool_env_not_exist(): - """Test parsing the not exist environment variable.""" - key = "THIS_ENV_NOT_EXIST_0000000" - string_param = "$ENV{%s}" % key - expect = "$" + key - assert expect == ParseTool.parse_string_param_if_env(string_param) - - -@pytest.mark.parametrize( - "string_param, expect_key", - [ - ("${CONFIG.java_gateway.address}", "java_gateway.address"), - ("${CONFIG.WORKFLOW_PROJECT}", "default.workflow.project"), - ], -) -def test_parse_tool_config(string_param, expect_key): - """Test parsing configuration.""" - expect = configuration.get_single_config(expect_key) - assert expect == ParseTool.parse_string_param_if_config(string_param) - - -def test_parse_possible_yaml_file(): - """Test parsing possible path.""" - folder = Path(path_yaml_example) - file_name = "Shell.yaml" - path = folder.joinpath(file_name) - - with open(path, "r") as f: - expect = "".join(f) - - string_param = '$FILE{"%s"}' % file_name - content_ = ParseTool.parse_string_param_if_file(string_param, base_folder=folder) - - assert expect == content_ - - -def test_parse_tool_parse_possible_path_file(): - """Test parsing possible path.""" 
- folder = Path(path_yaml_example) - file_name = "Shell.yaml" - path = folder.joinpath(file_name) - - possible_path = ParseTool.get_possible_path(path, base_folder=folder) - assert path == possible_path - - possible_path = ParseTool.get_possible_path(file_name, base_folder=folder) - assert path == possible_path - - possible_path = ParseTool.get_possible_path(file_name, base_folder=".") - assert path != possible_path - - -@pytest.mark.parametrize( - "task_type, expect", - [ - ("shell", tasks.Shell), - ("Shell", tasks.Shell), - ("ShEll", tasks.Shell), - ("Condition", tasks.Condition), - ("DataX", tasks.DataX), - ("CustomDataX", tasks.CustomDataX), - ("Dependent", tasks.Dependent), - ("Flink", tasks.Flink), - ("Http", tasks.Http), - ("MR", tasks.MR), - ("Procedure", tasks.Procedure), - ("Python", tasks.Python), - ("Shell", tasks.Shell), - ("Spark", tasks.Spark), - ("Sql", tasks.Sql), - ("SubProcess", tasks.SubProcess), - ("Switch", tasks.Switch), - ("SageMaker", tasks.SageMaker), - ], -) -def test_get_task(task_type, expect): - """Test get task function.""" - assert expect == get_task_cls(task_type) - - -@pytest.mark.parametrize( - "task_type", - [ - ("MYSQL"), - ], -) -def test_get_error(task_type): - """Test get task cls error.""" - with pytest.raises( - PyDSTaskNoFoundException, - match=f"not find task {task_type}", - ): - get_task_cls(task_type) - - -@pytest.mark.parametrize( - "yaml_file", - [ - ("Condition.yaml"), - ("DataX.yaml"), - ("Dependent.yaml"), - ("Flink.yaml"), - ("Procedure.yaml"), - ("Http.yaml"), - ("MapReduce.yaml"), - ("Python.yaml"), - ("Shell.yaml"), - ("Spark.yaml"), - ("Sql.yaml"), - ("SubProcess.yaml"), - # ("Switch.yaml"), - ("MoreConfiguration.yaml"), - ], -) -@patch( - "pydolphinscheduler.core.engine.Engine.get_resource_info", - return_value=({"id": 1, "name": "test"}), -) -@patch( - "pydolphinscheduler.core.database.Database.get_database_info", - return_value=({"id": 1, "type": "mock_type"}), -) -@patch( - 
"pydolphinscheduler.tasks.dependent.DependentItem.get_code_from_gateway", - return_value={ - "projectCode": 0, - "processDefinitionCode": 0, - "taskDefinitionCode": 0, - }, -) -@patch.object(ProcessDefinition, "run") -@patch.object(ProcessDefinition, "submit") -def test_get_create_process_definition( - prun, psubmit, dep_item, db_info, resource_info, yaml_file -): - """Test create_process_definition function to parse example YAML file.""" - yaml_file_path = Path(path_yaml_example).joinpath(yaml_file) - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - side_effect=Task("test_func_wrap", "func_wrap").gen_code_and_version, - ): - create_process_definition(yaml_file_path) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/example/__init__.py b/dolphinscheduler-python/pydolphinscheduler/tests/example/__init__.py deleted file mode 100644 index 49323e711d..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/example/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Init example package tests.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/example/test_example.py b/dolphinscheduler-python/pydolphinscheduler/tests/example/test_example.py deleted file mode 100644 index 319ad961f7..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/example/test_example.py +++ /dev/null @@ -1,176 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test example.""" - -import ast -import importlib -from unittest.mock import patch - -import pytest - -from tests.testing.constants import task_without_example -from tests.testing.path import get_all_examples, get_tasks -from tests.testing.task import Task - -process_definition_name = set() - - -def import_module(script_name, script_path): - """Import and run example module in examples directory.""" - spec = importlib.util.spec_from_file_location(script_name, script_path) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - return module - - -def test_task_without_example(): - """Test task which without example. - - Avoiding add new type of tasks but without adding example describe how to use it. 
- """ - # We use example/tutorial.py as shell task example - ignore_name = {"__init__.py", "shell.py", "func_wrap.py"} - all_tasks = {task.stem for task in get_tasks(ignore_name=ignore_name)} - - have_example_tasks = set() - start = "task_" - end = "_example" - for ex in get_all_examples(): - stem = ex.stem - if stem.startswith(start) and stem.endswith(end): - task_name = stem.replace(start, "").replace(end, "") - have_example_tasks.add(task_name) - - assert all_tasks.difference(have_example_tasks) == task_without_example - - -@pytest.fixture -def setup_and_teardown_for_stuff(): - """Fixture of py.test handle setup and teardown.""" - yield - global process_definition_name - process_definition_name = set() - - -def submit_check_without_same_name(self): - """Side effect for verifying process definition name and adding it to global variable.""" - if self.name in process_definition_name: - raise ValueError( - "Example process definition should not have same name, but get duplicate name: %s", - self.name, - ) - submit_add_process_definition(self) - - -def submit_add_process_definition(self): - """Side effect for adding process definition name to global variable.""" - process_definition_name.add(self.name) - - -def test_example_basic(): - """Test example basic information. - - Which including: - * File extension name is `.py` - * All example except `tutorial.py` is end with keyword "_example" - * All example must have not empty `__doc__`. - """ - for ex in get_all_examples(): - # All files in example is python script - assert ( - ex.suffix == ".py" - ), f"We expect all examples is python script, but get {ex.name}." - - # All except tutorial and __init__ is end with keyword "_example" - if ( - ex.stem - not in ("tutorial", "tutorial_decorator", "tutorial_resource_plugin") - and ex.stem != "__init__" - ): - assert ex.stem.endswith( - "_example" - ), f"We expect all examples script end with keyword '_example', but get {ex.stem}." 
- - # All files have __doc__ - tree = ast.parse(ex.read_text()) - example_doc = ast.get_docstring(tree, clean=False) - assert ( - example_doc is not None - ), f"We expect all examples have __doc__, but {ex.name} do not." - - -@patch("pydolphinscheduler.core.process_definition.ProcessDefinition.start") -@patch( - "pydolphinscheduler.core.process_definition.ProcessDefinition.submit", - side_effect=submit_check_without_same_name, - autospec=True, -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - # Example bulk_create_example.py would create workflow dynamic by :func:`get_one_task_by_name` - # and would raise error in :func:`get_one_task_by_name` if we return constant value - # using :arg:`return_value` - side_effect=Task("test_example", "test_example").gen_code_and_version, -) -def test_example_process_definition_without_same_name( - mock_code_version, mock_submit, mock_start -): - """Test all examples file without same process definition's name. - - Our process definition would compete with others if we have same process definition name. It will make - different between actually workflow and our workflow-as-code file which make users feel strange. 
- """ - for ex in get_all_examples(): - # We use side_effect `submit_check_without_same_name` overwrite :func:`submit` - # and check whether it have duplicate name or not - import_module(ex.name, str(ex)) - assert True - - -@patch("pydolphinscheduler.core.process_definition.ProcessDefinition.start") -@patch( - "pydolphinscheduler.core.process_definition.ProcessDefinition.submit", - side_effect=submit_add_process_definition, - autospec=True, -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - # Example bulk_create_example.py would create workflow dynamic by :func:`get_one_task_by_name` - # and would raise error in :func:`get_one_task_by_name` if we return constant value - # using :arg:`return_value` - side_effect=Task("test_example", "test_example").gen_code_and_version, -) -def test_file_name_in_process_definition(mock_code_version, mock_submit, mock_start): - """Test example file name in example definition name. - - We should not directly assert equal, because some of the examples contain - more than one process definition. - """ - global process_definition_name - for ex in get_all_examples(): - # Skip __init__ file - if ex.stem == "__init__": - continue - # Skip bulk_create_example check, cause it contain multiple workflow and - # without one named bulk_create_example - if ex.stem == "bulk_create_example": - continue - process_definition_name = set() - assert ex.stem not in process_definition_name - import_module(ex.name, str(ex)) - assert ex.stem in process_definition_name diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/integration/__init__.py b/dolphinscheduler-python/pydolphinscheduler/tests/integration/__init__.py deleted file mode 100644 index 65625a9f04..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/integration/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test integration between Python API and PythonGatewayService.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/integration/conftest.py b/dolphinscheduler-python/pydolphinscheduler/tests/integration/conftest.py deleted file mode 100644 index c15b89768d..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/integration/conftest.py +++ /dev/null @@ -1,51 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""py.test conftest.py file for package integration test.""" - -import os - -import pytest - -from tests.testing.docker_wrapper import DockerWrapper - - -@pytest.fixture(scope="package", autouse=True) -def docker_setup_teardown(): - """Fixture for whole package tests, Set up and teardown docker env. - - Fixture in file named ``conftest.py`` with ``scope=package`` could be auto import in the - whole package, and with attribute ``autouse=True`` will be auto-use for each test cases. - - .. seealso:: - For more information about conftest.py see: - https://docs.pytest.org/en/latest/example/simple.html#package-directory-level-fixtures-setups - """ - if os.environ.get("skip_launch_docker") == "true": - yield True - else: - docker_wrapper = DockerWrapper( - image="apache/dolphinscheduler-standalone-server:ci", - container_name="ci-dolphinscheduler-standalone-server", - ) - ports = {"25333/tcp": 25333, "12345/tcp": 12345} - container = docker_wrapper.run_until_log( - log="Started StandaloneServer in", tty=True, ports=ports - ) - assert container is not None - yield - docker_wrapper.remove_container() diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_java_gateway.py b/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_java_gateway.py deleted file mode 100644 index 8b7c5ff845..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_java_gateway.py +++ /dev/null @@ -1,53 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test pydolphinscheduler java gateway.""" - - -from py4j.java_gateway import JavaGateway, java_import - - -def test_gateway_connect(): - """Test weather client could connect java gate way or not.""" - gateway = JavaGateway() - app = gateway.entry_point - assert app.ping() == "PONG" - - -def test_jvm_simple(): - """Test use JVM build-in object and operator from java gateway.""" - gateway = JavaGateway() - smallest = gateway.jvm.java.lang.Integer.MIN_VALUE - biggest = gateway.jvm.java.lang.Integer.MAX_VALUE - assert smallest is not None and biggest is not None - assert biggest > smallest - - -def test_python_client_java_import_single(): - """Test import single class from java gateway.""" - gateway = JavaGateway() - java_import(gateway.jvm, "org.apache.dolphinscheduler.common.utils.FileUtils") - assert hasattr(gateway.jvm, "FileUtils") - - -def test_python_client_java_import_package(): - """Test import package contain multiple class from java gateway.""" - gateway = JavaGateway() - java_import(gateway.jvm, "org.apache.dolphinscheduler.common.utils.*") - # test if jvm view have some common utils - for util in ("FileUtils", "OSUtils", "DateUtils"): - assert hasattr(gateway.jvm, util) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_process_definition.py b/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_process_definition.py deleted file mode 100644 index 1672bde530..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_process_definition.py +++ /dev/null @@ -1,50 +0,0 @@ -# 
Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test process definition in integration.""" - -from typing import Dict - -import pytest - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.shell import Shell - -PROCESS_DEFINITION_NAME = "test_change_exists_attr_pd" -TASK_NAME = f"task_{PROCESS_DEFINITION_NAME}" - - -@pytest.mark.parametrize( - "pre, post", - [ - ( - { - "user": "pre_user", - }, - { - "user": "post_user", - }, - ) - ], -) -def test_change_process_definition_attr(pre: Dict, post: Dict): - """Test whether process definition success when specific attribute change.""" - assert pre.keys() == post.keys(), "Not equal keys for pre and post attribute." 
- for attrs in [pre, post]: - with ProcessDefinition(name=PROCESS_DEFINITION_NAME, **attrs) as pd: - Shell(name=TASK_NAME, command="echo 1") - pd.submit() diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_project.py b/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_project.py deleted file mode 100644 index 167ce2d8c9..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_project.py +++ /dev/null @@ -1,78 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test pydolphinscheduler project.""" -import pytest - -from pydolphinscheduler.models import Project, User - - -def get_user( - name="test-name", - password="test-password", - email="test-email@abc.com", - phone="17366637777", - tenant="test-tenant", - queue="test-queue", - status=1, -): - """Get a test user.""" - user = User(name, password, email, phone, tenant, queue, status) - user.create_if_not_exists() - return user - - -def get_project(name="test-name-1", description="test-description", code=1): - """Get a test project.""" - project = Project(name, description, code=code) - user = get_user() - project.create_if_not_exists(user=user.name) - return project - - -def test_create_and_get_project(): - """Test create and get project from java gateway.""" - project = get_project() - project_ = Project.get_project_by_name(user="test-name", name=project.name) - assert project_.name == project.name - assert project_.description == project.description - - -def test_update_project(): - """Test update project from java gateway.""" - project = get_project() - project = project.get_project_by_name(user="test-name", name=project.name) - project.update( - user="test-name", - project_code=project.code, - project_name="test-name-updated", - description="test-description-updated", - ) - project_ = Project.get_project_by_name(user="test-name", name="test-name-updated") - assert project_.description == "test-description-updated" - - -def test_delete_project(): - """Test delete project from java gateway.""" - project = get_project() - project.get_project_by_name(user="test-name", name=project.name) - project.delete(user="test-name") - - with pytest.raises(AttributeError) as excinfo: - _ = project.name - - assert excinfo.type == AttributeError diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_submit_examples.py b/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_submit_examples.py deleted file mode 100644 index 393b0cc99a..0000000000 
--- a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_submit_examples.py +++ /dev/null @@ -1,56 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test whether success submit examples DAG to PythonGatewayService.""" - -import subprocess -from pathlib import Path - -import pytest - -from tests.testing.constants import ignore_exec_examples -from tests.testing.path import path_example - - -@pytest.mark.parametrize( - "example_path", - [ - path - for path in path_example.iterdir() - if path.is_file() and path.stem not in ignore_exec_examples - ], -) -def test_exec_white_list_example(example_path: Path): - """Test execute examples and submit DAG to PythonGatewayService.""" - try: - # Because our task decorator used module ``inspect`` to get the source, and it will - # raise IOError when call it by built-in function ``exec``, so we change to ``subprocess.check_call`` - subprocess.check_call(["python", str(example_path)]) - except subprocess.CalledProcessError: - raise RuntimeError("Run example %s failed.", example_path.stem) - - -def test_exec_multiple_times(): - """Test whether process definition can be executed more than one times.""" - tutorial_path = path_example.joinpath("tutorial.py") - time = 0 - while 
time < 3: - try: - subprocess.check_call(["python", str(tutorial_path)]) - except subprocess.CalledProcessError: - raise RuntimeError("Run example %s failed.", tutorial_path.stem) - time += 1 diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_tenant.py b/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_tenant.py deleted file mode 100644 index c1ec33c335..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_tenant.py +++ /dev/null @@ -1,86 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test pydolphinscheduler tenant.""" -import pytest - -from pydolphinscheduler.models import Tenant, User - - -def get_user( - name="test-name", - password="test-password", - email="test-email@abc.com", - phone="17366637777", - tenant="test-tenant", - queue="test-queue", - status=1, -): - """Get a test user.""" - user = User(name, password, email, phone, tenant, queue, status) - user.create_if_not_exists() - return user - - -def get_tenant( - name="test-name-1", - queue="test-queue-1", - description="test-description", - tenant_code="test-tenant-code", - user_name=None, -): - """Get a test tenant.""" - tenant = Tenant(name, queue, description, code=tenant_code, user_name=user_name) - tenant.create_if_not_exists(name) - return tenant - - -def test_create_tenant(): - """Test create tenant from java gateway.""" - tenant = get_tenant() - assert tenant.tenant_id is not None - - -def test_get_tenant(): - """Test get tenant from java gateway.""" - tenant = get_tenant() - tenant_ = Tenant.get_tenant(tenant.code) - assert tenant_.tenant_id == tenant.tenant_id - - -def test_update_tenant(): - """Test update tenant from java gateway.""" - tenant = get_tenant(user_name="admin") - tenant.update( - user="admin", - code="test-code-updated", - queue_id=1, - description="test-description-updated", - ) - tenant_ = Tenant.get_tenant(code=tenant.code) - assert tenant_.code == "test-code-updated" - assert tenant_.queue == 1 - - -def test_delete_tenant(): - """Test delete tenant from java gateway.""" - tenant = get_tenant(user_name="admin") - tenant.delete() - with pytest.raises(AttributeError) as excinfo: - _ = tenant.tenant_id - - assert excinfo.type == AttributeError diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_user.py b/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_user.py deleted file mode 100644 index 74248fa8c3..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_user.py +++ /dev/null @@ 
-1,107 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test pydolphinscheduler user.""" - -import hashlib - -import pytest - -from pydolphinscheduler.models import User - - -def md5(str): - """MD5 a string.""" - hl = hashlib.md5() - hl.update(str.encode(encoding="utf-8")) - return hl.hexdigest() - - -def get_user( - name="test-name", - password="test-password", - email="test-email@abc.com", - phone="17366637777", - tenant="test-tenant", - queue="test-queue", - status=1, -): - """Get a test user.""" - user = User( - name=name, - password=password, - email=email, - phone=phone, - tenant=tenant, - queue=queue, - status=status, - ) - user.create_if_not_exists() - return user - - -def test_create_user(): - """Test weather client could connect java gate way or not.""" - user = User( - name="test-name", - password="test-password", - email="test-email@abc.com", - phone="17366637777", - tenant="test-tenant", - queue="test-queue", - status=1, - ) - user.create_if_not_exists() - assert user.user_id is not None - - -def test_get_user(): - """Test get user from java gateway.""" - user = get_user() - user_ = User.get_user(user.user_id) - assert user_.password == md5(user.password) - assert user_.email == user.email - assert 
user_.phone == user.phone - assert user_.status == user.status - - -def test_update_user(): - """Test update user from java gateway.""" - user = get_user() - user.update( - password="test-password-", - email="test-email-updated@abc.com", - phone="17366637766", - tenant="test-tenant-updated", - queue="test-queue-updated", - status=2, - ) - user_ = User.get_user(user.user_id) - assert user_.password == md5("test-password-") - assert user_.email == "test-email-updated@abc.com" - assert user_.phone == "17366637766" - assert user_.status == 2 - - -def test_delete_user(): - """Test delete user from java gateway.""" - user = get_user() - user.delete() - with pytest.raises(AttributeError) as excinfo: - _ = user.user_id - - assert excinfo.type == AttributeError diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/__init__.py b/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/__init__.py deleted file mode 100644 index 0b6bdf360b..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Init resources_plugin package tests.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_github.py b/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_github.py deleted file mode 100644 index 1f1a631649..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_github.py +++ /dev/null @@ -1,195 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test github resource plugin.""" -from unittest.mock import PropertyMock, patch - -import pytest - -from pydolphinscheduler.resources_plugin import GitHub -from pydolphinscheduler.resources_plugin.base.git import GitFileInfo - - -@pytest.mark.parametrize( - "attr, expected", - [ - ( - { - "user": "apache", - "repo_name": "dolphinscheduler", - "file_path": "script/install.sh", - "api": "https://api.github.com/repos/{user}/{repo_name}/contents/{file_path}", - }, - "https://api.github.com/repos/apache/dolphinscheduler/contents/script/install.sh", - ), - ], -) -def test_github_build_req_api(attr, expected): - """Test the build_req_api function of the github resource plug-in.""" - github = GitHub(prefix="prefix") - assert expected == github.build_req_api(**attr) - - -@pytest.mark.parametrize( - "attr, expected", - [ - ( - "https://github.com/apache/dolphinscheduler/blob/dev/script/install.sh", - { - "user": "apache", - "repo_name": "dolphinscheduler", - "branch": "dev", - "file_path": "script/install.sh", - }, - ), - ( - "https://github.com/apache/dolphinscheduler/blob/master/pom.xml", - { - "user": "apache", - "repo_name": "dolphinscheduler", - "branch": "master", - "file_path": "pom.xml", - }, - ), - ( - "https://github.com/apache/dolphinscheduler/blob/1.3.9-release/docker/build/startup.sh", - { - "user": "apache", - "repo_name": "dolphinscheduler", - "branch": "1.3.9-release", - "file_path": "docker/build/startup.sh", - }, - ), - ], -) -def test_github_get_git_file_info(attr, expected): - """Test the get_git_file_info function of the github resource plug-in.""" - github = GitHub(prefix="prefix") - github.get_git_file_info(attr) - assert expected == github._git_file_info.__dict__ - - -@pytest.mark.parametrize( - "attr, expected", - [ - ( - ( - { - "user": "apache", - "repo_name": "dolphinscheduler", - "file_path": "docker/build/startup.sh", - } - ), - "https://api.github.com/repos/apache/dolphinscheduler/contents/docker/build/startup.sh", - ), - ( - ( - { - 
"user": "apache", - "repo_name": "dolphinscheduler", - "file_path": "pom.xml", - } - ), - "https://api.github.com/repos/apache/dolphinscheduler/contents/pom.xml", - ), - ( - ( - { - "user": "apache", - "repo_name": "dolphinscheduler", - "file_path": "script/create-dolphinscheduler.sh", - } - ), - "https://api.github.com/repos/apache/dolphinscheduler/contents/script/create-dolphinscheduler.sh", - ), - ], -) -@patch( - "pydolphinscheduler.resources_plugin.github.GitHub._git_file_info", - new_callable=PropertyMock, -) -def test_github_get_req_url(m_git_file_info, attr, expected): - """Test the get_req_url function of the github resource plug-in.""" - github = GitHub(prefix="prefix") - m_git_file_info.return_value = GitFileInfo(**attr) - assert expected == github.get_req_url() - - -@pytest.mark.parametrize( - "attr, expected", - [ - ( - { - "init": {"prefix": "prefix", "access_token": "access_token"}, - "file_path": "github_resource_plugin.sh", - "file_content": "github resource plugin", - }, - "github resource plugin", - ), - ( - { - "init": { - "prefix": "prefix", - }, - "file_path": "github_resource_plugin.sh", - "file_content": "github resource plugin", - }, - "github resource plugin", - ), - ], -) -@patch("pydolphinscheduler.resources_plugin.github.GitHub.req") -def test_github_read_file(m_req, attr, expected): - """Test the read_file function of the github resource plug-in.""" - github = GitHub(**attr.get("init")) - m_req.return_value = attr.get("file_content") - assert expected == github.read_file(attr.get("file_path")) - - -@pytest.mark.skip(reason="Lack of test environment, need stable repository") -@pytest.mark.parametrize( - "attr, expected", - [ - ( - "https://github.com/apache/dolphinscheduler/blob/dev/lombok.config", - "#\n" - "# Licensed to the Apache Software Foundation (ASF) under one or more\n" - "# contributor license agreements. 
See the NOTICE file distributed with\n" - "# this work for additional information regarding copyright ownership.\n" - "# The ASF licenses this file to You under the Apache License, Version 2.0\n" - '# (the "License"); you may not use this file except in compliance with\n' - "# the License. You may obtain a copy of the License at\n" - "#\n" - "# http://www.apache.org/licenses/LICENSE-2.0\n" - "#\n" - "# Unless required by applicable law or agreed to in writing, software\n" - '# distributed under the License is distributed on an "AS IS" BASIS,\n' - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n" - "# See the License for the specific language governing permissions and\n" - "# limitations under the License.\n" - "#\n" - "\n" - "lombok.addLombokGeneratedAnnotation = true\n", - ), - ], -) -def test_github_req(attr, expected): - """Test the req function of the github resource plug-in.""" - github = GitHub( - prefix="prefix", - ) - assert expected == github.req(attr) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_gitlab.py b/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_gitlab.py deleted file mode 100644 index 6bb90acc72..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_gitlab.py +++ /dev/null @@ -1,116 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test github resource plugin.""" -import pytest - -from pydolphinscheduler.resources_plugin.gitlab import GitLab - - -@pytest.mark.parametrize( - "attr, expected", - [ - ( - "https://gitlab.com/pydolphinscheduler/ds-gitlab/-/blob/main/union.sh", - { - "branch": "main", - "file_path": "union.sh", - "host": "https://gitlab.com", - "repo_name": "ds-gitlab", - "user": "pydolphinscheduler", - }, - ), - ( - "https://gitlab.com/pydolphinscheduler/ds/-/blob/dev/test/exc.sh", - { - "branch": "dev", - "file_path": "test/exc.sh", - "host": "https://gitlab.com", - "repo_name": "ds", - "user": "pydolphinscheduler", - }, - ), - ], -) -def test_gitlab_get_git_file_info(attr, expected): - """Test the get_file_info function of the gitlab resource plugin.""" - gitlab = GitLab(prefix="prefix") - gitlab.get_git_file_info(attr) - assert expected == gitlab._git_file_info.__dict__ - - -@pytest.mark.skip(reason="This test needs gitlab service") -@pytest.mark.parametrize( - "attr, expected", - [ - ( - { - "init": { - "prefix": "https://gitlab.com/pydolphinscheduler/ds-internal/-/blob/main", - "oauth_token": "24518bd4cf5bfe9xx", - }, - "file_path": "union.sh", - }, - "test gitlab resource plugin\n", - ), - ( - { - "init": { - "prefix": "https://gitlab.com/pydolphinscheduler/ds/-/blob/main", - "private_token": "9TyTe2xx", - }, - "file_path": "union.sh", - }, - "test gitlab resource plugin\n", - ), - ( - { - "init": { - "prefix": "https://gitlab.com/pydolphinscheduler/ds-gitlab/-/blob/main", - "username": "pydolphinscheduler", - "password": "4295xx", - }, - "file_path": 
"union.sh", - }, - "test gitlab resource plugin\n", - ), - ( - { - "init": { - "prefix": "https://gitlab.com/pydolphinscheduler/ds-public/-/blob/main", - }, - "file_path": "union.sh", - }, - "test gitlab resource plugin\n", - ), - ( - { - "init": { - "prefix": "https://gitlab.com/pydolphinscheduler/ds-internal/-/blob/main", - "username": "pydolphinscheduler", - "password": "429xxx", - }, - "file_path": "union.sh", - }, - "test gitlab resource plugin\n", - ), - ], -) -def test_gitlab_read_file(attr, expected): - """Test the read_file function of the gitlab resource plug-in.""" - gitlab = GitLab(**attr.get("init")) - assert expected == gitlab.read_file(attr.get("file_path")) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_local.py b/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_local.py deleted file mode 100644 index 82b196f75a..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_local.py +++ /dev/null @@ -1,108 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test local resource plugin.""" -from pathlib import Path -from unittest.mock import PropertyMock, patch - -import pytest - -from pydolphinscheduler.core import Task -from pydolphinscheduler.exceptions import PyResPluginException -from pydolphinscheduler.resources_plugin.local import Local -from pydolphinscheduler.utils import file -from tests.testing.file import delete_file - -file_name = "local_res.sh" -file_content = "echo Test local res plugin" -res_plugin_prefix = Path(__file__).parent -file_path = res_plugin_prefix.joinpath(file_name) - - -@pytest.fixture() -def setup_crt_first(): - """Set up and teardown about create file first and then delete it.""" - file.write(content=file_content, to_path=file_path) - yield - delete_file(file_path) - - -@pytest.mark.parametrize( - "val, expected", - [ - (file_name, file_content), - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch( - "pydolphinscheduler.core.task.Task.ext", - new_callable=PropertyMock, - return_value={ - ".sh", - }, -) -@patch( - "pydolphinscheduler.core.task.Task.ext_attr", - new_callable=PropertyMock, - return_value="_raw_script", -) -@patch( - "pydolphinscheduler.core.task.Task._raw_script", - create=True, - new_callable=PropertyMock, -) -def test_task_obtain_res_plugin( - m_raw_script, m_ext_attr, m_ext, m_code_version, val, expected, setup_crt_first -): - """Test task obtaining resource plug-in.""" - m_raw_script.return_value = val - task = Task( - name="test_task_ext_attr", - task_type="type", - resource_plugin=Local(str(res_plugin_prefix)), - ) - assert expected == getattr(task, "raw_script") - - -@pytest.mark.parametrize( - "attr, expected", - [({"prefix": res_plugin_prefix, "file_name": file_name}, file_content)], -) -def test_local_res_read_file(attr, expected, setup_crt_first): - """Test the read_file function of the local resource plug-in.""" - local = Local(str(attr.get("prefix"))) - local.read_file(attr.get("file_name")) - 
assert expected == local.read_file(file_name) - - -@pytest.mark.parametrize( - "attr", - [ - {"prefix": res_plugin_prefix, "file_name": file_name}, - ], -) -def test_local_res_file_not_found(attr): - """Test local resource plugin file does not exist.""" - with pytest.raises( - PyResPluginException, - match=".* is not found", - ): - local = Local(str(attr.get("prefix"))) - local.read_file(attr.get("file_name")) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_oss.py b/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_oss.py deleted file mode 100644 index 7e57e8230e..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_oss.py +++ /dev/null @@ -1,112 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test oss resource plugin.""" -import pytest - -from pydolphinscheduler.resources_plugin.oss import OSS - - -@pytest.mark.parametrize( - "attr, expected", - [ - ( - "https://ospp-ds-private.oss-cn-hangzhou.aliyuncs.com/a.sh", - { - "endpoint": "https://oss-cn-hangzhou.aliyuncs.com", - "file_path": "a.sh", - "bucket": "ospp-ds-private", - }, - ), - ( - "https://ospp-ds-public.oss-cn-hangzhou.aliyuncs.com/dir/a.sh", - { - "endpoint": "https://oss-cn-hangzhou.aliyuncs.com", - "file_path": "dir/a.sh", - "bucket": "ospp-ds-public", - }, - ), - ], -) -def test_oss_get_bucket_file_info(attr, expected): - """Test the get_bucket_file_info function of the oss resource plugin.""" - oss = OSS(prefix="prefix") - oss.get_bucket_file_info(attr) - assert expected == oss._bucket_file_info.__dict__ - - -@pytest.mark.skip(reason="This test requires OSS services") -@pytest.mark.parametrize( - "attr, expected", - [ - ( - { - "init": { - "prefix": "https://ospp-ds-private.oss-cn-hangzhou.aliyuncs.com", - "access_key_id": "LTAI5tP25Mxx", - "access_key_secret": "cSur23Qbxx", - }, - "file_path": "a.sh", - }, - "test oss resource plugin\n", - ), - ( - { - "init": { - "prefix": "https://ospp-ds-private.oss-cn-hangzhou.aliyuncs.com/dir/", - "access_key_id": "LTAxx", - "access_key_secret": "cSur23Qxx", - }, - "file_path": "b.sh", - }, - "test oss resource plugin\n", - ), - ( - { - "init": { - "prefix": "https://ospp-ds-private.oss-cn-hangzhou.aliyuncs.com", - }, - "file_path": "b.sh", - }, - "test oss resource plugin\n", - ), - ( - { - "init": { - "prefix": "https://ospp-ds-public.oss-cn-hangzhou.aliyuncs.com", - }, - "file_path": "b.sh", - }, - "test oss resource plugin\n", - ), - ( - { - "init": { - "prefix": "https://ospp-ds-public.oss-cn-hangzhou.aliyuncs.com/dir/", - "access_key_id": "LTAIxx", - "access_key_secret": "cSurxx", - }, - "file_path": "a.sh", - }, - "test oss resource plugin\n", - ), - ], -) -def test_oss_read_file(attr, expected): - """Test the read_file function of the 
oss resource plug-in.""" - oss = OSS(**attr.get("init")) - assert expected == oss.read_file(attr.get("file_path")) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_resource_plugin.py b/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_resource_plugin.py deleted file mode 100644 index 63e619a600..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_resource_plugin.py +++ /dev/null @@ -1,75 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test abstract class resource_plugin.""" - -import pytest - -from pydolphinscheduler.exceptions import PyResPluginException -from pydolphinscheduler.resources_plugin import GitHub - - -@pytest.mark.parametrize( - "attr, expected", - [ - ( - { - "s": "https://api.github.com/repos/apache/dolphinscheduler/contents/script/install.sh", - "x": "/", - "n": 2, - }, - 7, - ), - ( - { - "s": "https://api.github.com", - "x": ":", - "n": 1, - }, - 5, - ), - ], -) -def test_github_get_index(attr, expected): - """Test the get_index function of the abstract class resource_plugin.""" - github = GitHub(prefix="prefix") - assert expected == github.get_index(**attr) - - -@pytest.mark.parametrize( - "attr", - [ - { - "s": "https://api.github.com", - "x": "/", - "n": 3, - }, - { - "s": "https://api.github.com/", - "x": "/", - "n": 4, - }, - ], -) -def test_github_get_index_exception(attr): - """Test exception to get_index function of abstract class resource_plugin.""" - with pytest.raises( - PyResPluginException, - match="Incomplete path.", - ): - github = GitHub(prefix="prefix") - github.get_index(**attr) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_s3.py b/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_s3.py deleted file mode 100644 index 5f75f3eb75..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_s3.py +++ /dev/null @@ -1,79 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test oss resource plugin.""" -import pytest - -from pydolphinscheduler.resources_plugin import S3 - - -@pytest.mark.parametrize( - "attr, expected", - [ - ( - "https://ds-resource-plugin-private.s3.amazonaws.com/a.sh", - { - "file_path": "a.sh", - "bucket": "ds-resource-plugin-private", - }, - ), - ( - "https://ds-resource-plugin-public.s3.amazonaws.com/dir/a.sh", - { - "file_path": "dir/a.sh", - "bucket": "ds-resource-plugin-public", - }, - ), - ], -) -def test_s3_get_bucket_file_info(attr, expected): - """Test the get_bucket_file_info function of the s3 resource plugin.""" - s3 = S3(prefix="prefix") - s3.get_bucket_file_info(attr) - assert expected == s3._bucket_file_info.__dict__ - - -@pytest.mark.skip(reason="This test requires s3 services") -@pytest.mark.parametrize( - "attr, expected", - [ - ( - { - "init": { - "prefix": "https://ds-resource-plugin-private.s3.amazonaws.com/dir/", - "access_key_id": "LTAI5tP25Mxx", - "access_key_secret": "cSur23Qbxx", - }, - "file_path": "a.sh", - }, - "test s3 resource plugin\n", - ), - ( - { - "init": { - "prefix": "https://ds-resource-plugin-public.s3.amazonaws.com/", - }, - "file_path": "a.sh", - }, - "test s3 resource plugin\n", - ), - ], -) -def test_s3_read_file(attr, expected): - """Test the read_file function of the s3 resource plug-in.""" - s3 = S3(**attr.get("init")) - assert expected == s3.read_file(attr.get("file_path")) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/__init__.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/__init__.py deleted file mode 100644 index 
095e3013e5..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Init tasks package tests.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_condition.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_condition.py deleted file mode 100644 index 72eec28ed7..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_condition.py +++ /dev/null @@ -1,461 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test Task dependent.""" -from typing import List, Tuple -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.tasks.condition import ( - FAILURE, - SUCCESS, - And, - Condition, - ConditionOperator, - Or, - Status, -) -from tests.testing.task import Task - -TEST_NAME = "test-name" -TEST_PROJECT = "test-project" -TEST_PROCESS_DEFINITION = "test-process-definition" -TEST_TYPE = "test-type" -TEST_PROJECT_CODE, TEST_DEFINITION_CODE, TEST_TASK_CODE = 12345, 123456, 1234567 - -TEST_OPERATOR_LIST = ("AND", "OR") - - -@pytest.mark.parametrize( - "obj, expect", - [ - (Status, "STATUS"), - (SUCCESS, "SUCCESS"), - (FAILURE, "FAILURE"), - ], -) -def test_class_status_status_name(obj: Status, expect: str): - """Test class status and sub class property status_name.""" - assert obj.status_name() == expect - - -@pytest.mark.parametrize( - "obj, tasks", - [ - (Status, (1, 2, 3)), - (SUCCESS, (1.1, 2.2, 3.3)), - (FAILURE, (ConditionOperator(1), ConditionOperator(2), ConditionOperator(3))), - ], -) -def test_class_status_depend_item_list_no_expect_type(obj: Status, tasks: Tuple): - """Test class status and sub class raise error when assign not support type.""" - with pytest.raises( - PyDSParamException, match=".*?only accept class Task or sub class Task, but get" - ): - obj(*tasks).get_define() - - -@pytest.mark.parametrize( - "obj, tasks", - [ - (Status, [Task(str(i), TEST_TYPE) for i in range(1)]), - (Status, [Task(str(i), TEST_TYPE) for i in range(2)]), - (Status, [Task(str(i), TEST_TYPE) for i in range(3)]), - (SUCCESS, [Task(str(i), TEST_TYPE) for i in range(1)]), - (SUCCESS, [Task(str(i), TEST_TYPE) for i in range(2)]), - (SUCCESS, [Task(str(i), TEST_TYPE) for i in range(3)]), - (FAILURE, [Task(str(i), TEST_TYPE) 
for i in range(1)]), - (FAILURE, [Task(str(i), TEST_TYPE) for i in range(2)]), - (FAILURE, [Task(str(i), TEST_TYPE) for i in range(3)]), - ], -) -def test_class_status_depend_item_list(obj: Status, tasks: Tuple): - """Test class status and sub class function :func:`depend_item_list`.""" - status = obj.status_name() - expect = [ - { - "depTaskCode": i.code, - "status": status, - } - for i in tasks - ] - assert obj(*tasks).get_define() == expect - - -@pytest.mark.parametrize( - "obj, expect", - [ - (ConditionOperator, "CONDITIONOPERATOR"), - (And, "AND"), - (Or, "OR"), - ], -) -def test_condition_operator_operator_name(obj: ConditionOperator, expect: str): - """Test class ConditionOperator and sub class class function :func:`operator_name`.""" - assert obj.operator_name() == expect - - -@pytest.mark.parametrize( - "obj, expect", - [ - (ConditionOperator, "CONDITIONOPERATOR"), - (And, "AND"), - (Or, "OR"), - ], -) -def test_condition_operator_relation(obj: ConditionOperator, expect: str): - """Test class ConditionOperator and sub class class property `relation`.""" - assert obj(1).relation == expect - - -@pytest.mark.parametrize( - "obj, status_or_operator, match", - [ - ( - ConditionOperator, - [Status(Task("1", TEST_TYPE)), 1], - ".*?operator parameter support ConditionTask and ConditionOperator.*?", - ), - ( - ConditionOperator, - [ - Status(Task("1", TEST_TYPE)), - 1.0, - ], - ".*?operator parameter support ConditionTask and ConditionOperator.*?", - ), - ( - ConditionOperator, - [ - Status(Task("1", TEST_TYPE)), - ConditionOperator(And(Status(Task("1", TEST_TYPE)))), - ], - ".*?operator parameter only support same type.", - ), - ( - ConditionOperator, - [ - ConditionOperator(And(Status(Task("1", TEST_TYPE)))), - Status(Task("1", TEST_TYPE)), - ], - ".*?operator parameter only support same type.", - ), - ], -) -def test_condition_operator_set_define_attr_not_support_type( - obj, status_or_operator, match -): - """Test class ConditionOperator parameter error, 
including parameter not same or type not support.""" - with pytest.raises(PyDSParamException, match=match): - op = obj(*status_or_operator) - op.set_define_attr() - - -@pytest.mark.parametrize( - "obj, task_num", - [ - (ConditionOperator, 1), - (ConditionOperator, 2), - (ConditionOperator, 3), - (And, 1), - (And, 2), - (And, 3), - (Or, 1), - (Or, 2), - (Or, 3), - ], -) -def test_condition_operator_set_define_attr_status( - obj: ConditionOperator, task_num: int -): - """Test :func:`set_define_attr` with one or more class status.""" - attr = "depend_item_list" - - tasks = [Task(str(i), TEST_TYPE) for i in range(task_num)] - status = Status(*tasks) - - expect = [ - {"depTaskCode": task.code, "status": status.status_name()} for task in tasks - ] - - co = obj(status) - co.set_define_attr() - assert getattr(co, attr) == expect - - -@pytest.mark.parametrize( - "obj, status", - [ - (ConditionOperator, (SUCCESS, SUCCESS)), - (ConditionOperator, (FAILURE, FAILURE)), - (ConditionOperator, (SUCCESS, FAILURE)), - (ConditionOperator, (FAILURE, SUCCESS)), - (And, (SUCCESS, SUCCESS)), - (And, (FAILURE, FAILURE)), - (And, (SUCCESS, FAILURE)), - (And, (FAILURE, SUCCESS)), - (Or, (SUCCESS, SUCCESS)), - (Or, (FAILURE, FAILURE)), - (Or, (SUCCESS, FAILURE)), - (Or, (FAILURE, SUCCESS)), - ], -) -def test_condition_operator_set_define_attr_mix_status( - obj: ConditionOperator, status: List[Status] -): - """Test :func:`set_define_attr` with one or more mixed status.""" - attr = "depend_item_list" - - task = Task("test-operator", TEST_TYPE) - status_list = [] - expect = [] - for sta in status: - status_list.append(sta(task)) - expect.append({"depTaskCode": task.code, "status": sta.status_name()}) - - co = obj(*status_list) - co.set_define_attr() - assert getattr(co, attr) == expect - - -@pytest.mark.parametrize( - "obj, task_num", - [ - (ConditionOperator, 1), - (ConditionOperator, 2), - (ConditionOperator, 3), - (And, 1), - (And, 2), - (And, 3), - (Or, 1), - (Or, 2), - (Or, 3), - ], -) 
-def test_condition_operator_set_define_attr_operator( - obj: ConditionOperator, task_num: int -): - """Test :func:`set_define_attr` with one or more class condition operator.""" - attr = "depend_task_list" - - task = Task("test-operator", TEST_TYPE) - status = Status(task) - - expect = [ - { - "relation": obj.operator_name(), - "dependItemList": [ - { - "depTaskCode": task.code, - "status": status.status_name(), - } - ], - } - for _ in range(task_num) - ] - - co = obj(*[obj(status) for _ in range(task_num)]) - co.set_define_attr() - assert getattr(co, attr) == expect - - -@pytest.mark.parametrize( - "cond, sub_cond", - [ - (ConditionOperator, (And, Or)), - (ConditionOperator, (Or, And)), - (And, (And, Or)), - (And, (Or, And)), - (Or, (And, Or)), - (Or, (Or, And)), - ], -) -def test_condition_operator_set_define_attr_mix_operator( - cond: ConditionOperator, sub_cond: Tuple[ConditionOperator] -): - """Test :func:`set_define_attr` with one or more class mix condition operator.""" - attr = "depend_task_list" - - task = Task("test-operator", TEST_TYPE) - - expect = [] - sub_condition = [] - for cond in sub_cond: - status = Status(task) - sub_condition.append(cond(status)) - expect.append( - { - "relation": cond.operator_name(), - "dependItemList": [ - { - "depTaskCode": task.code, - "status": status.status_name(), - } - ], - } - ) - co = cond(*sub_condition) - co.set_define_attr() - assert getattr(co, attr) == expect - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(12345, 1), -) -@patch( - "pydolphinscheduler.tasks.condition.Condition.gen_code_and_version", - return_value=(123, 1), -) -def test_condition_get_define(mock_condition_code_version, mock_task_code_version): - """Test task condition :func:`get_define`.""" - common_task = Task(name="common_task", task_type="test_task_condition") - cond_operator = And( - And( - SUCCESS(common_task, common_task), - FAILURE(common_task, common_task), - ), - Or( - SUCCESS(common_task, 
common_task), - FAILURE(common_task, common_task), - ), - ) - - name = "test_condition_get_define" - expect = { - "code": 123, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "CONDITIONS", - "taskParams": { - "resourceList": [], - "localParams": [], - "dependence": { - "relation": "AND", - "dependTaskList": [ - { - "relation": "AND", - "dependItemList": [ - {"depTaskCode": common_task.code, "status": "SUCCESS"}, - {"depTaskCode": common_task.code, "status": "SUCCESS"}, - {"depTaskCode": common_task.code, "status": "FAILURE"}, - {"depTaskCode": common_task.code, "status": "FAILURE"}, - ], - }, - { - "relation": "OR", - "dependItemList": [ - {"depTaskCode": common_task.code, "status": "SUCCESS"}, - {"depTaskCode": common_task.code, "status": "SUCCESS"}, - {"depTaskCode": common_task.code, "status": "FAILURE"}, - {"depTaskCode": common_task.code, "status": "FAILURE"}, - ], - }, - ], - }, - "conditionResult": { - "successNode": [common_task.code], - "failedNode": [common_task.code], - }, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - - task = Condition( - name, condition=cond_operator, success_task=common_task, failed_task=common_task - ) - assert task.get_define() == expect - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_condition_set_dep_workflow(mock_task_code_version): - """Test task condition set dependence in workflow level.""" - with ProcessDefinition(name="test-condition-set-dep-workflow") as pd: - pre_task_1 = Task(name="pre_task_1", task_type=TEST_TYPE) - pre_task_2 = Task(name="pre_task_2", task_type=TEST_TYPE) - pre_task_3 = Task(name="pre_task_3", task_type=TEST_TYPE) - cond_operator = And( - And( - SUCCESS(pre_task_1, pre_task_2), - 
FAILURE(pre_task_3), - ), - ) - - success_branch = Task(name="success_branch", task_type=TEST_TYPE) - fail_branch = Task(name="fail_branch", task_type=TEST_TYPE) - - condition = Condition( - name="conditions", - condition=cond_operator, - success_task=success_branch, - failed_task=fail_branch, - ) - - # General tasks test - assert len(pd.tasks) == 6 - assert sorted(pd.task_list, key=lambda t: t.name) == sorted( - [ - pre_task_1, - pre_task_2, - pre_task_3, - success_branch, - fail_branch, - condition, - ], - key=lambda t: t.name, - ) - # Task dep test - assert success_branch._upstream_task_codes == {condition.code} - assert fail_branch._upstream_task_codes == {condition.code} - assert condition._downstream_task_codes == { - success_branch.code, - fail_branch.code, - } - - # Condition task dep after ProcessDefinition function get_define called - assert condition._upstream_task_codes == { - pre_task_1.code, - pre_task_2.code, - pre_task_3.code, - } - assert all( - [ - child._downstream_task_codes == {condition.code} - for child in [ - pre_task_1, - pre_task_2, - pre_task_3, - ] - ] - ) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_datax.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_datax.py deleted file mode 100644 index 95f65b3155..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_datax.py +++ /dev/null @@ -1,213 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test Task DataX.""" -from pathlib import Path -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.resources_plugin import Local -from pydolphinscheduler.tasks.datax import CustomDataX, DataX -from pydolphinscheduler.utils import file -from tests.testing.file import delete_file - - -@pytest.fixture() -def setup_crt_first(request): - """Set up and teardown about create file first and then delete it.""" - file_content = request.param.get("file_content") - file_path = request.param.get("file_path") - file.write( - content=file_content, - to_path=file_path, - ) - yield - delete_file(file_path) - - -@patch( - "pydolphinscheduler.core.database.Database.get_database_info", - return_value=({"id": 1, "type": "MYSQL"}), -) -def test_datax_get_define(mock_datasource): - """Test task datax function get_define.""" - code = 123 - version = 1 - name = "test_datax_get_define" - command = "select name from test_source_table_name" - datasource_name = "test_datasource" - datatarget_name = "test_datatarget" - target_table = "test_target_table_name" - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "DATAX", - "taskParams": { - "customConfig": 0, - "dsType": "MYSQL", - "dataSource": 1, - "dtType": "MYSQL", - "dataTarget": 1, - "sql": command, - "targetTable": target_table, - "jobSpeedByte": 0, - "jobSpeedRecord": 1000, - "xms": 1, - "xmx": 1, - "preStatements": [], - "postStatements": [], - "localParams": [], - "resourceList": [], - "dependence": {}, - "conditionResult": 
{"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - task = DataX(name, datasource_name, datatarget_name, command, target_table) - assert task.get_define() == expect - - -@pytest.mark.parametrize("json_template", ["json_template"]) -def test_custom_datax_get_define(json_template): - """Test task custom datax function get_define.""" - code = 123 - version = 1 - name = "test_custom_datax_get_define" - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "DATAX", - "taskParams": { - "customConfig": 1, - "json": json_template, - "xms": 1, - "xmx": 1, - "localParams": [], - "resourceList": [], - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - task = CustomDataX(name, json_template) - assert task.get_define() == expect - - -@pytest.mark.parametrize( - "setup_crt_first", - [ - { - "file_path": Path(__file__).parent.joinpath("local_res.sql"), - "file_content": "test local resource", - } - ], - indirect=True, -) -@pytest.mark.parametrize( - "attr, expect", - [ - ( - { - "name": "task_datax", - "datasource_name": "first_mysql", - "datatarget_name": "second_mysql", - "sql": "local_res.sql", - "target_table": "target_table", - "resource_plugin": 
Local(str(Path(__file__).parent)), - }, - "test local resource", - ), - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_resources_local_datax_command_content( - mock_code_version, attr, expect, setup_crt_first -): - """Test task datax sql content through the local resource plug-in.""" - datax = DataX(**attr) - assert expect == getattr(datax, "sql") - - -@pytest.mark.parametrize( - "setup_crt_first", - [ - { - "file_path": Path(__file__).parent.joinpath("local_res.json"), - "file_content": '{content: "test local resource"}', - } - ], - indirect=True, -) -@pytest.mark.parametrize( - "attr, expect", - [ - ( - { - "name": "task_custom_datax", - "json": "local_res.json", - "resource_plugin": Local(str(Path(__file__).parent)), - }, - '{content: "test local resource"}', - ), - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_resources_local_custom_datax_command_content( - mock_code_version, attr, expect, setup_crt_first -): - """Test task CustomDataX json content through the local resource plug-in.""" - custom_datax = CustomDataX(**attr) - assert expect == getattr(custom_datax, "json") diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_dependent.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_dependent.py deleted file mode 100644 index f55700e04b..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_dependent.py +++ /dev/null @@ -1,794 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test Task dependent.""" -import itertools -from typing import Dict, List, Optional, Tuple, Union -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.tasks.dependent import ( - And, - Dependent, - DependentDate, - DependentItem, - DependentOperator, - Or, -) - -TEST_PROJECT = "test-project" -TEST_PROCESS_DEFINITION = "test-process-definition" -TEST_TASK = "test-task" -TEST_PROJECT_CODE, TEST_DEFINITION_CODE, TEST_TASK_CODE = 12345, 123456, 1234567 - -TEST_OPERATOR_LIST = ("AND", "OR") - - -@pytest.mark.parametrize( - "dep_date, dep_cycle", - [ - # hour - (DependentDate.CURRENT_HOUR, "hour"), - (DependentDate.LAST_ONE_HOUR, "hour"), - (DependentDate.LAST_TWO_HOURS, "hour"), - (DependentDate.LAST_THREE_HOURS, "hour"), - (DependentDate.LAST_TWENTY_FOUR_HOURS, "hour"), - # day - (DependentDate.TODAY, "day"), - (DependentDate.LAST_ONE_DAYS, "day"), - (DependentDate.LAST_TWO_DAYS, "day"), - (DependentDate.LAST_THREE_DAYS, "day"), - (DependentDate.LAST_SEVEN_DAYS, "day"), - # week - (DependentDate.THIS_WEEK, "week"), - (DependentDate.LAST_WEEK, "week"), - (DependentDate.LAST_MONDAY, "week"), - (DependentDate.LAST_TUESDAY, "week"), - (DependentDate.LAST_WEDNESDAY, "week"), - (DependentDate.LAST_THURSDAY, "week"), - (DependentDate.LAST_FRIDAY, "week"), - (DependentDate.LAST_SATURDAY, "week"), - (DependentDate.LAST_SUNDAY, "week"), - # month - (DependentDate.THIS_MONTH, "month"), - (DependentDate.LAST_MONTH, "month"), - (DependentDate.LAST_MONTH_BEGIN, "month"), - 
(DependentDate.LAST_MONTH_END, "month"), - ], -) -@patch( - "pydolphinscheduler.tasks.dependent.DependentItem.get_code_from_gateway", - return_value={ - "projectCode": TEST_PROJECT_CODE, - "processDefinitionCode": TEST_DEFINITION_CODE, - "taskDefinitionCode": TEST_TASK_CODE, - }, -) -def test_dependent_item_get_define(mock_task_info, dep_date, dep_cycle): - """Test dependent.DependentItem get define. - - Here we have test some cases as below. - ```py - { - "projectCode": "project code", - "definitionCode": "definition code", - "depTaskCode": "dep task code", - "cycle": "day", - "dateValue": "today" - } - ``` - """ - attr = { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": dep_date, - } - expect = { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": dep_cycle, - "dateValue": dep_date, - } - task = DependentItem(**attr) - assert expect == task.get_define() - - -def test_dependent_item_date_error(): - """Test error when pass None to dependent_date.""" - with pytest.raises( - PyDSParamException, match="Parameter dependent_date must provider.*?" 
- ): - DependentItem( - project_name=TEST_PROJECT, - process_definition_name=TEST_PROCESS_DEFINITION, - dependent_date=None, - ) - - -@pytest.mark.parametrize( - "task_name, result", - [ - ({"dependent_task_name": TEST_TASK}, TEST_TASK), - ({}, None), - ], -) -def test_dependent_item_code_parameter(task_name: dict, result: Optional[str]): - """Test dependent item property code_parameter.""" - dependent_item = DependentItem( - project_name=TEST_PROJECT, - process_definition_name=TEST_PROCESS_DEFINITION, - **task_name, - ) - expect = (TEST_PROJECT, TEST_PROCESS_DEFINITION, result) - assert dependent_item.code_parameter == expect - - -@pytest.mark.parametrize( - "arg_list", - [ - [1, 2], - [ - DependentItem( - project_name=TEST_PROJECT, - process_definition_name=TEST_PROCESS_DEFINITION, - ), - 1, - ], - [ - And( - DependentItem( - project_name=TEST_PROJECT, - process_definition_name=TEST_PROCESS_DEFINITION, - ) - ), - 1, - ], - [ - DependentItem( - project_name=TEST_PROJECT, - process_definition_name=TEST_PROCESS_DEFINITION, - ), - And( - DependentItem( - project_name=TEST_PROJECT, - process_definition_name=TEST_PROCESS_DEFINITION, - ) - ), - ], - ], -) -@patch( - "pydolphinscheduler.tasks.dependent.DependentItem.get_code_from_gateway", - return_value={ - "projectCode": TEST_PROJECT_CODE, - "processDefinitionCode": TEST_DEFINITION_CODE, - "taskDefinitionCode": TEST_TASK_CODE, - }, -) -def test_dependent_operator_set_define_error(mock_code, arg_list): - """Test dependent operator function :func:`set_define` with not support type.""" - dep_op = DependentOperator(*arg_list) - with pytest.raises(PyDSParamException, match="Dependent .*? 
operator.*?"): - dep_op.set_define_attr() - - -@pytest.mark.parametrize( - # Test dependent operator, Test dependent item parameters, expect operator define - "operators, kwargs, expect", - [ - # Test dependent operator (And | Or) with single dependent item - ( - (And, Or), - ( - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_MONTH_END, - }, - ), - [ - { - "relation": op, - "dependItemList": [ - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "month", - "dateValue": DependentDate.LAST_MONTH_END, - }, - ], - } - for op in TEST_OPERATOR_LIST - ], - ), - # Test dependent operator (And | Or) with two dependent item - ( - (And, Or), - ( - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_MONTH_END, - }, - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_WEEK, - }, - ), - [ - { - "relation": op, - "dependItemList": [ - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "month", - "dateValue": DependentDate.LAST_MONTH_END, - }, - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "week", - "dateValue": DependentDate.LAST_WEEK, - }, - ], - } - for op in TEST_OPERATOR_LIST - ], - ), - # Test dependent operator (And | Or) with multiply dependent item - ( - (And, Or), - ( - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_MONTH_END, - }, - { - "project_name": TEST_PROJECT, - "process_definition_name": 
TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_WEEK, - }, - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_ONE_DAYS, - }, - ), - [ - { - "relation": op, - "dependItemList": [ - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "month", - "dateValue": DependentDate.LAST_MONTH_END, - }, - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "week", - "dateValue": DependentDate.LAST_WEEK, - }, - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "day", - "dateValue": DependentDate.LAST_ONE_DAYS, - }, - ], - } - for op in TEST_OPERATOR_LIST - ], - ), - ], -) -@patch( - "pydolphinscheduler.tasks.dependent.DependentItem.get_code_from_gateway", - return_value={ - "projectCode": TEST_PROJECT_CODE, - "processDefinitionCode": TEST_DEFINITION_CODE, - "taskDefinitionCode": TEST_TASK_CODE, - }, -) -def test_operator_dependent_item( - mock_code_info, - operators: Tuple[DependentOperator], - kwargs: Tuple[dict], - expect: List[Dict], -): - """Test DependentOperator(DependentItem) function get_define. - - Here we have test some cases as below, including single dependentItem and multiply dependentItem. - ```py - { - "relation": "AND", - "dependItemList": [ - { - "projectCode": "project code", - "definitionCode": "definition code", - "depTaskCode": "dep task code", - "cycle": "day", - "dateValue": "today" - }, - ... 
- ] - } - ``` - """ - for idx, operator in enumerate(operators): - # Use variable to keep one or more dependent item to test dependent operator behavior - dependent_item_list = [] - for kwarg in kwargs: - dependent_item = DependentItem(**kwarg) - dependent_item_list.append(dependent_item) - op = operator(*dependent_item_list) - assert expect[idx] == op.get_define() - - -@pytest.mark.parametrize( - # Test dependent operator, Test dependent item parameters, expect operator define - "operators, args, expect", - [ - # Test dependent operator (And | Or) with single dependent task list - ( - (And, Or), - ( - (And, Or), - ( - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_MONTH_END, - }, - ), - ), - [ - { - "relation": par_op, - "dependTaskList": [ - { - "relation": chr_op, - "dependItemList": [ - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "month", - "dateValue": DependentDate.LAST_MONTH_END, - }, - ], - } - ], - } - for (par_op, chr_op) in itertools.product( - TEST_OPERATOR_LIST, TEST_OPERATOR_LIST - ) - ], - ), - # Test dependent operator (And | Or) with two dependent task list - ( - (And, Or), - ( - (And, Or), - ( - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_MONTH_END, - }, - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_WEEK, - }, - ), - ), - [ - { - "relation": par_op, - "dependTaskList": [ - { - "relation": chr_op, - "dependItemList": [ - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "month", - "dateValue": DependentDate.LAST_MONTH_END, - }, - { - "projectCode": 
TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "week", - "dateValue": DependentDate.LAST_WEEK, - }, - ], - } - ], - } - for (par_op, chr_op) in itertools.product( - TEST_OPERATOR_LIST, TEST_OPERATOR_LIST - ) - ], - ), - # Test dependent operator (And | Or) with multiply dependent task list - ( - (And, Or), - ( - (And, Or), - ( - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_MONTH_END, - }, - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_WEEK, - }, - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_ONE_DAYS, - }, - ), - ), - [ - { - "relation": par_op, - "dependTaskList": [ - { - "relation": chr_op, - "dependItemList": [ - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "month", - "dateValue": DependentDate.LAST_MONTH_END, - }, - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "week", - "dateValue": DependentDate.LAST_WEEK, - }, - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "day", - "dateValue": DependentDate.LAST_ONE_DAYS, - }, - ], - } - ], - } - for (par_op, chr_op) in itertools.product( - TEST_OPERATOR_LIST, TEST_OPERATOR_LIST - ) - ], - ), - ], -) -@patch( - "pydolphinscheduler.tasks.dependent.DependentItem.get_code_from_gateway", - return_value={ - "projectCode": TEST_PROJECT_CODE, - "processDefinitionCode": TEST_DEFINITION_CODE, - "taskDefinitionCode": TEST_TASK_CODE, - }, -) -def 
test_operator_dependent_task_list_multi_dependent_item( - mock_code_info, - operators: Tuple[DependentOperator], - args: Tuple[Union[Tuple, dict]], - expect: List[Dict], -): - """Test DependentOperator(DependentOperator(DependentItem)) single operator function get_define. - - Here we have test some cases as below. This test case only test single DependTaskList with one or - multiply dependItemList. - ```py - { - "relation": "OR", - "dependTaskList": [ - { - "relation": "AND", - "dependItemList": [ - { - "projectCode": "project code", - "definitionCode": "definition code", - "depTaskCode": "dep task code", - "cycle": "day", - "dateValue": "today" - }, - ... - ] - }, - ] - } - ``` - """ - # variable expect_idx record idx should be use to get specific expect - expect_idx = 0 - - for op_idx, operator in enumerate(operators): - dependent_operator = args[0] - dependent_item_kwargs = args[1] - - for dop_idx, dpt_op in enumerate(dependent_operator): - dependent_item_list = [] - for dpt_kwargs in dependent_item_kwargs: - dpti = DependentItem(**dpt_kwargs) - dependent_item_list.append(dpti) - child_dep_op = dpt_op(*dependent_item_list) - op = operator(child_dep_op) - assert expect[expect_idx] == op.get_define() - expect_idx += 1 - - -def get_dep_task_list(*operator): - """Return dependent task list from given operators list.""" - result = [] - for op in operator: - result.append( - { - "relation": op.operator_name(), - "dependItemList": [ - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "month", - "dateValue": DependentDate.LAST_MONTH_END, - }, - ], - } - ) - return result - - -@pytest.mark.parametrize( - # Test dependent operator, Test dependent item parameters, expect operator define - "operators, args, expect", - [ - # Test dependent operator (And | Or) with two dependent task list - ( - (And, Or), - ( - ((And, And), (And, Or), (Or, And), (Or, Or)), - { - "project_name": TEST_PROJECT, - 
"process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_MONTH_END, - }, - ), - [ - { - "relation": parent_op.operator_name(), - "dependTaskList": get_dep_task_list(*child_ops), - } - for parent_op in (And, Or) - for child_ops in ((And, And), (And, Or), (Or, And), (Or, Or)) - ], - ), - # Test dependent operator (And | Or) with multiple dependent task list - ( - (And, Or), - ( - ((And, And, And), (And, And, And, And), (And, And, And, And, And)), - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_MONTH_END, - }, - ), - [ - { - "relation": parent_op.operator_name(), - "dependTaskList": get_dep_task_list(*child_ops), - } - for parent_op in (And, Or) - for child_ops in ( - (And, And, And), - (And, And, And, And), - (And, And, And, And, And), - ) - ], - ), - ], -) -@patch( - "pydolphinscheduler.tasks.dependent.DependentItem.get_code_from_gateway", - return_value={ - "projectCode": TEST_PROJECT_CODE, - "processDefinitionCode": TEST_DEFINITION_CODE, - "taskDefinitionCode": TEST_TASK_CODE, - }, -) -def test_operator_dependent_task_list_multi_dependent_list( - mock_code_info, - operators: Tuple[DependentOperator], - args: Tuple[Union[Tuple, dict]], - expect: List[Dict], -): - """Test DependentOperator(DependentOperator(DependentItem)) multiply operator function get_define. - - Here we have test some cases as below. This test case only test single DependTaskList with one or - multiply dependTaskList. - ```py - { - "relation": "OR", - "dependTaskList": [ - { - "relation": "AND", - "dependItemList": [ - { - "projectCode": "project code", - "definitionCode": "definition code", - "depTaskCode": "dep task code", - "cycle": "day", - "dateValue": "today" - } - ] - }, - ... 
- ] - } - ``` - """ - # variable expect_idx record idx should be use to get specific expect - expect_idx = 0 - for op_idx, operator in enumerate(operators): - dependent_operator = args[0] - dependent_item_kwargs = args[1] - - for dop_idx, dpt_ops in enumerate(dependent_operator): - dependent_task_list = [ - dpt_op(DependentItem(**dependent_item_kwargs)) for dpt_op in dpt_ops - ] - op = operator(*dependent_task_list) - assert ( - expect[expect_idx] == op.get_define() - ), f"Failed with operator syntax {operator}.{dpt_ops}" - expect_idx += 1 - - -@patch( - "pydolphinscheduler.tasks.dependent.DependentItem.get_code_from_gateway", - return_value={ - "projectCode": TEST_PROJECT_CODE, - "processDefinitionCode": TEST_DEFINITION_CODE, - "taskDefinitionCode": TEST_TASK_CODE, - }, -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_dependent_get_define(mock_code_version, mock_dep_code): - """Test task dependent function get_define.""" - project_name = "test-dep-project" - process_definition_name = "test-dep-definition" - dependent_task_name = "test-dep-task" - dep_operator = And( - Or( - # test dependence with add tasks - DependentItem( - project_name=project_name, - process_definition_name=process_definition_name, - ) - ), - And( - # test dependence with specific task - DependentItem( - project_name=project_name, - process_definition_name=process_definition_name, - dependent_task_name=dependent_task_name, - ) - ), - ) - - name = "test_dependent_get_define" - expect = { - "code": 123, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "DEPENDENT", - "taskParams": { - "resourceList": [], - "localParams": [], - "dependence": { - "relation": "AND", - "dependTaskList": [ - { - "relation": "OR", - "dependItemList": [ - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": "0", - "cycle": "day", - "dateValue": "today", - } - ], - }, - { - 
"relation": "AND", - "dependItemList": [ - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "day", - "dateValue": "today", - } - ], - }, - ], - }, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - - task = Dependent(name, dependence=dep_operator) - assert task.get_define() == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_dvc.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_dvc.py deleted file mode 100644 index 815d896234..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_dvc.py +++ /dev/null @@ -1,173 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task Dvc.""" -from unittest.mock import patch - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.tasks.dvc import DVCDownload, DVCInit, DvcTaskType, DVCUpload - -repository = "git@github.com:/dvc-data-repository-example.git" - - -def test_dvc_init_get_define(): - """Test task dvc init function get_define.""" - name = "test_dvc_init" - dvc_store_url = "~/dvc_data" - - code = 123 - version = 1 - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": TaskType.DVC, - "taskParams": { - "resourceList": [], - "localParams": [], - "dvcTaskType": DvcTaskType.INIT, - "dvcRepository": repository, - "dvcStoreUrl": dvc_store_url, - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - dvc_init = DVCInit(name, repository, dvc_store_url) - assert dvc_init.get_define() == expect - - -def test_dvc_upload_get_define(): - """Test task dvc upload function get_define.""" - name = "test_dvc_upload" - data_path_in_dvc_repository = "iris" - data_path_in_worker = "~/source/iris" - version = "v1" - message = "upload iris data v1" - - code = 123 - version = 1 - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": TaskType.DVC, - "taskParams": { - "resourceList": [], - "localParams": [], - "dvcTaskType": DvcTaskType.UPLOAD, - "dvcRepository": repository, - "dvcDataLocation": data_path_in_dvc_repository, - "dvcLoadSaveDataPath": data_path_in_worker, - "dvcVersion": version, - "dvcMessage": message, - "dependence": {}, - "conditionResult": 
{"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - dvc_upload = DVCUpload( - name, - repository=repository, - data_path_in_dvc_repository=data_path_in_dvc_repository, - data_path_in_worker=data_path_in_worker, - version=version, - message=message, - ) - assert dvc_upload.get_define() == expect - - -def test_dvc_download_get_define(): - """Test task dvc download function get_define.""" - name = "test_dvc_upload" - data_path_in_dvc_repository = "iris" - data_path_in_worker = "~/target/iris" - version = "v1" - - code = 123 - version = 1 - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": TaskType.DVC, - "taskParams": { - "resourceList": [], - "localParams": [], - "dvcTaskType": DvcTaskType.DOWNLOAD, - "dvcRepository": repository, - "dvcDataLocation": data_path_in_dvc_repository, - "dvcLoadSaveDataPath": data_path_in_worker, - "dvcVersion": version, - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - dvc_download = DVCDownload( - name, - repository=repository, - data_path_in_dvc_repository=data_path_in_dvc_repository, - data_path_in_worker=data_path_in_worker, - version=version, - ) - assert dvc_download.get_define() == expect diff --git 
a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_flink.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_flink.py deleted file mode 100644 index 2f30a494b9..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_flink.py +++ /dev/null @@ -1,83 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task Flink.""" - -from unittest.mock import patch - -from pydolphinscheduler.tasks.flink import DeployMode, Flink, FlinkVersion, ProgramType - - -@patch( - "pydolphinscheduler.core.engine.Engine.get_resource_info", - return_value=({"id": 1, "name": "test"}), -) -def test_flink_get_define(mock_resource): - """Test task flink function get_define.""" - code = 123 - version = 1 - name = "test_flink_get_define" - main_class = "org.apache.flink.test_main_class" - main_package = "test_main_package" - program_type = ProgramType.JAVA - deploy_mode = DeployMode.LOCAL - - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "FLINK", - "taskParams": { - "mainClass": main_class, - "mainJar": { - "id": 1, - }, - "programType": program_type, - "deployMode": deploy_mode, - "flinkVersion": FlinkVersion.LOW_VERSION, - "slot": 1, - "parallelism": 1, - "taskManager": 2, - "jobManagerMemory": "1G", - "taskManagerMemory": "2G", - "appName": None, - "mainArgs": None, - "others": None, - "localParams": [], - "resourceList": [], - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - task = Flink(name, main_class, main_package, program_type, deploy_mode) - assert task.get_define() == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_func_wrap.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_func_wrap.py deleted file mode 100644 index 628b6e7f86..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_func_wrap.py +++ /dev/null @@ -1,169 +0,0 @@ -# Licensed to 
the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test module about function wrap task decorator.""" - -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.tasks.func_wrap import task -from tests.testing.decorator import foo as foo_decorator -from tests.testing.task import Task - -PD_NAME = "test_process_definition" -TASK_NAME = "test_task" - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", return_value=(12345, 1) -) -def test_single_task_outside(mock_code): - """Test single decorator task which outside process definition.""" - - @task - def foo(): - print(TASK_NAME) - - with ProcessDefinition(PD_NAME) as pd: - foo() - - assert pd is not None and pd.name == PD_NAME - assert len(pd.tasks) == 1 - - pd_task = pd.tasks[12345] - assert pd_task.name == "foo" - assert pd_task.raw_script == "def foo():\n print(TASK_NAME)\nfoo()" - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", return_value=(12345, 1) -) -def test_single_task_inside(mock_code): - """Test single decorator task which inside process definition.""" - with ProcessDefinition(PD_NAME) as 
pd: - - @task - def foo(): - print(TASK_NAME) - - foo() - - assert pd is not None and pd.name == PD_NAME - assert len(pd.tasks) == 1 - - pd_task = pd.tasks[12345] - assert pd_task.name == "foo" - assert pd_task.raw_script == "def foo():\n print(TASK_NAME)\nfoo()" - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", return_value=(12345, 1) -) -def test_addition_decorator_error(mock_code): - """Test error when using task decorator to a function already have decorator.""" - - @task - @foo_decorator - def foo(): - print(TASK_NAME) - - with ProcessDefinition(PD_NAME) as pd: # noqa: F841 - with pytest.raises( - PyDSParamException, match="Do no support other decorators for.*" - ): - foo() - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - side_effect=Task("test_func_wrap", "func_wrap").gen_code_and_version, -) -def test_multiple_tasks_outside(mock_code): - """Test multiple decorator tasks which outside process definition.""" - - @task - def foo(): - print(TASK_NAME) - - @task - def bar(): - print(TASK_NAME) - - with ProcessDefinition(PD_NAME) as pd: - foo = foo() - bar = bar() - - foo >> bar - - assert pd is not None and pd.name == PD_NAME - assert len(pd.tasks) == 2 - - task_foo = pd.get_one_task_by_name("foo") - task_bar = pd.get_one_task_by_name("bar") - assert set(pd.task_list) == {task_foo, task_bar} - assert ( - task_foo is not None - and task_foo._upstream_task_codes == set() - and task_foo._downstream_task_codes.pop() == task_bar.code - ) - assert ( - task_bar is not None - and task_bar._upstream_task_codes.pop() == task_foo.code - and task_bar._downstream_task_codes == set() - ) - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - side_effect=Task("test_func_wrap", "func_wrap").gen_code_and_version, -) -def test_multiple_tasks_inside(mock_code): - """Test multiple decorator tasks which inside process definition.""" - with ProcessDefinition(PD_NAME) as pd: - - @task - def foo(): - print(TASK_NAME) - 
- @task - def bar(): - print(TASK_NAME) - - foo = foo() - bar = bar() - - foo >> bar - - assert pd is not None and pd.name == PD_NAME - assert len(pd.tasks) == 2 - - task_foo = pd.get_one_task_by_name("foo") - task_bar = pd.get_one_task_by_name("bar") - assert set(pd.task_list) == {task_foo, task_bar} - assert ( - task_foo is not None - and task_foo._upstream_task_codes == set() - and task_foo._downstream_task_codes.pop() == task_bar.code - ) - assert ( - task_bar is not None - and task_bar._upstream_task_codes.pop() == task_foo.code - and task_bar._downstream_task_codes == set() - ) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_http.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_http.py deleted file mode 100644 index 399829b68c..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_http.py +++ /dev/null @@ -1,145 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task HTTP.""" - -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.tasks.http import Http, HttpCheckCondition, HttpMethod - - -@pytest.mark.parametrize( - "class_name, attrs", - [ - (HttpMethod, ("GET", "POST", "HEAD", "PUT", "DELETE")), - ( - HttpCheckCondition, - ( - "STATUS_CODE_DEFAULT", - "STATUS_CODE_CUSTOM", - "BODY_CONTAINS", - "BODY_NOT_CONTAINS", - ), - ), - ], -) -def test_attr_exists(class_name, attrs): - """Test weather class HttpMethod and HttpCheckCondition contain specific attribute.""" - assert all(hasattr(class_name, attr) for attr in attrs) - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - {"url": "https://www.apache.org"}, - { - "url": "https://www.apache.org", - "httpMethod": "GET", - "httpParams": [], - "httpCheckCondition": "STATUS_CODE_DEFAULT", - "condition": None, - "connectTimeout": 60000, - "socketTimeout": 60000, - "localParams": [], - "resourceList": [], - "dependence": {}, - "waitStartTimeout": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - }, - ) - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_property_task_params(mock_code_version, attr, expect): - """Test task http property.""" - task = Http("test-http-task-params", **attr) - assert expect == task.task_params - - -@pytest.mark.parametrize( - "param", - [ - {"http_method": "http_method"}, - {"http_check_condition": "http_check_condition"}, - {"http_check_condition": HttpCheckCondition.STATUS_CODE_CUSTOM}, - { - "http_check_condition": HttpCheckCondition.STATUS_CODE_CUSTOM, - "condition": None, - }, - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_http_task_param_not_support_param(mock_code, param): - """Test HttpTaskParams not support parameter.""" - url = "https://www.apache.org" - with pytest.raises(PyDSParamException, 
match="Parameter .*?"): - Http("test-no-supprot-param", url, **param) - - -def test_http_get_define(): - """Test task HTTP function get_define.""" - code = 123 - version = 1 - name = "test_http_get_define" - url = "https://www.apache.org" - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "HTTP", - "taskParams": { - "localParams": [], - "httpParams": [], - "url": url, - "httpMethod": "GET", - "httpCheckCondition": "STATUS_CODE_DEFAULT", - "condition": None, - "connectTimeout": 60000, - "socketTimeout": 60000, - "dependence": {}, - "resourceList": [], - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - http = Http(name, url) - assert http.get_define() == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_map_reduce.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_map_reduce.py deleted file mode 100644 index 5d38e93aa4..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_map_reduce.py +++ /dev/null @@ -1,76 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test Task MR.""" - -from unittest.mock import patch - -from pydolphinscheduler.tasks.map_reduce import MR, ProgramType - - -@patch( - "pydolphinscheduler.core.engine.Engine.get_resource_info", - return_value=({"id": 1, "name": "test"}), -) -def test_mr_get_define(mock_resource): - """Test task mr function get_define.""" - code = 123 - version = 1 - name = "test_mr_get_define" - main_class = "org.apache.mr.test_main_class" - main_package = "test_main_package" - program_type = ProgramType.JAVA - main_args = "/dolphinscheduler/resources/file.txt /output/ds" - - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "MR", - "taskParams": { - "mainClass": main_class, - "mainJar": { - "id": 1, - }, - "programType": program_type, - "appName": None, - "mainArgs": main_args, - "others": None, - "localParams": [], - "resourceList": [], - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - task = MR(name, main_class, main_package, program_type, main_args=main_args) - assert task.get_define() == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_mlflow.py 
b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_mlflow.py deleted file mode 100644 index af0a324b53..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_mlflow.py +++ /dev/null @@ -1,205 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task MLflow.""" -from copy import deepcopy -from unittest.mock import patch - -from pydolphinscheduler.tasks.mlflow import ( - MLflowDeployType, - MLflowJobType, - MLflowModels, - MLFlowProjectsAutoML, - MLFlowProjectsBasicAlgorithm, - MLFlowProjectsCustom, - MLflowTaskType, -) - -CODE = 123 -VERSION = 1 -MLFLOW_TRACKING_URI = "http://127.0.0.1:5000" - -EXPECT = { - "code": CODE, - "version": VERSION, - "description": None, - "delayTime": 0, - "taskType": "MLFLOW", - "taskParams": { - "resourceList": [], - "localParams": [], - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, -} - - -def test_mlflow_models_get_define(): - """Test task mlflow models function get_define.""" - name = "mlflow_models" - model_uri = "models:/xgboost_native/Production" - port = 7001 - - expect = deepcopy(EXPECT) - expect["name"] = name - task_params = expect["taskParams"] - task_params["mlflowTrackingUri"] = MLFLOW_TRACKING_URI - task_params["mlflowTaskType"] = MLflowTaskType.MLFLOW_MODELS - task_params["deployType"] = MLflowDeployType.DOCKER - task_params["deployModelKey"] = model_uri - task_params["deployPort"] = port - - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(CODE, VERSION), - ): - task = MLflowModels( - name=name, - model_uri=model_uri, - mlflow_tracking_uri=MLFLOW_TRACKING_URI, - deploy_mode=MLflowDeployType.DOCKER, - port=port, - ) - assert task.get_define() == expect - - -def test_mlflow_project_custom_get_define(): - """Test task mlflow project custom function get_define.""" - name = ("train_xgboost_native",) - repository = "https://github.com/mlflow/mlflow#examples/xgboost/xgboost_native" - mlflow_tracking_uri = MLFLOW_TRACKING_URI - 
parameters = "-P learning_rate=0.2 -P colsample_bytree=0.8 -P subsample=0.9" - experiment_name = "xgboost" - - expect = deepcopy(EXPECT) - expect["name"] = name - task_params = expect["taskParams"] - - task_params["mlflowTrackingUri"] = MLFLOW_TRACKING_URI - task_params["mlflowTaskType"] = MLflowTaskType.MLFLOW_PROJECTS - task_params["mlflowJobType"] = MLflowJobType.CUSTOM_PROJECT - task_params["experimentName"] = experiment_name - task_params["params"] = parameters - task_params["mlflowProjectRepository"] = repository - task_params["mlflowProjectVersion"] = "dev" - - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(CODE, VERSION), - ): - task = MLFlowProjectsCustom( - name=name, - repository=repository, - mlflow_tracking_uri=mlflow_tracking_uri, - parameters=parameters, - experiment_name=experiment_name, - version="dev", - ) - assert task.get_define() == expect - - -def test_mlflow_project_automl_get_define(): - """Test task mlflow project automl function get_define.""" - name = ("train_automl",) - mlflow_tracking_uri = MLFLOW_TRACKING_URI - parameters = "time_budget=30;estimator_list=['lgbm']" - experiment_name = "automl_iris" - model_name = "iris_A" - automl_tool = "flaml" - data_path = "/data/examples/iris" - - expect = deepcopy(EXPECT) - expect["name"] = name - task_params = expect["taskParams"] - - task_params["mlflowTrackingUri"] = MLFLOW_TRACKING_URI - task_params["mlflowTaskType"] = MLflowTaskType.MLFLOW_PROJECTS - task_params["mlflowJobType"] = MLflowJobType.AUTOML - task_params["experimentName"] = experiment_name - task_params["modelName"] = model_name - task_params["registerModel"] = bool(model_name) - task_params["dataPath"] = data_path - task_params["params"] = parameters - task_params["automlTool"] = automl_tool - - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(CODE, VERSION), - ): - task = MLFlowProjectsAutoML( - name=name, - mlflow_tracking_uri=mlflow_tracking_uri, - 
parameters=parameters, - experiment_name=experiment_name, - model_name=model_name, - automl_tool=automl_tool, - data_path=data_path, - ) - assert task.get_define() == expect - - -def test_mlflow_project_basic_algorithm_get_define(): - """Test task mlflow project BasicAlgorithm function get_define.""" - name = "train_basic_algorithm" - mlflow_tracking_uri = MLFLOW_TRACKING_URI - parameters = "n_estimators=200;learning_rate=0.2" - experiment_name = "basic_algorithm_iris" - model_name = "iris_B" - algorithm = "lightgbm" - data_path = "/data/examples/iris" - search_params = "max_depth=[5, 10];n_estimators=[100, 200]" - - expect = deepcopy(EXPECT) - expect["name"] = name - task_params = expect["taskParams"] - - task_params["mlflowTrackingUri"] = MLFLOW_TRACKING_URI - task_params["mlflowTaskType"] = MLflowTaskType.MLFLOW_PROJECTS - task_params["mlflowJobType"] = MLflowJobType.BASIC_ALGORITHM - task_params["experimentName"] = experiment_name - task_params["modelName"] = model_name - task_params["registerModel"] = bool(model_name) - task_params["dataPath"] = data_path - task_params["params"] = parameters - task_params["algorithm"] = algorithm - task_params["searchParams"] = search_params - - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(CODE, VERSION), - ): - task = MLFlowProjectsBasicAlgorithm( - name=name, - mlflow_tracking_uri=mlflow_tracking_uri, - parameters=parameters, - experiment_name=experiment_name, - model_name=model_name, - algorithm=algorithm, - data_path=data_path, - search_params=search_params, - ) - assert task.get_define() == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_openmldb.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_openmldb.py deleted file mode 100644 index f580ab06b2..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_openmldb.py +++ /dev/null @@ -1,73 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or 
more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test Task OpenMLDB.""" -from unittest.mock import patch - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.tasks.openmldb import OpenMLDB - - -def test_openmldb_get_define(): - """Test task openmldb function get_define.""" - zookeeper = "127.0.0.1:2181" - zookeeper_path = "/openmldb" - execute_mode = "offline" - - sql = """USE demo_db; - set @@job_timeout=200000; - LOAD DATA INFILE 'file:///tmp/train_sample.csv' - INTO TABLE talkingdata OPTIONS(mode='overwrite'); - """ - - code = 123 - version = 1 - name = "test_openmldb_get_define" - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": TaskType.OPENMLDB, - "taskParams": { - "resourceList": [], - "localParams": [], - "zk": zookeeper, - "zkPath": zookeeper_path, - "executeMode": execute_mode, - "sql": sql, - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - 
"pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - openmldb = OpenMLDB(name, zookeeper, zookeeper_path, execute_mode, sql) - assert openmldb.get_define() == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_procedure.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_procedure.py deleted file mode 100644 index 80afe7b879..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_procedure.py +++ /dev/null @@ -1,107 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task Procedure.""" - -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.tasks.procedure import Procedure - -TEST_PROCEDURE_SQL = ( - 'create procedure HelloWorld() selece "hello world"; call HelloWorld();' -) -TEST_PROCEDURE_DATASOURCE_NAME = "test_datasource" - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - { - "name": "test-procedure-task-params", - "datasource_name": TEST_PROCEDURE_DATASOURCE_NAME, - "method": TEST_PROCEDURE_SQL, - }, - { - "method": TEST_PROCEDURE_SQL, - "type": "MYSQL", - "datasource": 1, - "localParams": [], - "resourceList": [], - "dependence": {}, - "waitStartTimeout": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - }, - ) - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch( - "pydolphinscheduler.core.database.Database.get_database_info", - return_value=({"id": 1, "type": "MYSQL"}), -) -def test_property_task_params(mock_datasource, mock_code_version, attr, expect): - """Test task sql task property.""" - task = Procedure(**attr) - assert expect == task.task_params - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch( - "pydolphinscheduler.core.database.Database.get_database_info", - return_value=({"id": 1, "type": "MYSQL"}), -) -def test_sql_get_define(mock_datasource, mock_code_version): - """Test task procedure function get_define.""" - name = "test_procedure_get_define" - expect = { - "code": 123, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "PROCEDURE", - "taskParams": { - "type": "MYSQL", - "datasource": 1, - "method": TEST_PROCEDURE_SQL, - "localParams": [], - "resourceList": [], - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 
0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - task = Procedure(name, TEST_PROCEDURE_DATASOURCE_NAME, TEST_PROCEDURE_SQL) - assert task.get_define() == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_python.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_python.py deleted file mode 100644 index 77aa10625b..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_python.py +++ /dev/null @@ -1,201 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task python.""" -from pathlib import Path -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.resources_plugin import Local -from pydolphinscheduler.tasks.python import Python -from pydolphinscheduler.utils import file -from tests.testing.file import delete_file - - -def foo(): # noqa: D103 - print("hello world.") - - -@pytest.fixture() -def setup_crt_first(request): - """Set up and teardown about create file first and then delete it.""" - file_content = request.param.get("file_content") - file_path = request.param.get("file_path") - file.write( - content=file_content, - to_path=file_path, - ) - yield - delete_file(file_path) - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - {"definition": "print(1)"}, - { - "definition": "print(1)", - "rawScript": "print(1)", - "localParams": [], - "resourceList": [], - "dependence": {}, - "waitStartTimeout": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - }, - ), - ( - {"definition": "def foo():\n print('I am foo')"}, - { - "definition": "def foo():\n print('I am foo')", - "rawScript": "def foo():\n print('I am foo')\nfoo()", - "localParams": [], - "resourceList": [], - "dependence": {}, - "waitStartTimeout": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - }, - ), - ( - {"definition": foo}, - { - "definition": foo, - "rawScript": 'def foo(): # noqa: D103\n print("hello world.")\nfoo()', - "localParams": [], - "resourceList": [], - "dependence": {}, - "waitStartTimeout": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - }, - ), - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_property_task_params(mock_code_version, attr, expect): - """Test task python property.""" - task = Python("test-python-task-params", **attr) - assert expect == task.task_params - - -@pytest.mark.parametrize( - "script_code", - 
[ - 123, - ("print", "hello world"), - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_python_task_not_support_code(mock_code, script_code): - """Test python task parameters.""" - name = "not_support_code_type" - with pytest.raises( - PyDSParamException, match="Parameter definition do not support .*?" - ): - task = Python(name, script_code) - task.raw_script - - -@pytest.mark.parametrize( - "name, script_code, raw", - [ - ("string_define", 'print("hello world.")', 'print("hello world.")'), - ( - "function_define", - foo, - 'def foo(): # noqa: D103\n print("hello world.")\nfoo()', - ), - ], -) -def test_python_get_define(name, script_code, raw): - """Test task python function get_define.""" - code = 123 - version = 1 - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "PYTHON", - "taskParams": { - "definition": script_code, - "resourceList": [], - "localParams": [], - "rawScript": raw, - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - shell = Python(name, script_code) - assert shell.get_define() == expect - - -@pytest.mark.parametrize( - "setup_crt_first", - [ - { - "file_path": Path(__file__).parent.joinpath("local_res.py"), - "file_content": "test local resource", - } - ], - indirect=True, -) -@pytest.mark.parametrize( - "attr, expect", - [ - ( - { - "name": "task_python", - "definition": "local_res.py", - "resource_plugin": Local(str(Path(__file__).parent)), - }, - "test local resource", - ), - ], -) -@patch( - 
"pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_resources_local_python_command_content( - mock_code_version, attr, expect, setup_crt_first -): - """Test task Python definition content through the local resource plug-in.""" - python = Python(**attr) - assert expect == getattr(python, "definition") diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_pytorch.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_pytorch.py deleted file mode 100644 index eccb51ca31..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_pytorch.py +++ /dev/null @@ -1,124 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task Pytorch.""" -from copy import deepcopy -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.tasks.pytorch import DEFAULT, Pytorch -from tests.testing.task import Task - -CODE = 123 -VERSION = 1 - -EXPECT = { - "code": CODE, - "version": VERSION, - "description": None, - "delayTime": 0, - "taskType": "PYTORCH", - "taskParams": { - "resourceList": [], - "localParams": [], - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, -} - - -def test_pytorch_get_define(): - """Test task pytorch function get_define.""" - name = "task_conda_env" - script = "main.py" - script_params = "--dry-run --no-cuda" - project_path = "https://github.com/pytorch/examples#mnist" - is_create_environment = True - python_env_tool = "conda" - requirements = "requirements.txt" - conda_python_version = "3.7" - - expect = deepcopy(EXPECT) - expect["name"] = name - task_params = expect["taskParams"] - - task_params["script"] = script - task_params["scriptParams"] = script_params - task_params["pythonPath"] = project_path - task_params["otherParams"] = True - task_params["isCreateEnvironment"] = is_create_environment - task_params["pythonCommand"] = "${PYTHON_HOME}" - task_params["pythonEnvTool"] = python_env_tool - task_params["requirements"] = requirements - task_params["condaPythonVersion"] = conda_python_version - - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(CODE, VERSION), - ): - task = Pytorch( - name=name, - script=script, - script_params=script_params, - project_path=project_path, - is_create_environment=is_create_environment, - python_env_tool=python_env_tool, - requirements=requirements, - ) - assert task.get_define() == expect 
- - -@pytest.mark.parametrize( - "is_create_environment, project_path, python_command, expect", - [ - ( - DEFAULT.is_create_environment, - DEFAULT.project_path, - DEFAULT.python_command, - False, - ), - (True, DEFAULT.project_path, DEFAULT.python_command, True), - (DEFAULT.is_create_environment, "/home", DEFAULT.python_command, True), - (DEFAULT.is_create_environment, DEFAULT.project_path, "/usr/bin/python", True), - ], -) -def test_other_params(is_create_environment, project_path, python_command, expect): - """Test task pytorch function other_params.""" - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - side_effect=Task("test_func_wrap", "func_wrap").gen_code_and_version, - ): - task = Pytorch( - name="test", - script="", - script_params="", - project_path=project_path, - is_create_environment=is_create_environment, - python_command=python_command, - ) - assert task.other_params == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sagemaker.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sagemaker.py deleted file mode 100644 index 20edc22805..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sagemaker.py +++ /dev/null @@ -1,102 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test Task SageMaker.""" -import json -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.tasks.sagemaker import SageMaker - -sagemaker_request_json = json.dumps( - { - "ParallelismConfiguration": {"MaxParallelExecutionSteps": 1}, - "PipelineExecutionDescription": "test Pipeline", - "PipelineExecutionDisplayName": "AbalonePipeline", - "PipelineName": "AbalonePipeline", - "PipelineParameters": [ - {"Name": "ProcessingInstanceType", "Value": "ml.m4.xlarge"}, - {"Name": "ProcessingInstanceCount", "Value": "2"}, - ], - }, - indent=2, -) - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - {"sagemaker_request_json": sagemaker_request_json}, - { - "sagemakerRequestJson": sagemaker_request_json, - "localParams": [], - "resourceList": [], - "dependence": {}, - "waitStartTimeout": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - }, - ) - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_property_task_params(mock_code_version, attr, expect): - """Test task sagemaker task property.""" - task = SageMaker("test-sagemaker-task-params", **attr) - assert expect == task.task_params - - -def test_sagemaker_get_define(): - """Test task sagemaker function get_define.""" - code = 123 - version = 1 - name = "test_sagemaker_get_define" - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "SAGEMAKER", - "taskParams": { - "resourceList": [], - "localParams": [], - "sagemakerRequestJson": sagemaker_request_json, - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - 
"timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - sagemaker = SageMaker(name, sagemaker_request_json) - assert sagemaker.get_define() == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_shell.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_shell.py deleted file mode 100644 index 9344ac2bb0..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_shell.py +++ /dev/null @@ -1,133 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task shell.""" - -from pathlib import Path -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.resources_plugin import Local -from pydolphinscheduler.tasks.shell import Shell -from pydolphinscheduler.utils import file -from tests.testing.file import delete_file - -file_name = "local_res.sh" -file_content = 'echo "test res_local"' -res_plugin_prefix = Path(__file__).parent -file_path = res_plugin_prefix.joinpath(file_name) - - -@pytest.fixture -def setup_crt_first(): - """Set up and teardown about create file first and then delete it.""" - file.write(content=file_content, to_path=file_path) - yield - delete_file(file_path) - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - {"command": "test script"}, - { - "rawScript": "test script", - "localParams": [], - "resourceList": [], - "dependence": {}, - "waitStartTimeout": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - }, - ) - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_property_task_params(mock_code_version, attr, expect): - """Test task shell task property.""" - task = Shell("test-shell-task-params", **attr) - assert expect == task.task_params - - -def test_shell_get_define(): - """Test task shell function get_define.""" - code = 123 - version = 1 - name = "test_shell_get_define" - command = "echo test shell" - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "environmentCode": None, - "delayTime": 0, - "taskType": "SHELL", - "taskParams": { - "resourceList": [], - "localParams": [], - "rawScript": command, - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with 
patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - shell = Shell(name, command) - print(shell.get_define()) - assert shell.get_define() == expect - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - { - "name": "test-local-res-command-content", - "command": file_name, - "resource_plugin": Local(str(res_plugin_prefix)), - }, - file_content, - ) - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_resources_local_shell_command_content( - mock_code_version, attr, expect, setup_crt_first -): - """Test task shell task command content through the local resource plug-in.""" - task = Shell(**attr) - assert expect == getattr(task, "raw_script") diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_spark.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_spark.py deleted file mode 100644 index 1fdb1fa400..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_spark.py +++ /dev/null @@ -1,82 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task Spark.""" - -from unittest.mock import patch - -from pydolphinscheduler.tasks.spark import DeployMode, ProgramType, Spark - - -@patch( - "pydolphinscheduler.core.engine.Engine.get_resource_info", - return_value=({"id": 1, "name": "test"}), -) -def test_spark_get_define(mock_resource): - """Test task spark function get_define.""" - code = 123 - version = 1 - name = "test_spark_get_define" - main_class = "org.apache.spark.test_main_class" - main_package = "test_main_package" - program_type = ProgramType.JAVA - deploy_mode = DeployMode.LOCAL - - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "SPARK", - "taskParams": { - "mainClass": main_class, - "mainJar": { - "id": 1, - }, - "programType": program_type, - "deployMode": deploy_mode, - "driverCores": 1, - "driverMemory": "512M", - "numExecutors": 2, - "executorMemory": "2G", - "executorCores": 2, - "appName": None, - "mainArgs": None, - "others": None, - "localParams": [], - "resourceList": [], - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - task = Spark(name, main_class, main_package, program_type, deploy_mode) - assert task.get_define() == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sql.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sql.py deleted file mode 100644 index a22d9206d0..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sql.py +++ /dev/null @@ -1,208 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor 
license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test Task Sql.""" -from pathlib import Path -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.resources_plugin import Local -from pydolphinscheduler.tasks.sql import Sql, SqlType -from pydolphinscheduler.utils import file -from tests.testing.file import delete_file - -file_name = "local_res.sql" -file_content = "select 1" -res_plugin_prefix = Path(__file__).parent -file_path = res_plugin_prefix.joinpath(file_name) - - -@pytest.fixture -def setup_crt_first(): - """Set up and teardown about create file first and then delete it.""" - file.write(content=file_content, to_path=file_path) - yield - delete_file(file_path) - - -@pytest.mark.parametrize( - "sql, param_sql_type, sql_type", - [ - ("select 1", None, SqlType.SELECT), - (" select 1", None, SqlType.SELECT), - (" select 1 ", None, SqlType.SELECT), - (" select 'insert' ", None, SqlType.SELECT), - (" select 'insert ' ", None, SqlType.SELECT), - ("with tmp as (select 1) select * from tmp ", None, SqlType.SELECT), - ( - "insert into table_name(col1, col2) value (val1, val2)", - None, - SqlType.NOT_SELECT, - ), - ( - "insert into table_name(select, col2) value ('select', val2)", - None, - SqlType.NOT_SELECT, - ), - ("update table_name SET col1=val1 where col1=val2", None, 
SqlType.NOT_SELECT), - ( - "update table_name SET col1='select' where col1=val2", - None, - SqlType.NOT_SELECT, - ), - ("delete from table_name where id < 10", None, SqlType.NOT_SELECT), - ("delete from table_name where id < 10", None, SqlType.NOT_SELECT), - ("alter table table_name add column col1 int", None, SqlType.NOT_SELECT), - ("create table table_name2 (col1 int)", None, SqlType.NOT_SELECT), - ("truncate table table_name", None, SqlType.NOT_SELECT), - ("create table table_name2 (col1 int)", SqlType.SELECT, SqlType.SELECT), - ("select 1", SqlType.NOT_SELECT, SqlType.NOT_SELECT), - ("create table table_name2 (col1 int)", SqlType.NOT_SELECT, SqlType.NOT_SELECT), - ("select 1", SqlType.SELECT, SqlType.SELECT), - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch( - "pydolphinscheduler.core.database.Database.get_database_info", - return_value=({"id": 1, "type": "mock_type"}), -) -def test_get_sql_type( - mock_datasource, mock_code_version, sql, param_sql_type, sql_type -): - """Test property sql_type could return correct type.""" - name = "test_get_sql_type" - datasource_name = "test_datasource" - task = Sql(name, datasource_name, sql, sql_type=param_sql_type) - assert ( - sql_type == task.sql_type - ), f"Sql {sql} expect sql type is {sql_type} but got {task.sql_type}" - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - {"datasource_name": "datasource_name", "sql": "select 1"}, - { - "sql": "select 1", - "type": "MYSQL", - "datasource": 1, - "sqlType": "0", - "preStatements": [], - "postStatements": [], - "displayRows": 10, - "localParams": [], - "resourceList": [], - "dependence": {}, - "waitStartTimeout": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - }, - ) - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch( - "pydolphinscheduler.core.database.Database.get_database_info", - return_value=({"id": 1, "type": 
"MYSQL"}), -) -def test_property_task_params(mock_datasource, mock_code_version, attr, expect): - """Test task sql task property.""" - task = Sql("test-sql-task-params", **attr) - assert expect == task.task_params - - -@patch( - "pydolphinscheduler.core.database.Database.get_database_info", - return_value=({"id": 1, "type": "MYSQL"}), -) -def test_sql_get_define(mock_datasource): - """Test task sql function get_define.""" - code = 123 - version = 1 - name = "test_sql_get_define" - command = "select 1" - datasource_name = "test_datasource" - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "SQL", - "taskParams": { - "type": "MYSQL", - "datasource": 1, - "sql": command, - "sqlType": "0", - "displayRows": 10, - "preStatements": [], - "postStatements": [], - "localParams": [], - "resourceList": [], - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - task = Sql(name, datasource_name, command) - assert task.get_define() == expect - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - { - "name": "test-sql-local-res", - "sql": file_name, - "datasource_name": "test_datasource", - "resource_plugin": Local(str(res_plugin_prefix)), - }, - file_content, - ) - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_resources_local_sql_command_content( - mock_code_version, attr, expect, setup_crt_first -): - """Test sql content through the local resource plug-in.""" - sql = Sql(**attr) - assert expect == getattr(sql, "sql") diff --git 
a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sub_process.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sub_process.py deleted file mode 100644 index 126ab1015e..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sub_process.py +++ /dev/null @@ -1,115 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task sub_process.""" - - -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.sub_process import SubProcess - -TEST_SUB_PROCESS_DEFINITION_NAME = "sub-test-process-definition" -TEST_SUB_PROCESS_DEFINITION_CODE = "3643589832320" -TEST_PROCESS_DEFINITION_NAME = "simple-test-process-definition" - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - {"process_definition_name": TEST_SUB_PROCESS_DEFINITION_NAME}, - { - "processDefinitionCode": TEST_SUB_PROCESS_DEFINITION_CODE, - "localParams": [], - "resourceList": [], - "dependence": {}, - "waitStartTimeout": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - }, - ) - ], -) -@patch( - "pydolphinscheduler.tasks.sub_process.SubProcess.get_process_definition_info", - return_value=( - { - "id": 1, - "name": TEST_SUB_PROCESS_DEFINITION_NAME, - "code": TEST_SUB_PROCESS_DEFINITION_CODE, - } - ), -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_property_task_params(mock_code_version, mock_pd_info, attr, expect): - """Test task sub process property.""" - task = SubProcess("test-sub-process-task-params", **attr) - assert expect == task.task_params - - -@patch( - "pydolphinscheduler.tasks.sub_process.SubProcess.get_process_definition_info", - return_value=( - { - "id": 1, - "name": TEST_SUB_PROCESS_DEFINITION_NAME, - "code": TEST_SUB_PROCESS_DEFINITION_CODE, - } - ), -) -def test_sub_process_get_define(mock_process_definition): - """Test task sub_process function get_define.""" - code = 123 - version = 1 - name = "test_sub_process_get_define" - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "SUB_PROCESS", - "taskParams": { - "resourceList": [], - "localParams": [], - "processDefinitionCode": TEST_SUB_PROCESS_DEFINITION_CODE, - "dependence": {}, - "conditionResult": 
{"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME): - sub_process = SubProcess(name, TEST_SUB_PROCESS_DEFINITION_NAME) - assert sub_process.get_define() == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_switch.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_switch.py deleted file mode 100644 index 6f9222cec0..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_switch.py +++ /dev/null @@ -1,299 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task switch.""" - -from typing import Optional, Tuple -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.tasks.switch import ( - Branch, - Default, - Switch, - SwitchBranch, - SwitchCondition, -) -from tests.testing.task import Task - -TEST_NAME = "test-task" -TEST_TYPE = "test-type" - - -def task_switch_arg_wrapper(obj, task: Task, exp: Optional[str] = None) -> SwitchBranch: - """Wrap task switch and its subclass.""" - if obj is Default: - return obj(task) - elif obj is Branch: - return obj(exp, task) - else: - return obj(task, exp) - - -@pytest.mark.parametrize( - "obj", - [ - SwitchBranch, - Branch, - Default, - ], -) -def test_switch_branch_attr_next_node(obj: SwitchBranch): - """Test get attribute from class switch branch.""" - task = Task(name=TEST_NAME, task_type=TEST_TYPE) - switch_branch = task_switch_arg_wrapper(obj, task=task, exp="unittest") - assert switch_branch.next_node == task.code - - -@pytest.mark.parametrize( - "obj", - [ - SwitchBranch, - Default, - ], -) -def test_switch_branch_get_define_without_condition(obj: SwitchBranch): - """Test function :func:`get_define` with None value of attribute condition from class switch branch.""" - task = Task(name=TEST_NAME, task_type=TEST_TYPE) - expect = {"nextNode": task.code} - switch_branch = task_switch_arg_wrapper(obj, task=task) - assert switch_branch.get_define() == expect - - -@pytest.mark.parametrize( - "obj", - [ - SwitchBranch, - Branch, - ], -) -def test_switch_branch_get_define_condition(obj: SwitchBranch): - """Test function :func:`get_define` with specific attribute condition from class switch branch.""" - task = Task(name=TEST_NAME, task_type=TEST_TYPE) - exp = "${var} == 1" - expect = { - "nextNode": task.code, - "condition": exp, - } - switch_branch = task_switch_arg_wrapper(obj, task=task, exp=exp) - assert 
switch_branch.get_define() == expect - - -@pytest.mark.parametrize( - "args, msg", - [ - ( - (1,), - ".*?parameter only support SwitchBranch but got.*?", - ), - ( - (Default(Task(TEST_NAME, TEST_TYPE)), 2), - ".*?parameter only support SwitchBranch but got.*?", - ), - ( - (Default(Task(TEST_NAME, TEST_TYPE)), Default(Task(TEST_NAME, TEST_TYPE))), - ".*?parameter only support exactly one default branch", - ), - ( - ( - Branch(condition="unittest", task=Task(TEST_NAME, TEST_TYPE)), - Default(Task(TEST_NAME, TEST_TYPE)), - Default(Task(TEST_NAME, TEST_TYPE)), - ), - ".*?parameter only support exactly one default branch", - ), - ], -) -def test_switch_condition_set_define_attr_error(args: Tuple, msg: str): - """Test error case on :class:`SwitchCondition`.""" - switch_condition = SwitchCondition(*args) - with pytest.raises(PyDSParamException, match=msg): - switch_condition.set_define_attr() - - -def test_switch_condition_set_define_attr_default(): - """Test set :class:`Default` to attribute on :class:`SwitchCondition`.""" - task = Task(TEST_NAME, TEST_TYPE) - switch_condition = SwitchCondition(Default(task)) - switch_condition.set_define_attr() - assert getattr(switch_condition, "next_node") == task.code - assert getattr(switch_condition, "depend_task_list") == [] - - -def test_switch_condition_set_define_attr_branch(): - """Test set :class:`Branch` to attribute on :class:`SwitchCondition`.""" - task = Task(TEST_NAME, TEST_TYPE) - switch_condition = SwitchCondition( - Branch("unittest1", task), Branch("unittest2", task) - ) - expect = [ - {"condition": "unittest1", "nextNode": task.code}, - {"condition": "unittest2", "nextNode": task.code}, - ] - - switch_condition.set_define_attr() - assert getattr(switch_condition, "next_node") == "" - assert getattr(switch_condition, "depend_task_list") == expect - - -def test_switch_condition_set_define_attr_mix_branch_and_default(): - """Test set bot :class:`Branch` and :class:`Default` to attribute on :class:`SwitchCondition`.""" 
- task = Task(TEST_NAME, TEST_TYPE) - switch_condition = SwitchCondition( - Branch("unittest1", task), Branch("unittest2", task), Default(task) - ) - expect = [ - {"condition": "unittest1", "nextNode": task.code}, - {"condition": "unittest2", "nextNode": task.code}, - ] - - switch_condition.set_define_attr() - assert getattr(switch_condition, "next_node") == task.code - assert getattr(switch_condition, "depend_task_list") == expect - - -def test_switch_condition_get_define_default(): - """Test function :func:`get_define` with :class:`Default` in :class:`SwitchCondition`.""" - task = Task(TEST_NAME, TEST_TYPE) - switch_condition = SwitchCondition(Default(task)) - expect = { - "dependTaskList": [], - "nextNode": task.code, - } - assert switch_condition.get_define() == expect - - -def test_switch_condition_get_define_branch(): - """Test function :func:`get_define` with :class:`Branch` in :class:`SwitchCondition`.""" - task = Task(TEST_NAME, TEST_TYPE) - switch_condition = SwitchCondition( - Branch("unittest1", task), Branch("unittest2", task) - ) - expect = { - "dependTaskList": [ - {"condition": "unittest1", "nextNode": task.code}, - {"condition": "unittest2", "nextNode": task.code}, - ], - "nextNode": "", - } - assert switch_condition.get_define() == expect - - -def test_switch_condition_get_define_mix_branch_and_default(): - """Test function :func:`get_define` with both :class:`Branch` and :class:`Default`.""" - task = Task(TEST_NAME, TEST_TYPE) - switch_condition = SwitchCondition( - Branch("unittest1", task), Branch("unittest2", task), Default(task) - ) - expect = { - "dependTaskList": [ - {"condition": "unittest1", "nextNode": task.code}, - {"condition": "unittest2", "nextNode": task.code}, - ], - "nextNode": task.code, - } - assert switch_condition.get_define() == expect - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_switch_get_define(mock_task_code_version): - """Test task switch 
:func:`get_define`.""" - task = Task(name=TEST_NAME, task_type=TEST_TYPE) - switch_condition = SwitchCondition( - Branch(condition="${var1} > 1", task=task), - Branch(condition="${var1} <= 1", task=task), - Default(task), - ) - - name = "test_switch_get_define" - expect = { - "code": 123, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "SWITCH", - "taskParams": { - "resourceList": [], - "localParams": [], - "waitStartTimeout": {}, - "switchResult": { - "dependTaskList": [ - {"condition": "${var1} > 1", "nextNode": task.code}, - {"condition": "${var1} <= 1", "nextNode": task.code}, - ], - "nextNode": task.code, - }, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - - task = Switch(name, condition=switch_condition) - assert task.get_define() == expect - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_switch_set_dep_workflow(mock_task_code_version): - """Test task switch set dependence in workflow level.""" - with ProcessDefinition(name="test-switch-set-dep-workflow") as pd: - parent = Task(name="parent", task_type=TEST_TYPE) - switch_child_1 = Task(name="switch_child_1", task_type=TEST_TYPE) - switch_child_2 = Task(name="switch_child_2", task_type=TEST_TYPE) - switch_condition = SwitchCondition( - Branch(condition="${var} > 1", task=switch_child_1), - Default(task=switch_child_2), - ) - - switch = Switch(name=TEST_NAME, condition=switch_condition) - parent >> switch - # General tasks test - assert len(pd.tasks) == 4 - assert sorted(pd.task_list, key=lambda t: t.name) == sorted( - [parent, switch, switch_child_1, switch_child_2], key=lambda t: t.name - ) - # Task dep test - assert parent._downstream_task_codes == {switch.code} - assert switch._upstream_task_codes == {parent.code} - - # 
Switch task dep after ProcessDefinition function get_define called - assert switch._downstream_task_codes == { - switch_child_1.code, - switch_child_2.code, - } - assert all( - [ - child._upstream_task_codes == {switch.code} - for child in [switch_child_1, switch_child_2] - ] - ) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/test_docs.py b/dolphinscheduler-python/pydolphinscheduler/tests/test_docs.py deleted file mode 100644 index 930e4f709e..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/test_docs.py +++ /dev/null @@ -1,59 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test pydolphinscheduler docs.""" - -import re - -from tests.testing.constants import task_without_example -from tests.testing.path import get_doc_tasks, get_tasks - -ignore_code_file = {"__init__.py"} -ignore_doc_file = {"index.rst"} - - -def test_without_missing_task_rst(): - """Test without missing any task document by compare filename. - - Avoiding add new type of tasks but without adding document about it. 
- """ - code_files = {p.stem for p in get_tasks(ignore_name=ignore_code_file)} - doc_files = {p.stem for p in get_doc_tasks(ignore_name=ignore_doc_file)} - assert code_files == doc_files - - -def test_task_without_example(): - """Test task document which without example. - - Avoiding add new type of tasks but without adding example content describe how to use it. - """ - task_without_example_detected = set() - pattern = re.compile("Example\n-------") - - for doc in get_doc_tasks(ignore_name=ignore_doc_file): - search_result = pattern.search(doc.read_text()) - if not search_result: - task_without_example_detected.add(doc.stem) - assert task_without_example == task_without_example_detected - - -def test_doc_automodule_directive_name(): - """Test task document with correct name in directive automodule.""" - pattern = re.compile(".. automodule:: (.*)") - for doc in get_doc_tasks(ignore_name=ignore_doc_file): - match_string = pattern.search(doc.read_text()).group(1) - assert f"pydolphinscheduler.tasks.{doc.stem}" == match_string diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/testing/__init__.py b/dolphinscheduler-python/pydolphinscheduler/tests/testing/__init__.py deleted file mode 100644 index c8caf5b5af..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/testing/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Init testing package, it provider easy way for pydolphinscheduler test.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/testing/cli.py b/dolphinscheduler-python/pydolphinscheduler/tests/testing/cli.py deleted file mode 100644 index 0d2c1d1fbf..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/testing/cli.py +++ /dev/null @@ -1,87 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Utils of command line test.""" - - -from click.testing import CliRunner - -from tests.testing.constants import DEV_MODE - - -class CliTestWrapper: - """Wrap command click CliRunner.invoke.""" - - def __init__(self, *args, **kwargs): - runner = CliRunner() - self.result = runner.invoke(*args, **kwargs) - self.show_result_output() - - def _assert_output(self, output: str = None, fuzzy: bool = False): - """Assert between `CliRunner.invoke.result.output` and parameter `output`. - - :param output: The output will check compare to the ``CliRunner.invoke.output``. - :param fuzzy: A flag define whether assert :param:`output` in fuzzy or not. - Check if `CliRunner.invoke.output` contain :param:`output` is set ``True`` - and CliRunner.invoke.output equal to :param:`output` if we set it ``False``. - """ - if not output: - return - if fuzzy: - assert output in self.result.output - else: - assert self.result.output.rstrip("\n") == output - - def show_result_output(self): - """Print `CliRunner.invoke.result` output content in debug mode. - - It read variable named `PY_DOLPHINSCHEDULER_DEV_MODE` from env, when it set to `true` or `t` or `1` - will print result output when class :class:`CliTestWrapper` is initialization. - """ - if DEV_MODE: - print(f"\n{self.result.output}\n") - - def assert_success(self, output: str = None, fuzzy: bool = False): - """Assert test is success. - - It would check whether `CliRunner.invoke.exit_code` equals to `0`, with no - exception at the same time. It's also can test the content of `CliRunner.invoke.output`. - - :param output: The output will check compare to the ``CliRunner.invoke.output``. - :param fuzzy: A flag define whether assert :param:`output` in fuzzy or not. - Check if `CliRunner.invoke.output` contain :param:`output` is set ``True`` - and CliRunner.invoke.output equal to :param:`output` if we set it ``False``. 
- """ - assert self.result.exit_code == 0 - if self.result.exception: - raise self.result.exception - self._assert_output(output, fuzzy) - - def assert_fail(self, ret_code: int, output: str = None, fuzzy: bool = False): - """Assert test is fail. - - It would check whether `CliRunner.invoke.exit_code` equals to :param:`ret_code`, - and it will also can test the content of `CliRunner.invoke.output`. - - :param ret_code: The returning code of this fail test. - :param output: The output will check compare to the ``CliRunner.invoke.output``. - :param fuzzy: A flag define whether assert :param:`output` in fuzzy or not. - Check if `CliRunner.invoke.output` contain :param:`output` is set ``True`` - and CliRunner.invoke.output equal to :param:`output` if we set it ``False``. - """ - assert ret_code == self.result.exit_code - self._assert_output(output, fuzzy) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/testing/constants.py b/dolphinscheduler-python/pydolphinscheduler/tests/testing/constants.py deleted file mode 100644 index ed2ee37de7..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/testing/constants.py +++ /dev/null @@ -1,48 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Constants variables for test module.""" - -import os - -# Record some task without example in directory `example`. Some of them maybe can not write example, -# but most of them just without adding by mistake, and we should add it later. -task_without_example = { - "sql", - "http", - "sub_process", - "python", - "procedure", -} - -# The examples ignore test to run it. Those examples could not be run directly cause it need other -# support like resource files, data source and etc. But we should try to run them later for more coverage -ignore_exec_examples = { - "task_datax_example", - "task_flink_example", - "task_map_reduce_example", - "task_spark_example", -} - -# pydolphinscheduler environment home -ENV_PYDS_HOME = "PYDS_HOME" - -# whether in dev mode, if true we will add or remove some tests. Or make be and more detail infos when -# test failed. -DEV_MODE = str( - os.environ.get("PY_DOLPHINSCHEDULER_DEV_MODE", False) -).strip().lower() in {"true", "t", "1"} diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/testing/decorator.py b/dolphinscheduler-python/pydolphinscheduler/tests/testing/decorator.py deleted file mode 100644 index 78078ee863..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/testing/decorator.py +++ /dev/null @@ -1,32 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Decorator module for testing module.""" - -import types -from functools import wraps - - -def foo(func: types.FunctionType): - """Decorate which do nothing for testing module.""" - - @wraps(func) - def wrapper(): - print("foo decorator called.") - func() - - return wrapper diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/testing/docker_wrapper.py b/dolphinscheduler-python/pydolphinscheduler/tests/testing/docker_wrapper.py deleted file mode 100644 index a3d0b6ea7a..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/testing/docker_wrapper.py +++ /dev/null @@ -1,98 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Wrap docker commands for easier create docker container.""" - -import time -from typing import Optional - -import docker -from docker.errors import ImageNotFound -from docker.models.containers import Container - - -class DockerWrapper: - """Wrap docker commands for easier create docker container. - - :param image: The image to create docker container. 
- """ - - def __init__(self, image: str, container_name: str): - self._client = docker.from_env() - self.image = image - self.container_name = container_name - - def run(self, *args, **kwargs) -> Container: - """Create and run a new container. - - This method would return immediately after the container started, if you wish it return container - object when specific service start, you could see :func:`run_until_log` which return container - object when specific output log appear in docker. - """ - if not self.images_exists: - raise ValueError("Docker image named %s do not exists.", self.image) - return self._client.containers.run( - image=self.image, name=self.container_name, detach=True, *args, **kwargs - ) - - def run_until_log( - self, log: str, remove_exists: Optional[bool] = True, *args, **kwargs - ) -> Container: - """Create and run a new container, return when specific log appear. - - It will call :func:`run` inside this method. And after container started, it would not - return it immediately but run command `docker logs` to see whether specific log appear. - It will raise `RuntimeError` when 10 minutes after but specific log do not appear. 
- """ - if remove_exists: - self.remove_container() - - log_byte = str.encode(log) - container = self.run(*args, **kwargs) - - timeout_threshold = 10 * 60 - start_time = time.time() - while time.time() <= start_time + timeout_threshold: - if log_byte in container.logs(tail=1000): - break - time.sleep(2) - # Stop container and raise error when reach timeout threshold but do not appear specific log output - else: - container.remove(force=True) - raise RuntimeError( - "Can not capture specific log `%s` in %d seconds, remove container.", - (log, timeout_threshold), - ) - return container - - def remove_container(self): - """Remove container which already running.""" - containers = self._client.containers.list( - all=True, filters={"name": self.container_name} - ) - if containers: - for container in containers: - container.remove(force=True) - - @property - def images_exists(self) -> bool: - """Check whether the image exists in local docker repository or not.""" - try: - self._client.images.get(self.image) - return True - except ImageNotFound: - return False diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/testing/file.py b/dolphinscheduler-python/pydolphinscheduler/tests/testing/file.py deleted file mode 100644 index 82e083758f..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/testing/file.py +++ /dev/null @@ -1,34 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Testing util about file operating.""" - -from pathlib import Path -from typing import Union - - -def get_file_content(path: Union[str, Path]) -> str: - """Get file content in given path.""" - with open(path, mode="r") as f: - return f.read() - - -def delete_file(path: Union[str, Path]) -> None: - """Delete file in given path.""" - path = Path(path).expanduser() if isinstance(path, str) else path.expanduser() - if path.exists(): - path.unlink() diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/testing/path.py b/dolphinscheduler-python/pydolphinscheduler/tests/testing/path.py deleted file mode 100644 index 974ab3d47c..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/testing/path.py +++ /dev/null @@ -1,58 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Handle path related issue in test module.""" - -from pathlib import Path -from typing import Any, Generator - -project_root = Path(__file__).parent.parent.parent - -path_code_tasks = project_root.joinpath("src", "pydolphinscheduler", "tasks") -path_example = project_root.joinpath("src", "pydolphinscheduler", "examples") -path_yaml_example = project_root.joinpath("examples", "yaml_define") -path_doc_tasks = project_root.joinpath("docs", "source", "tasks") -path_default_config_yaml = project_root.joinpath( - "src", "pydolphinscheduler", "default_config.yaml" -) - - -def get_all_examples() -> Generator[Path, Any, None]: - """Get all examples files path in examples directory.""" - return (ex for ex in path_example.iterdir() if ex.is_file()) - - -def get_tasks(ignore_name: set = None) -> Generator[Path, Any, None]: - """Get all tasks files path in src/pydolphinscheduler/tasks directory.""" - if not ignore_name: - ignore_name = set() - return ( - ex - for ex in path_code_tasks.iterdir() - if ex.is_file() and ex.name not in ignore_name - ) - - -def get_doc_tasks(ignore_name: set = None) -> Generator[Path, Any, None]: - """Get all tasks document path in docs/source/tasks directory.""" - if not ignore_name: - ignore_name = set() - return ( - ex - for ex in path_doc_tasks.iterdir() - if ex.is_file() and ex.name not in ignore_name - ) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/testing/task.py b/dolphinscheduler-python/pydolphinscheduler/tests/testing/task.py deleted file mode 100644 index 11ffbf1e6f..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/testing/task.py +++ /dev/null @@ -1,47 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Mock class Task for other test.""" - -import uuid - -from pydolphinscheduler.core.task import Task as SourceTask - - -class Task(SourceTask): - """Mock class :class:`pydolphinscheduler.core.task.Task` for unittest.""" - - DEFAULT_VERSION = 1 - - def gen_code_and_version(self): - """Mock java gateway code and version, convenience method for unittest.""" - return uuid.uuid1().time, self.DEFAULT_VERSION - - -class TaskWithCode(SourceTask): - """Mock class :class:`pydolphinscheduler.core.task.Task` and it return some code and version.""" - - def __init__( - self, name: str, task_type: str, code: int, version: int, *args, **kwargs - ): - self._constant_code = code - self._constant_version = version - super().__init__(name, task_type, *args, **kwargs) - - def gen_code_and_version(self): - """Mock java gateway code and version, convenience method for unittest.""" - return self._constant_code, self._constant_version diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/utils/__init__.py b/dolphinscheduler-python/pydolphinscheduler/tests/utils/__init__.py deleted file mode 100644 index 119f825bc0..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/utils/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Init tests for utils package.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_date.py b/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_date.py deleted file mode 100644 index b9f8ce5ff3..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_date.py +++ /dev/null @@ -1,78 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test utils.date module.""" - -from datetime import datetime - -import pytest - -from pydolphinscheduler.utils.date import FMT_STD, conv_from_str, conv_to_schedule - -curr_date = datetime.now() - - -@pytest.mark.parametrize( - "src,expect", - [ - (curr_date, curr_date.strftime(FMT_STD)), - (datetime(2021, 1, 1), "2021-01-01 00:00:00"), - (datetime(2021, 1, 1, 1), "2021-01-01 01:00:00"), - (datetime(2021, 1, 1, 1, 1), "2021-01-01 01:01:00"), - (datetime(2021, 1, 1, 1, 1, 1), "2021-01-01 01:01:01"), - (datetime(2021, 1, 1, 1, 1, 1, 1), "2021-01-01 01:01:01"), - ], -) -def test_conv_to_schedule(src: datetime, expect: str) -> None: - """Test function conv_to_schedule.""" - assert expect == conv_to_schedule(src) - - -@pytest.mark.parametrize( - "src,expect", - [ - ("2021-01-01", datetime(2021, 1, 1)), - ("2021/01/01", datetime(2021, 1, 1)), - ("20210101", datetime(2021, 1, 1)), - ("2021-01-01 01:01:01", datetime(2021, 1, 1, 1, 1, 1)), - ("2021/01/01 01:01:01", datetime(2021, 1, 1, 1, 1, 1)), - ("20210101 010101", datetime(2021, 1, 1, 1, 1, 1)), - ], -) -def test_conv_from_str_success(src: str, expect: datetime) -> None: - """Test function conv_from_str success case.""" - assert expect == conv_from_str( - src - ), f"Function conv_from_str convert {src} not expect to {expect}." - - -@pytest.mark.parametrize( - "src", - [ - "2021-01-01 010101", - "2021:01:01", - "202111", - "20210101010101", - "2021:01:01 01:01:01", - ], -) -def test_conv_from_str_not_impl(src: str) -> None: - """Test function conv_from_str fail case.""" - with pytest.raises( - NotImplementedError, match=".*? could not be convert to datetime for now." 
- ): - conv_from_str(src) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_file.py b/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_file.py deleted file mode 100644 index 4cc6df402f..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_file.py +++ /dev/null @@ -1,85 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test file utils.""" - -import shutil -from pathlib import Path - -import pytest - -from pydolphinscheduler.utils import file -from tests.testing.file import delete_file, get_file_content - -content = "test_content" -file_path = "/tmp/test/file/test_file_write.txt" - - -@pytest.fixture -def teardown_del_file(): - """Teardown about delete file.""" - yield - delete_file(file_path) - - -@pytest.fixture -def setup_crt_first(): - """Set up and teardown about create file first and then delete it.""" - file.write(content=content, to_path=file_path) - yield - delete_file(file_path) - - -def test_write_content(teardown_del_file): - """Test function :func:`write` on write behavior with correct content.""" - assert not Path(file_path).exists() - file.write(content=content, to_path=file_path) - assert Path(file_path).exists() - assert content == get_file_content(file_path) - - -def test_write_not_create_parent(teardown_del_file): - """Test function :func:`write` with parent not exists and do not create path.""" - file_test_dir = Path(file_path).parent - if file_test_dir.exists(): - shutil.rmtree(str(file_test_dir)) - assert not file_test_dir.exists() - with pytest.raises( - ValueError, - match="Parent directory do not exists and set param `create` to `False`", - ): - file.write(content=content, to_path=file_path, create=False) - - -def test_write_overwrite(setup_crt_first): - """Test success with file exists but set ``True`` to overwrite.""" - assert Path(file_path).exists() - - new_content = f"new_{content}" - file.write(content=new_content, to_path=file_path, overwrite=True) - assert new_content == get_file_content(file_path) - - -def test_write_overwrite_error(setup_crt_first): - """Test error with file exists but set ``False`` to overwrite.""" - assert Path(file_path).exists() - - new_content = f"new_{content}" - with pytest.raises( - FileExistsError, match=".*already exists and you choose not overwrite mode\\." 
- ): - file.write(content=new_content, to_path=file_path) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_string.py b/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_string.py deleted file mode 100644 index 2ccd206df1..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_string.py +++ /dev/null @@ -1,87 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test utils.string module.""" - -import pytest - -from pydolphinscheduler.utils.string import attr2camel, class_name2camel, snake2camel - - -@pytest.mark.parametrize( - "snake, expect", - [ - ("snake_case", "snakeCase"), - ("snake_123case", "snake123Case"), - ("snake_c_a_s_e", "snakeCASE"), - ("snake__case", "snakeCase"), - ("snake_case_case", "snakeCaseCase"), - ("_snake_case", "SnakeCase"), - ("__snake_case", "SnakeCase"), - ("Snake_case", "SnakeCase"), - ], -) -def test_snake2camel(snake: str, expect: str): - """Test function snake2camel, this is a base function for utils.string.""" - assert expect == snake2camel( - snake - ), f"Test case {snake} do no return expect result {expect}." 
- - -@pytest.mark.parametrize( - "attr, expects", - [ - # source attribute, (true expect, false expect), - ("snake_case", ("snakeCase", "snakeCase")), - ("snake_123case", ("snake123Case", "snake123Case")), - ("snake_c_a_s_e", ("snakeCASE", "snakeCASE")), - ("snake__case", ("snakeCase", "snakeCase")), - ("snake_case_case", ("snakeCaseCase", "snakeCaseCase")), - ("_snake_case", ("snakeCase", "SnakeCase")), - ("__snake_case", ("snakeCase", "SnakeCase")), - ("Snake_case", ("SnakeCase", "SnakeCase")), - ], -) -def test_attr2camel(attr: str, expects: tuple): - """Test function attr2camel.""" - for idx, expect in enumerate(expects): - include_private = idx % 2 == 0 - assert expect == attr2camel( - attr, include_private - ), f"Test case {attr} do no return expect result {expect} when include_private is {include_private}." - - -@pytest.mark.parametrize( - "class_name, expect", - [ - ("snake_case", "snakeCase"), - ("snake_123case", "snake123Case"), - ("snake_c_a_s_e", "snakeCASE"), - ("snake__case", "snakeCase"), - ("snake_case_case", "snakeCaseCase"), - ("_snake_case", "snakeCase"), - ("_Snake_case", "snakeCase"), - ("__snake_case", "snakeCase"), - ("__Snake_case", "snakeCase"), - ("Snake_case", "snakeCase"), - ], -) -def test_class_name2camel(class_name: str, expect: str): - """Test function class_name2camel.""" - assert expect == class_name2camel( - class_name - ), f"Test case {class_name} do no return expect result {expect}." diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_yaml_parser.py b/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_yaml_parser.py deleted file mode 100644 index ad3aaf7bd1..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_yaml_parser.py +++ /dev/null @@ -1,255 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test utils.path_dict module.""" - -from typing import Dict - -import pytest -from ruamel.yaml import YAML - -from pydolphinscheduler.utils.yaml_parser import YamlParser -from tests.testing.path import path_default_config_yaml - -yaml = YAML() - -expects = [ - { - # yaml.load("no need test") is a flag about skipping it because it to different to maintainer - "name": yaml.load("no need test"), - "name.family": ("Smith", "SmithEdit"), - "name.given": ("Alice", "AliceEdit"), - "name.mark": yaml.load("no need test"), - "name.mark.name_mark": yaml.load("no need test"), - "name.mark.name_mark.key": ("value", "valueEdit"), - }, - { - # yaml.load("no need test") is a flag about skipping it because it to different to maintainer - "java_gateway": yaml.load("no need test"), - "java_gateway.address": ("127.0.0.1", "127.1.1.1"), - "java_gateway.port": (25333, 25555), - "java_gateway.auto_convert": (True, False), - "default": yaml.load("no need test"), - "default.user": yaml.load("no need test"), - "default.user.name": ("userPythonGateway", "userPythonGatewayEdit"), - "default.user.password": ("userPythonGateway", "userPythonGatewayEdit"), - "default.user.email": ( - "userPythonGateway@dolphinscheduler.com", - "userEdit@dolphinscheduler.com", - ), - "default.user.tenant": ("tenant_pydolphin", "tenant_pydolphinEdit"), - "default.user.phone": (11111111111, 22222222222), - "default.user.state": (1, 0), - 
"default.workflow": yaml.load("no need test"), - "default.workflow.project": ("project-pydolphin", "project-pydolphinEdit"), - "default.workflow.tenant": ("tenant_pydolphin", "SmithEdit"), - "default.workflow.user": ("userPythonGateway", "SmithEdit"), - "default.workflow.queue": ("queuePythonGateway", "queueEdit"), - "default.workflow.worker_group": ("default", "wgEdit"), - "default.workflow.release_state": ("online", "offline"), - "default.workflow.time_zone": ("Asia/Shanghai", "Europe/Amsterdam"), - "default.workflow.warning_type": ("NONE", "SUCCESS"), - }, -] - -param = [ - """#example -name: - # details - family: Smith # very common - given: Alice # one of the siblings - mark: - name_mark: - key: value -""" -] - -with open(path_default_config_yaml, "r") as f: - param.append(f.read()) - - -@pytest.mark.parametrize( - "src, delimiter, expect", - [ - ( - param[0], - "|", - expects[0], - ), - ( - param[1], - "/", - expects[1], - ), - ], -) -def test_yaml_parser_specific_delimiter(src: str, delimiter: str, expect: Dict): - """Test specific delimiter for :class:`YamlParser`.""" - - def ch_dl(key): - return key.replace(".", delimiter) - - yaml_parser = YamlParser(src, delimiter=delimiter) - assert all( - [ - expect[key][0] == yaml_parser[ch_dl(key)] - for key in expect - if expect[key] != "no need test" - ] - ) - assert all( - [ - expect[key][0] == yaml_parser.get(ch_dl(key)) - for key in expect - if expect[key] != "no need test" - ] - ) - - -@pytest.mark.parametrize( - "src, expect", - [ - ( - param[0], - expects[0], - ), - ( - param[1], - expects[1], - ), - ], -) -def test_yaml_parser_contains(src: str, expect: Dict): - """Test magic function :func:`YamlParser.__contain__` also with `key in obj` syntax.""" - yaml_parser = YamlParser(src) - assert len(expect.keys()) == len( - yaml_parser.dict_parser.keys() - ), "Parser keys length not equal to expect keys length" - assert all( - [key in yaml_parser for key in expect] - ), "Parser keys not equal to expect keys" - - 
-@pytest.mark.parametrize( - "src, expect", - [ - ( - param[0], - expects[0], - ), - ( - param[1], - expects[1], - ), - ], -) -def test_yaml_parser_get(src: str, expect: Dict): - """Test magic function :func:`YamlParser.__getitem__` also with `obj[key]` syntax.""" - yaml_parser = YamlParser(src) - assert all( - [ - expect[key][0] == yaml_parser[key] - for key in expect - if expect[key] != "no need test" - ] - ) - assert all( - [ - expect[key][0] == yaml_parser.get(key) - for key in expect - if expect[key] != "no need test" - ] - ) - - -@pytest.mark.parametrize( - "src, expect", - [ - ( - param[0], - expects[0], - ), - ( - param[1], - expects[1], - ), - ], -) -def test_yaml_parser_set(src: str, expect: Dict): - """Test magic function :func:`YamlParser.__setitem__` also with `obj[key] = val` syntax.""" - yaml_parser = YamlParser(src) - for key in expect: - assert key in yaml_parser.dict_parser.keys() - if expect[key] == "no need test": - continue - assert expect[key][0] == yaml_parser.dict_parser[key] - assert expect[key][1] != yaml_parser.dict_parser[key] - - yaml_parser[key] = expect[key][1] - assert expect[key][0] != yaml_parser.dict_parser[key] - assert expect[key][1] == yaml_parser.dict_parser[key] - - -@pytest.mark.parametrize( - "src, setter, expect", - [ - ( - param[0], - {"name.mark.name_mark.key": "edit"}, - """#example -name: - # details - family: Smith # very common - given: Alice # one of the siblings - mark: - name_mark: - key: edit -""", - ), - ( - param[0], - { - "name.family": "SmithEdit", - "name.given": "AliceEdit", - "name.mark.name_mark.key": "edit", - }, - """#example -name: - # details - family: SmithEdit # very common - given: AliceEdit # one of the siblings - mark: - name_mark: - key: edit -""", - ), - ], -) -def test_yaml_parser_str_repr(src: str, setter: Dict, expect: str): - """Test function :func:`YamlParser.to_string`.""" - yaml_parser = YamlParser(src) - - # Equal before change - assert f"YamlParser({src})" == repr(yaml_parser) - assert 
src == str(yaml_parser) - - for key, val in setter.items(): - yaml_parser[key] = val - - # Equal after changed - assert expect == str(yaml_parser) - assert f"YamlParser({expect})" == repr(yaml_parser) diff --git a/dolphinscheduler-python/pydolphinscheduler/tox.ini b/dolphinscheduler-python/pydolphinscheduler/tox.ini deleted file mode 100644 index 4ce8043265..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tox.ini +++ /dev/null @@ -1,79 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[tox] -envlist = local-ci, auto-lint, lint, doc-build, doc-build-multi, code-test, integrate-test, local-integrate-test, py{36,37,38,39,310,311} - -[testenv] -allowlist_externals = - make - git - -[testenv:auto-lint] -extras = style -commands = - python -m isort . - python -m black . - python -m autoflake --in-place --remove-all-unused-imports --ignore-init-module-imports --recursive . - -[testenv:lint] -extras = style -commands = - python -m isort --check . - python -m black --check . - python -m flake8 - python -m autoflake --remove-all-unused-imports --ignore-init-module-imports --check --recursive . 
- -[testenv:code-test] -extras = test -# Run both tests and coverage -commands = - python -m pytest --cov=pydolphinscheduler --cov-config={toxinidir}/.coveragerc tests/ - -[testenv:doc-build] -extras = doc -commands = - make -C {toxinidir}/docs clean - make -C {toxinidir}/docs html - -[testenv:doc-build-multi] -extras = doc -commands = - # Get all tags for `multiversion` subcommand - git fetch --tags - make -C {toxinidir}/docs clean - make -C {toxinidir}/docs multiversion - -[testenv:integrate-test] -extras = test -commands = - python -m pytest tests/integration/ - -[testenv:local-integrate-test] -extras = test -setenv = - skip_launch_docker = true -commands = - {[testenv:integrate-test]commands} - -# local-ci do not build `doc-build-multi` -[testenv:local-ci] -extras = dev -commands = - {[testenv:lint]commands} - {[testenv:code-test]commands} - {[testenv:doc-build]commands} diff --git a/pom.xml b/pom.xml index 4d8d29791a..714af0172e 100755 --- a/pom.xml +++ b/pom.xml @@ -49,7 +49,6 @@ dolphinscheduler-data-quality dolphinscheduler-standalone-server dolphinscheduler-datasource-plugin - dolphinscheduler-python dolphinscheduler-meter dolphinscheduler-master dolphinscheduler-worker @@ -91,8 +90,6 @@ ${project.version} true true - - true true @@ -175,11 +172,6 @@ ${project.version} - - org.apache.dolphinscheduler - dolphinscheduler-python - ${project.version} - org.apache.dolphinscheduler dolphinscheduler-alert-api