From 1347a8f94c45c447df9c5e1d0fcecfe17966b68b Mon Sep 17 00:00:00 2001 From: Jay Chung Date: Wed, 9 Nov 2022 21:20:15 +0800 Subject: [PATCH] [chore] Separate Python API into another repository (#12779) Currently, our Python API code is a module in the apache/dolphinscheduler codebase, so each time users change Python API code, they need to run all required CI checks for both dolphinscheduler and the Python API. But if a user changes only Python code, the change could be merged once the Python API CI passes, without depending on the other CI checks. Besides, we release the Python API with the same version number as dolphinscheduler, which makes it easy for users to match the Python API version. But when the Python API does not change any code while dolphinscheduler releases a bugfix version, the Python API has to release a new version just to match dolphinscheduler. This happened when we released Python API 2.0.6 and 2.0.7: both are bugfix versions, and the Python API did not change any code, so the PyPI packages are identical. Separating the Python API also makes our code organization more sensible: we will have clearly separated code in dolphinscheduler and the new Python API repository, with a separate issue tracker and changelog to keep users informed. 
ref PR in other repository: apache/dolphinscheduler-sdk-python#1 see more detail in mail thread: https://lists.apache.org/thread/4z7l5l54c4d81smjlk1n8nq380p9f0oo --- .../pydolphinscheduler/.flake8 => .flake8 | 12 - .github/CODEOWNERS | 1 - .github/actions/labeler/labeler.yml | 4 - .github/workflows/py-ci.yml | 205 ----- .github/workflows/unit-test.yml | 1 - .gitignore | 15 - .pre-commit-config.yaml | 5 - README.md | 7 +- .../en/contribute/release/release-post.md | 7 +- .../en/contribute/release/release-prepare.md | 1 - docs/docs/en/contribute/release/release.md | 34 +- .../zh/contribute/release/release-post.md | 7 +- .../zh/contribute/release/release-prepare.md | 1 - docs/docs/zh/contribute/release/release.md | 34 +- dolphinscheduler-api/pom.xml | 2 +- dolphinscheduler-dist/pom.xml | 35 - .../assembly/dolphinscheduler-python-api.xml | 34 - .../main/assembly/dolphinscheduler-src.xml | 7 - dolphinscheduler-python/pom.xml | 165 ---- .../pydolphinscheduler/.coveragerc | 34 - .../pydolphinscheduler/.isort.cfg | 19 - .../pydolphinscheduler/DEVELOP.md | 265 ------ .../pydolphinscheduler/LICENSE | 228 ----- .../pydolphinscheduler/NOTICE | 5 - .../pydolphinscheduler/README.md | 90 -- .../pydolphinscheduler/RELEASE.md | 35 - .../pydolphinscheduler/UPDATING.md | 40 - .../pydolphinscheduler/docs/Makefile | 44 - .../pydolphinscheduler/docs/make.bat | 54 -- .../docs/source/_static/.gitkeep | 0 .../docs/source/_templates/versioning.html | 27 - .../docs/source/_templates/versions.html | 46 - .../pydolphinscheduler/docs/source/api.rst | 47 -- .../pydolphinscheduler/docs/source/cli.rst | 36 - .../docs/source/concept.rst | 151 ---- .../pydolphinscheduler/docs/source/conf.py | 121 --- .../pydolphinscheduler/docs/source/config.rst | 218 ----- .../docs/source/howto/index.rst | 30 - .../docs/source/howto/remote-submit.rst | 51 -- .../pydolphinscheduler/docs/source/index.rst | 46 - .../docs/source/resources_plugin/develop.rst | 46 - .../docs/source/resources_plugin/github.rst | 35 - 
.../docs/source/resources_plugin/gitlab.rst | 46 - .../docs/source/resources_plugin/index.rst | 32 - .../docs/source/resources_plugin/local.rst | 32 - .../docs/source/resources_plugin/oss.rst | 44 - .../resources_plugin/resource-plugin.rst | 75 -- .../docs/source/resources_plugin/s3.rst | 36 - .../pydolphinscheduler/docs/source/start.rst | 171 ---- .../docs/source/tasks/condition.rst | 40 - .../docs/source/tasks/datax.rst | 46 - .../docs/source/tasks/dependent.rst | 47 -- .../docs/source/tasks/dvc.rst | 41 - .../docs/source/tasks/flink.rst | 40 - .../docs/source/tasks/func_wrap.rst | 33 - .../docs/source/tasks/http.rst | 29 - .../docs/source/tasks/index.rst | 48 -- .../docs/source/tasks/map_reduce.rst | 42 - .../docs/source/tasks/mlflow.rst | 42 - .../docs/source/tasks/openmldb.rst | 42 - .../docs/source/tasks/procedure.rst | 29 - .../docs/source/tasks/python.rst | 29 - .../docs/source/tasks/pytorch.rst | 42 - .../docs/source/tasks/sagemaker.rst | 46 - .../docs/source/tasks/shell.rst | 41 - .../docs/source/tasks/spark.rst | 41 - .../docs/source/tasks/sql.rst | 35 - .../docs/source/tasks/sub_process.rst | 38 - .../docs/source/tasks/switch.rst | 42 - .../docs/source/tutorial.rst | 319 ------- .../examples/yaml_define/Condition.yaml | 43 - .../examples/yaml_define/DataX.yaml | 33 - .../examples/yaml_define/Dependent.yaml | 76 -- .../yaml_define/Dependent_External.yaml | 26 - .../examples/yaml_define/Dvc.yaml | 46 - .../examples/yaml_define/Flink.yaml | 29 - .../examples/yaml_define/Http.yaml | 37 - .../examples/yaml_define/MapReduce.yaml | 29 - .../yaml_define/MoreConfiguration.yaml | 40 - .../examples/yaml_define/OpenMLDB.yaml | 33 - .../examples/yaml_define/Procedure.yaml | 27 - .../examples/yaml_define/Python.yaml | 30 - .../examples/yaml_define/Pytorch.yaml | 53 -- .../examples/yaml_define/Sagemaker.yaml | 28 - .../examples/yaml_define/Shell.yaml | 40 - .../examples/yaml_define/Spark.yaml | 29 - .../examples/yaml_define/Sql.yaml | 45 - 
.../examples/yaml_define/SubProcess.yaml | 27 - .../examples/yaml_define/Switch.yaml | 39 - .../examples/yaml_define/example_datax.json | 62 -- .../yaml_define/example_sagemaker_params.json | 18 - .../examples/yaml_define/example_sql.sql | 22 - .../yaml_define/example_sub_workflow.yaml | 26 - .../examples/yaml_define/mlflow.yaml | 69 -- .../examples/yaml_define/tutorial.yaml | 46 - .../pydolphinscheduler/pytest.ini | 21 - .../pydolphinscheduler/setup.cfg | 16 - .../pydolphinscheduler/setup.py | 198 ----- .../src/pydolphinscheduler/__init__.py | 22 - .../src/pydolphinscheduler/cli/__init__.py | 18 - .../src/pydolphinscheduler/cli/commands.py | 106 --- .../src/pydolphinscheduler/configuration.py | 193 ----- .../src/pydolphinscheduler/constants.py | 122 --- .../src/pydolphinscheduler/core/__init__.py | 30 - .../src/pydolphinscheduler/core/database.py | 62 -- .../src/pydolphinscheduler/core/engine.py | 94 --- .../core/process_definition.py | 424 ---------- .../src/pydolphinscheduler/core/resource.py | 73 -- .../core/resource_plugin.py | 58 -- .../src/pydolphinscheduler/core/task.py | 384 --------- .../core/yaml_process_define.py | 466 ---------- .../pydolphinscheduler/default_config.yaml | 58 -- .../pydolphinscheduler/examples/__init__.py | 18 - .../examples/bulk_create_example.py | 55 -- .../examples/task_condition_example.py | 59 -- .../examples/task_datax_example.py | 95 --- .../examples/task_dependent_example.py | 74 -- .../examples/task_dvc_example.py | 52 -- .../examples/task_flink_example.py | 33 - .../examples/task_map_reduce_example.py | 34 - .../examples/task_mlflow_example.py | 93 -- .../examples/task_openmldb_example.py | 43 - .../examples/task_pytorch_example.py | 62 -- .../examples/task_sagemaker_example.py | 46 - .../examples/task_spark_example.py | 33 - .../examples/task_switch_example.py | 51 -- .../pydolphinscheduler/examples/tutorial.py | 68 -- .../examples/tutorial_decorator.py | 91 -- .../examples/tutorial_resource_plugin.py | 64 -- 
.../src/pydolphinscheduler/exceptions.py | 46 - .../src/pydolphinscheduler/java_gateway.py | 308 ------- .../src/pydolphinscheduler/models/__init__.py | 36 - .../src/pydolphinscheduler/models/base.py | 74 -- .../pydolphinscheduler/models/base_side.py | 48 -- .../src/pydolphinscheduler/models/project.py | 72 -- .../src/pydolphinscheduler/models/queue.py | 34 - .../src/pydolphinscheduler/models/tenant.py | 80 -- .../src/pydolphinscheduler/models/user.py | 130 --- .../pydolphinscheduler/models/worker_group.py | 30 - .../resources_plugin/__init__.py | 25 - .../resources_plugin/base/__init__.py | 18 - .../resources_plugin/base/bucket.py | 86 -- .../resources_plugin/base/git.py | 115 --- .../resources_plugin/github.py | 106 --- .../resources_plugin/gitlab.py | 112 --- .../resources_plugin/local.py | 56 -- .../resources_plugin/oss.py | 76 -- .../pydolphinscheduler/resources_plugin/s3.py | 74 -- .../src/pydolphinscheduler/tasks/__init__.py | 69 -- .../src/pydolphinscheduler/tasks/condition.py | 204 ----- .../src/pydolphinscheduler/tasks/datax.py | 127 --- .../src/pydolphinscheduler/tasks/dependent.py | 273 ------ .../src/pydolphinscheduler/tasks/dvc.py | 124 --- .../src/pydolphinscheduler/tasks/flink.py | 93 -- .../src/pydolphinscheduler/tasks/func_wrap.py | 61 -- .../src/pydolphinscheduler/tasks/http.py | 101 --- .../pydolphinscheduler/tasks/map_reduce.py | 52 -- .../src/pydolphinscheduler/tasks/mlflow.py | 256 ------ .../src/pydolphinscheduler/tasks/openmldb.py | 48 -- .../src/pydolphinscheduler/tasks/procedure.py | 60 -- .../src/pydolphinscheduler/tasks/python.py | 105 --- .../src/pydolphinscheduler/tasks/pytorch.py | 95 --- .../src/pydolphinscheduler/tasks/sagemaker.py | 40 - .../src/pydolphinscheduler/tasks/shell.py | 58 -- .../src/pydolphinscheduler/tasks/spark.py | 84 -- .../src/pydolphinscheduler/tasks/sql.py | 122 --- .../pydolphinscheduler/tasks/sub_process.py | 54 -- .../src/pydolphinscheduler/tasks/switch.py | 166 ---- 
.../src/pydolphinscheduler/utils/__init__.py | 18 - .../src/pydolphinscheduler/utils/date.py | 82 -- .../src/pydolphinscheduler/utils/file.py | 57 -- .../src/pydolphinscheduler/utils/string.py | 39 - .../pydolphinscheduler/utils/yaml_parser.py | 159 ---- .../pydolphinscheduler/tests/__init__.py | 18 - .../pydolphinscheduler/tests/cli/__init__.py | 18 - .../tests/cli/test_config.py | 198 ----- .../tests/cli/test_version.py | 67 -- .../pydolphinscheduler/tests/core/__init__.py | 18 - .../tests/core/test_configuration.py | 272 ------ .../tests/core/test_database.py | 54 -- .../tests/core/test_default_config_yaml.py | 39 - .../tests/core/test_engine.py | 148 ---- .../tests/core/test_process_definition.py | 502 ----------- .../tests/core/test_resource_definition.py | 68 -- .../tests/core/test_task.py | 470 ----------- .../tests/core/test_yaml_process_define.py | 191 ----- .../tests/example/__init__.py | 18 - .../tests/example/test_example.py | 176 ---- .../tests/integration/__init__.py | 18 - .../tests/integration/conftest.py | 51 -- .../tests/integration/test_java_gateway.py | 53 -- .../integration/test_process_definition.py | 50 -- .../tests/integration/test_project.py | 78 -- .../tests/integration/test_submit_examples.py | 56 -- .../tests/integration/test_tenant.py | 86 -- .../tests/integration/test_user.py | 107 --- .../tests/resources_plugin/__init__.py | 18 - .../tests/resources_plugin/test_github.py | 195 ----- .../tests/resources_plugin/test_gitlab.py | 116 --- .../tests/resources_plugin/test_local.py | 108 --- .../tests/resources_plugin/test_oss.py | 112 --- .../resources_plugin/test_resource_plugin.py | 75 -- .../tests/resources_plugin/test_s3.py | 79 -- .../tests/tasks/__init__.py | 18 - .../tests/tasks/test_condition.py | 461 ---------- .../tests/tasks/test_datax.py | 213 ----- .../tests/tasks/test_dependent.py | 794 ------------------ .../tests/tasks/test_dvc.py | 173 ---- .../tests/tasks/test_flink.py | 83 -- .../tests/tasks/test_func_wrap.py | 169 ---- 
.../tests/tasks/test_http.py | 145 ---- .../tests/tasks/test_map_reduce.py | 76 -- .../tests/tasks/test_mlflow.py | 205 ----- .../tests/tasks/test_openmldb.py | 73 -- .../tests/tasks/test_procedure.py | 107 --- .../tests/tasks/test_python.py | 201 ----- .../tests/tasks/test_pytorch.py | 124 --- .../tests/tasks/test_sagemaker.py | 102 --- .../tests/tasks/test_shell.py | 133 --- .../tests/tasks/test_spark.py | 82 -- .../tests/tasks/test_sql.py | 208 ----- .../tests/tasks/test_sub_process.py | 115 --- .../tests/tasks/test_switch.py | 299 ------- .../pydolphinscheduler/tests/test_docs.py | 59 -- .../tests/testing/__init__.py | 18 - .../pydolphinscheduler/tests/testing/cli.py | 87 -- .../tests/testing/constants.py | 48 -- .../tests/testing/decorator.py | 32 - .../tests/testing/docker_wrapper.py | 98 --- .../pydolphinscheduler/tests/testing/file.py | 34 - .../pydolphinscheduler/tests/testing/path.py | 58 -- .../pydolphinscheduler/tests/testing/task.py | 47 -- .../tests/utils/__init__.py | 18 - .../tests/utils/test_date.py | 78 -- .../tests/utils/test_file.py | 85 -- .../tests/utils/test_string.py | 87 -- .../tests/utils/test_yaml_parser.py | 255 ------ .../pydolphinscheduler/tox.ini | 79 -- pom.xml | 8 - 239 files changed, 19 insertions(+), 20447 deletions(-) rename dolphinscheduler-python/pydolphinscheduler/.flake8 => .flake8 (81%) delete mode 100644 .github/workflows/py-ci.yml delete mode 100644 dolphinscheduler-dist/src/main/assembly/dolphinscheduler-python-api.xml delete mode 100644 dolphinscheduler-python/pom.xml delete mode 100644 dolphinscheduler-python/pydolphinscheduler/.coveragerc delete mode 100644 dolphinscheduler-python/pydolphinscheduler/.isort.cfg delete mode 100644 dolphinscheduler-python/pydolphinscheduler/DEVELOP.md delete mode 100644 dolphinscheduler-python/pydolphinscheduler/LICENSE delete mode 100644 dolphinscheduler-python/pydolphinscheduler/NOTICE delete mode 100644 dolphinscheduler-python/pydolphinscheduler/README.md delete mode 100644 
dolphinscheduler-python/pydolphinscheduler/RELEASE.md delete mode 100644 dolphinscheduler-python/pydolphinscheduler/UPDATING.md delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/Makefile delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/make.bat delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/_static/.gitkeep delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/_templates/versioning.html delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/_templates/versions.html delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/api.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/cli.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/concept.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/conf.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/config.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/howto/index.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/howto/remote-submit.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/index.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/develop.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/github.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/gitlab.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/index.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/local.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/oss.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/resource-plugin.rst delete mode 100644 
dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/s3.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/start.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/condition.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/datax.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/dependent.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/dvc.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/flink.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/func_wrap.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/http.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/index.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/map_reduce.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/mlflow.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/openmldb.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/procedure.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/python.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/pytorch.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sagemaker.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/shell.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/spark.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sql.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sub_process.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/switch.rst 
delete mode 100644 dolphinscheduler-python/pydolphinscheduler/docs/source/tutorial.rst delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Condition.yaml delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/DataX.yaml delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dependent.yaml delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dependent_External.yaml delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dvc.yaml delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Flink.yaml delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Http.yaml delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/MapReduce.yaml delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/MoreConfiguration.yaml delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/OpenMLDB.yaml delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Procedure.yaml delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Python.yaml delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Pytorch.yaml delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Sagemaker.yaml delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Shell.yaml delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Spark.yaml delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Sql.yaml delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/SubProcess.yaml delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Switch.yaml delete mode 100644 
dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_datax.json delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sagemaker_params.json delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sql.sql delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sub_workflow.yaml delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/mlflow.yaml delete mode 100644 dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/tutorial.yaml delete mode 100644 dolphinscheduler-python/pydolphinscheduler/pytest.ini delete mode 100644 dolphinscheduler-python/pydolphinscheduler/setup.cfg delete mode 100644 dolphinscheduler-python/pydolphinscheduler/setup.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/__init__.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/cli/__init__.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/cli/commands.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/configuration.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/constants.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/__init__.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/database.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/engine.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/process_definition.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/resource.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/resource_plugin.py delete mode 100644 
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/task.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/yaml_process_define.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/default_config.yaml delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/__init__.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/bulk_create_example.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_condition_example.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_datax_example.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_dependent_example.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_dvc_example.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_flink_example.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_map_reduce_example.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_mlflow_example.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_openmldb_example.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_pytorch_example.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_sagemaker_example.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_spark_example.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_switch_example.py delete mode 100644 
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial_decorator.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial_resource_plugin.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/exceptions.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/java_gateway.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/__init__.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/base.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/base_side.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/project.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/queue.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/tenant.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/user.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/worker_group.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/__init__.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/base/__init__.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/base/bucket.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/base/git.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/github.py delete mode 100644 
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/gitlab.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/local.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/oss.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/s3.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/__init__.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/condition.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/datax.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/dependent.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/dvc.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/flink.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/func_wrap.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/http.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/map_reduce.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/mlflow.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/openmldb.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/procedure.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/python.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/pytorch.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sagemaker.py delete mode 100644 
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/shell.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/spark.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sql.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sub_process.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/switch.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/__init__.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/date.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/file.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/string.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/yaml_parser.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/__init__.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/cli/__init__.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/cli/test_config.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/cli/test_version.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/core/__init__.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/core/test_configuration.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/core/test_database.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/core/test_default_config_yaml.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/core/test_engine.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/core/test_process_definition.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/core/test_resource_definition.py delete 
mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/core/test_task.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/core/test_yaml_process_define.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/example/__init__.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/example/test_example.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/integration/__init__.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/integration/conftest.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/integration/test_java_gateway.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/integration/test_process_definition.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/integration/test_project.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/integration/test_submit_examples.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/integration/test_tenant.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/integration/test_user.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/__init__.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_github.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_gitlab.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_local.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_oss.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_resource_plugin.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_s3.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/tasks/__init__.py delete mode 100644 
dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_condition.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_datax.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_dependent.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_dvc.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_flink.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_func_wrap.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_http.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_map_reduce.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_mlflow.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_openmldb.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_procedure.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_python.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_pytorch.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sagemaker.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_shell.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_spark.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sql.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sub_process.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_switch.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/test_docs.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/testing/__init__.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/testing/cli.py delete mode 100644 
dolphinscheduler-python/pydolphinscheduler/tests/testing/constants.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/testing/decorator.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/testing/docker_wrapper.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/testing/file.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/testing/path.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/testing/task.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/utils/__init__.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/utils/test_date.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/utils/test_file.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/utils/test_string.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tests/utils/test_yaml_parser.py delete mode 100644 dolphinscheduler-python/pydolphinscheduler/tox.ini diff --git a/dolphinscheduler-python/pydolphinscheduler/.flake8 b/.flake8 similarity index 81% rename from dolphinscheduler-python/pydolphinscheduler/.flake8 rename to .flake8 index 120b42fb68..f6829fc382 100644 --- a/dolphinscheduler-python/pydolphinscheduler/.flake8 +++ b/.flake8 @@ -19,15 +19,6 @@ max-line-length = 110 exclude = .git, - __pycache__, - .pytest_cache, - *.egg-info, - docs/source/conf.py - old, - build, - dist, - htmlcov, - .tox, dist, ignore = # It's clear and not need to add docstring @@ -35,6 +26,3 @@ ignore = D105, # D105: Missing docstring in magic method # Conflict to Black W503 # W503: Line breaks before binary operators -per-file-ignores = - */pydolphinscheduler/side/__init__.py:F401 - */pydolphinscheduler/tasks/__init__.py:F401 diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index b58a97f831..eceda6a97a 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -38,7 +38,6 @@ /dolphinscheduler-task-plugin/ @caishunfeng @SbloodyS 
@zhuangchong /dolphinscheduler-tools/ @caishunfeng @SbloodyS @zhongjiajie @EricGao888 /script/ @caishunfeng @SbloodyS @zhongjiajie @EricGao888 -/dolphinscheduler-python/ @zhongjiajie /dolphinscheduler-ui/ @songjianet @Amy0104 /docs/ @zhongjiajie @Tianqi-Dotes @EricGao888 /licenses/ @kezhenxu94 @zhongjiajie diff --git a/.github/actions/labeler/labeler.yml b/.github/actions/labeler/labeler.yml index 4bb724fed2..fbfcb098fe 100644 --- a/.github/actions/labeler/labeler.yml +++ b/.github/actions/labeler/labeler.yml @@ -15,9 +15,6 @@ # limitations under the License. # -Python: - - any: ['dolphinscheduler-python/**/*'] - backend: - 'dolphinscheduler-alert/**/*' - 'dolphinscheduler-api/**/*' @@ -40,7 +37,6 @@ backend: document: - 'docs/**/*' - - 'dolphinscheduler-python/pydolphinscheduler/docs/**/*' CI&CD: - any: ['.github/**/*'] diff --git a/.github/workflows/py-ci.yml b/.github/workflows/py-ci.yml deleted file mode 100644 index 7e0333efd8..0000000000 --- a/.github/workflows/py-ci.yml +++ /dev/null @@ -1,205 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -name: Python API - -on: - push: - branches: - - dev - paths: - - 'dolphinscheduler-python/**' - pull_request: - -concurrency: - group: py-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -defaults: - run: - working-directory: dolphinscheduler-python/pydolphinscheduler - -# We have to update setuptools wheel to package with package_data, LICENSE, NOTICE -env: - DEPENDENCES: pip setuptools wheel tox - -jobs: - paths-filter: - name: Python-Path-Filter - runs-on: ubuntu-latest - outputs: - not-docs: ${{ steps.filter.outputs.not-docs }} - py-change: ${{ steps.filter.outputs.py-change }} - steps: - - uses: actions/checkout@v2 - - uses: dorny/paths-filter@b2feaf19c27470162a626bd6fa8438ae5b263721 - id: filter - with: - filters: | - not-docs: - - '!(docs/**)' - py-change: - - 'dolphinscheduler-python/pydolphinscheduler/**' - lint: - name: Lint - if: ${{ (needs.paths-filter.outputs.py-change == 'true') || (github.event_name == 'push') }} - timeout-minutes: 15 - needs: paths-filter - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.7 - uses: actions/setup-python@v4 - with: - python-version: 3.7 - - name: Install Dependences - run: | - python -m pip install --upgrade ${{ env.DEPENDENCES }} - - name: Run All Lint Check - run: | - python -m tox -vv -e lint - pytest: - name: Pytest - timeout-minutes: 15 - needs: lint - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - # YAML parse `3.10` to `3.1`, so we have to add quotes for `'3.10'`, see also: - # https://github.com/actions/setup-python/issues/160#issuecomment-724485470 - python-version: [3.6, 3.7, 3.8, 3.9, '3.10', 3.11-dev] - os: [ubuntu-latest, macOS-latest, windows-latest] - # Skip because dependence [py4j](https://pypi.org/project/py4j/) not work on those environments - exclude: - - os: windows-latest - python-version: '3.10' - - os: windows-latest - python-version: 3.11-dev - steps: - - uses: actions/checkout@v2 - - name: Set up 
Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install Dependences - run: | - python -m pip install --upgrade ${{ env.DEPENDENCES }} - - name: Run All Tests - run: | - python -m tox -vv -e code-test - doc-build: - name: Docs Build Test - timeout-minutes: 15 - needs: lint - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - env-list: [doc-build, doc-build-multi] - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.7 - uses: actions/setup-python@v4 - with: - python-version: 3.7 - - name: Install Dependences - run: | - python -m pip install --upgrade ${{ env.DEPENDENCES }} - - name: Run Build Docs Tests ${{ matrix.env-list }} - run: | - python -m tox -vv -e ${{ matrix.env-list }} - local-ci: - name: Local CI - timeout-minutes: 15 - needs: - - pytest - - doc-build - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.7 - uses: actions/setup-python@v4 - with: - python-version: 3.7 - - name: Install Dependences - run: | - python -m pip install --upgrade ${{ env.DEPENDENCES }} - - name: Run Tests Build Docs - run: | - python -m tox -vv -e local-ci - integrate-test: - name: Integrate Test - if: ${{ (needs.paths-filter.outputs.not-docs == 'true') || (github.event_name == 'push') }} - runs-on: ubuntu-latest - needs: paths-filter - timeout-minutes: 30 - steps: - - uses: actions/checkout@v2 - with: - submodules: true - - name: Sanity Check - uses: ./.github/actions/sanity-check - with: - token: ${{ secrets.GITHUB_TOKEN }} - - name: Cache local Maven repository - uses: actions/cache@v3 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: ${{ runner.os }}-maven- - # Switch to project root directory to run mvnw command - - name: Build Image - working-directory: ./ - run: | - ./mvnw -B clean install \ - -Dmaven.test.skip \ - -Dmaven.javadoc.skip \ - -Dcheckstyle.skip=true \ - 
-Pdocker,release -Ddocker.tag=ci \ - -pl dolphinscheduler-standalone-server -am - - name: Set up Python 3.7 - uses: actions/setup-python@v4 - with: - python-version: 3.7 - - name: Install Dependences - run: | - python -m pip install --upgrade ${{ env.DEPENDENCES }} - - name: Run Integrate Tests - run: | - python -m tox -vv -e integrate-test - result: - name: Python - runs-on: ubuntu-latest - timeout-minutes: 30 - needs: [ paths-filter, local-ci, integrate-test ] - if: always() - steps: - - name: Status - # We need change CWD to current directory to avoid global default working directory not exists - working-directory: ./ - run: | - if [[ ${{ needs.paths-filter.outputs.not-docs }} == 'false' && ${{ github.event_name }} == 'pull_request' ]]; then - echo "Only document change, skip both python unit and integrate test!" - exit 0 - fi - if [[ ${{ needs.paths-filter.outputs.py-change }} == 'false' && ${{ needs.integrate-test.result }} == 'success' && ${{ github.event_name }} == 'pull_request' ]]; then - echo "No python code change, and integrate test pass!" - exit 0 - fi - if [[ ${{ needs.integrate-test.result }} != 'success' || ${{ needs.local-ci.result }} != 'success' ]]; then - echo "py-ci Failed!" 
- exit -1 - fi diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml index 441672839b..6acfa1fc4b 100644 --- a/.github/workflows/unit-test.yml +++ b/.github/workflows/unit-test.yml @@ -23,7 +23,6 @@ on: paths-ignore: - '**/*.md' - 'dolphinscheduler-ui' - - 'dolphinscheduler-python/pydolphinscheduler' branches: - dev diff --git a/.gitignore b/.gitignore index 1082e4b155..e5eccc1308 100644 --- a/.gitignore +++ b/.gitignore @@ -50,18 +50,3 @@ dolphinscheduler-common/test dolphinscheduler-worker/logs dolphinscheduler-master/logs dolphinscheduler-api/logs - -# ------------------ -# pydolphinscheduler -# ------------------ -# Cache -__pycache__/ -.tox/ - -# Build -build/ -*egg-info/ - -# Test coverage -.coverage -htmlcov/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6b36749392..e51d15a16e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -41,11 +41,6 @@ repos: 'flake8-docstrings>=1.6', 'flake8-black>=0.2', ] - # pre-commit run in the root, so we have to point out the full path of configuration - args: [ - --config, - dolphinscheduler-python/pydolphinscheduler/.flake8 - ] - repo: https://github.com/pycqa/autoflake rev: v1.4 hooks: diff --git a/README.md b/README.md index 8c49415866..0b25dda917 100644 --- a/README.md +++ b/README.md @@ -19,10 +19,11 @@ Apache DolphinScheduler is the modern data workflow orchestration platform with The key features for DolphinScheduler are as follows: - Easy to deploy, we provide 4 ways to deploy, such as Standalone deployment,Cluster deployment,Docker / Kubernetes deployment and Rainbond deployment -- Easy to use, there are 3 ways to create workflows: +- Easy to use, there are four ways to create workflows: - Visually, create tasks by dragging and dropping tasks - - Creating workflows by PyDolphinScheduler(Python way) - - Creating workflows through Open API + - [PyDolphinScheduler](https://dolphinscheduler.apache.org/python/dev/index.html), Creating workflows via Python 
API, aka workflow-as-code + - Yaml definition, mapping yaml into workflow(have to install PyDolphinScheduler currently) + - Open API, Creating workflows - Highly Reliable, DolphinScheduler uses a decentralized multi-master and multi-worker architecture, which naturally supports horizontal scaling and high availability diff --git a/docs/docs/en/contribute/release/release-post.md b/docs/docs/en/contribute/release/release-post.md index 8d24b3a80f..20a8e43008 100644 --- a/docs/docs/en/contribute/release/release-post.md +++ b/docs/docs/en/contribute/release/release-post.md @@ -1,7 +1,7 @@ # Release Post We still have some publish task to do after we send the announcement mail, currently we have to publish Docker images to -Docker Hub and also publish pydolphinscheduler to PyPI. +Docker Hub. ## Publish Docker Image @@ -20,11 +20,6 @@ We could reuse the main command the CI run and publish our Docker images to Dock -Pdocker,release ``` -## Publish pydolphinscheduler to PyPI - -Python API need to release to PyPI for easier download and use, you can see more detail in [Python API release](https://github.com/apache/dolphinscheduler/blob/dev/dolphinscheduler-python/pydolphinscheduler/RELEASE.md#to-pypi) -to finish PyPI release. 
- ## Get All Contributors You might need all contributors in current release when you want to publish the release news or announcement, you could diff --git a/docs/docs/en/contribute/release/release-prepare.md b/docs/docs/en/contribute/release/release-prepare.md index e7fb41c5a1..30bcaae52f 100644 --- a/docs/docs/en/contribute/release/release-prepare.md +++ b/docs/docs/en/contribute/release/release-prepare.md @@ -23,7 +23,6 @@ For example, to release `x.y.z`, the following updates are required: - `deploy/kubernetes/dolphinscheduler`: - `Chart.yaml`: `appVersion` needs to be updated to x.y.z (`version` is helm chart version,incremented and different from x.y.z) - `values.yaml`: `image.tag` needs to be updated to x.y.z - - `dolphinscheduler-python/pydolphinscheduler/setup.py`: change `version` to x.y.z - Version in the docs: - Change the placeholder ``(except `pom`) to the `x.y.z` in directory `docs` - Add new history version diff --git a/docs/docs/en/contribute/release/release.md b/docs/docs/en/contribute/release/release.md index ff2b9fe1e3..dffb5fb4fc 100644 --- a/docs/docs/en/contribute/release/release.md +++ b/docs/docs/en/contribute/release/release.md @@ -10,8 +10,6 @@ all conditions are met, if any or them are missing, you should install them and java -version # Maven requests mvn -version -# Python 3.6 above is requests, and you have to make keyword `python` work in your terminal and version match -python --version ``` ## GPG Settings @@ -166,13 +164,10 @@ git push origin "${VERSION}"-release ### Pre-Release Check ```shell -# make gpg command could be run in maven correct -export GPG_TTY=$(tty) - -mvn release:prepare -Prelease,python -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -DdryRun=true -Dusername="${GH_USERNAME}" +mvn release:prepare -Prelease -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -DdryRun=true 
-Dusername="${GH_USERNAME}" ``` -* `-Prelease,python`: choose release and python profile, which will pack all the source codes, jar files and executable binary packages, and Python distribute package. +* `-Prelease`: choose release profile, which will pack all the source codes, jar files and executable binary packages. * `-DautoVersionSubmodules=true`: it can make the version number is inputted only once and not for each sub-module. * `-DdryRun=true`: dry run which means not to generate or submit new version number and new tag. @@ -187,7 +182,7 @@ mvn release:clean Then, prepare to execute the release. ```shell -mvn release:prepare -Prelease,python -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -DpushChanges=false -Dusername="${GH_USERNAME}" +mvn release:prepare -Prelease -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -DpushChanges=false -Dusername="${GH_USERNAME}" ``` It is basically the same as the previous rehearsal command, but deleting `-DdryRun=true` parameter. @@ -219,7 +214,7 @@ git push origin --tags ### Deploy the Release ```shell -mvn release:perform -Prelease,python -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -Dusername="${GH_USERNAME}" +mvn release:perform -Prelease -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -Dusername="${GH_USERNAME}" ``` After that command is executed, the version to be released will be uploaded to Apache staging repository automatically. @@ -267,7 +262,6 @@ Create folder by version number. 
```shell mkdir -p ~/ds_svn/dev/dolphinscheduler/"${VERSION}" -mkdir -p ~/ds_svn/dev/dolphinscheduler/"${VERSION}"/python cd ~/ds_svn/dev/dolphinscheduler/"${VERSION}" ``` @@ -277,9 +271,6 @@ Add source code packages, binary packages and executable binary packages to SVN # Source and binary tarball for main code cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/*.tar.gz ~/ds_svn/dev/dolphinscheduler/"${VERSION}" cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/*.tar.gz.asc ~/ds_svn/dev/dolphinscheduler/"${VERSION}" - -# Source and binary tarball for Python API -cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/python/* ~/ds_svn/dev/dolphinscheduler/"${VERSION}"/python ``` ### Generate sign files @@ -287,10 +278,6 @@ cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/python/* ~/ds_svn/dev/dolp ```shell shasum -a 512 apache-dolphinscheduler-"${VERSION}"-src.tar.gz >> apache-dolphinscheduler-"${VERSION}"-src.tar.gz.sha512 shasum -b -a 512 apache-dolphinscheduler-"${VERSION}"-bin.tar.gz >> apache-dolphinscheduler-"${VERSION}"-bin.tar.gz.sha512 -cd python -shasum -a 512 apache-dolphinscheduler-python-"${VERSION}".tar.gz >> apache-dolphinscheduler-python-"${VERSION}".tar.gz.sha512 -shasum -b -a 512 apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl >> apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl.sha512 -cd ../ ``` ### Commit to Apache SVN @@ -308,10 +295,6 @@ svn --username="${A_USERNAME}" commit -m "release ${VERSION}" ```shell shasum -c apache-dolphinscheduler-"${VERSION}"-src.tar.gz.sha512 shasum -c apache-dolphinscheduler-"${VERSION}"-bin.tar.gz.sha512 -cd python -shasum -c apache-dolphinscheduler-python-"${VERSION}".tar.gz.sha512 -shasum -c apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl.sha512 -cd ../ ``` ### Check gpg Signature @@ -345,10 +328,6 @@ Then, check the gpg signature. 
```shell gpg --verify apache-dolphinscheduler-"${VERSION}"-src.tar.gz.asc gpg --verify apache-dolphinscheduler-"${VERSION}"-bin.tar.gz.asc -cd python -gpg --verify apache-dolphinscheduler-python-"${VERSION}".tar.gz.asc -gpg --verify apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl.asc -cd ../ ``` > Note: You have to create gpg signature manually when you can not find your `asc` file, the command @@ -359,7 +338,7 @@ cd ../ #### Check source package -Decompress `apache-dolphinscheduler--src.tar.gz` and `python/apache-dolphinscheduler-python-.tar.gz` then check the following items: +Decompress `apache-dolphinscheduler--src.tar.gz` then check the following items: * Check whether source tarball is oversized for including nonessential files * `LICENSE` and `NOTICE` files exist @@ -372,8 +351,7 @@ Decompress `apache-dolphinscheduler--src.tar.gz` and `python/apache-dol #### Check binary packages -Decompress `apache-dolphinscheduler--src.tar.gz` and `python/apache-dolphinscheduler-python--bin.tar.gz` -to check the following items: +Decompress `apache-dolphinscheduler--src.tar.gz` to check the following items: - `LICENSE` and `NOTICE` files exist - Correct year in `NOTICE` file diff --git a/docs/docs/zh/contribute/release/release-post.md b/docs/docs/zh/contribute/release/release-post.md index 783503f659..fe1f7e323f 100644 --- a/docs/docs/zh/contribute/release/release-post.md +++ b/docs/docs/zh/contribute/release/release-post.md @@ -1,6 +1,6 @@ # 发版后续 -发送公告邮件后,我们还有一些发布任务要做,目前我们必须将 Docker 镜像发布到 Docker Hub 和 并且需要将 pydolphinscheduler 发布到 PyPI。 +发送公告邮件后,我们还有一些发布任务要做,目前我们必须将 Docker 镜像发布到 Docker Hub。 ## 发布 Docker 镜像 @@ -19,11 +19,6 @@ -Pdocker,release ``` -## 发布 pydolphinscheduler 到 PyPI - -需要将 Python API 发布到 PyPI,请参考 [Python API release](https://github.com/apache/dolphinscheduler/blob/dev/dolphinscheduler-python/pydolphinscheduler/RELEASE.md#to-pypi) -完成 PyPI 的发版 - ## 获取全部的贡献者 当您想要发布新版本的新闻或公告时,您可能需要当前版本的所有贡献者,您可以在 `tools/release` 中使用命令 `python release.py contributor` 
自动生成贡献者 Github id。 diff --git a/docs/docs/zh/contribute/release/release-prepare.md b/docs/docs/zh/contribute/release/release-prepare.md index 9fd8d9dfed..85eea69e6b 100644 --- a/docs/docs/zh/contribute/release/release-prepare.md +++ b/docs/docs/zh/contribute/release/release-prepare.md @@ -23,7 +23,6 @@ - `deploy/kubernetes/dolphinscheduler`: - `Chart.yaml`: `appVersion` 版本更新为 x.y.z (`version` 为 helm chart 版本, 增量更新但不要设置为 x.y.z) - `values.yaml`: `image.tag` 版本更新为 x.y.z - - `dolphinscheduler-python/pydolphinscheduler/setup.py`: 修改其中的 `version` 为 x.y.z - 修改文档(docs模块)中的版本号: - 将 `docs` 文件夹下文件的占位符 `` (除了 pom.xml 相关的) 修改成 `x.y.z` - 新增历史版本 diff --git a/docs/docs/zh/contribute/release/release.md b/docs/docs/zh/contribute/release/release.md index 5b00867b77..f8137ef78a 100644 --- a/docs/docs/zh/contribute/release/release.md +++ b/docs/docs/zh/contribute/release/release.md @@ -9,8 +9,6 @@ java -version # 需要 Maven mvn -version -# 需要 Python 3.6 及以上的版本,并且需要 `python` 关键字能在命令行中运行,且版本符合条件。 -python --version ``` ## GPG设置 @@ -172,14 +170,11 @@ git push origin ${RELEASE.VERSION}-release ### 发布预校验 ```shell -# 保证 python profile 的 gpg 可以正常运行 -export GPG_TTY=$(tty) - # 运行发版校验 -mvn release:prepare -Prelease,python -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -DdryRun=true -Dusername="${GH_USERNAME}" +mvn release:prepare -Prelease -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -DdryRun=true -Dusername="${GH_USERNAME}" ``` -* `-Prelease,python`: 选择release和python的profile,这个profile会打包所有源码、jar文件以及可执行二进制包,以及Python的二进制包。 +* `-Prelease`: 选择release的profile,这个profile会打包所有源码、jar文件以及可执行二进制包。 * `-DautoVersionSubmodules=true`: 作用是发布过程中版本号只需要输入一次,不必为每个子模块都输入一次。 * `-DdryRun=true`: 演练,即不产生版本号提交,不生成新的tag。 @@ -194,7 +189,7 @@ mvn release:clean 然后准备执行发布。 ```shell -mvn release:prepare -Prelease,python -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true 
-Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -DpushChanges=false -Dusername="${GH_USERNAME}" +mvn release:prepare -Prelease -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -DpushChanges=false -Dusername="${GH_USERNAME}" ``` 和上一步演练的命令基本相同,去掉了 `-DdryRun=true` 参数。 @@ -223,7 +218,7 @@ git push origin --tags ### 部署发布 ```shell -mvn release:perform -Prelease,python -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -Dusername="${GH_USERNAME}" +mvn release:perform -Prelease -Darguments="-Dmaven.test.skip=true -Dcheckstyle.skip=true -Dmaven.javadoc.skip=true" -DautoVersionSubmodules=true -Dusername="${GH_USERNAME}" ``` 执行完该命令后,待发布版本会自动上传到Apache的临时筹备仓库(staging repository)。你可以通过访问 [apache staging repositories](https://repository.apache.org/#stagingRepositories) @@ -270,7 +265,6 @@ svn --username="${A_USERNAME}" commit -m "new key add" ```shell mkdir -p ~/ds_svn/dev/dolphinscheduler/"${VERSION}" -mkdir -p ~/ds_svn/dev/dolphinscheduler/"${VERSION}"/python cd ~/ds_svn/dev/dolphinscheduler/"${VERSION}" ``` @@ -280,9 +274,6 @@ cd ~/ds_svn/dev/dolphinscheduler/"${VERSION}" # 主程序源码包和二进制包 cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/*.tar.gz ~/ds_svn/dev/dolphinscheduler/"${VERSION}" cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/*.tar.gz.asc ~/ds_svn/dev/dolphinscheduler/"${VERSION}" - -# Python API 源码和二进制包 -cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/python/* ~/ds_svn/dev/dolphinscheduler/"${VERSION}"/python ``` ### 生成文件签名 @@ -290,10 +281,6 @@ cp -f ~/dolphinscheduler/dolphinscheduler-dist/target/python/* ~/ds_svn/dev/dolp ```shell shasum -a 512 apache-dolphinscheduler-"${VERSION}"-src.tar.gz >> apache-dolphinscheduler-"${VERSION}"-src.tar.gz.sha512 shasum -b -a 512 apache-dolphinscheduler-"${VERSION}"-bin.tar.gz >> apache-dolphinscheduler-"${VERSION}"-bin.tar.gz.sha512 -cd python -shasum -a 512 
apache-dolphinscheduler-python-"${VERSION}".tar.gz >> apache-dolphinscheduler-python-"${VERSION}".tar.gz.sha512 -shasum -b -a 512 apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl >> apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl.sha512 -cd ../ ``` ### 提交Apache SVN @@ -311,10 +298,6 @@ svn --username="${A_USERNAME}" commit -m "release ${VERSION}" ```shell shasum -c apache-dolphinscheduler-"${VERSION}"-src.tar.gz.sha512 shasum -c apache-dolphinscheduler-"${VERSION}"-bin.tar.gz.sha512 -cd python -shasum -c apache-dolphinscheduler-python-"${VERSION}".tar.gz.sha512 -shasum -c apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl.sha512 -cd ../ ``` ### 检查gpg签名 @@ -347,10 +330,6 @@ Your decision? 5 ```shell gpg --verify apache-dolphinscheduler-"${VERSION}"-src.tar.gz.asc gpg --verify apache-dolphinscheduler-"${VERSION}"-bin.tar.gz.asc -cd python -gpg --verify apache-dolphinscheduler-python-"${VERSION}".tar.gz.asc -gpg --verify apache_dolphinscheduler-python-"${VERSION}"-py3-none-any.whl.asc -cd ../ ``` > 注意:当你找不到你的 `asc` 文件时,你必须手动创建 gpg 签名,命令 @@ -361,7 +340,7 @@ cd ../ #### 检查源码包的文件内容 -解压缩`apache-dolphinscheduler--src.tar.gz`以及Python文件夹下的`apache-dolphinscheduler-python-.tar.gz`,进行如下检查: +解压缩`apache-dolphinscheduler--src.tar.gz`,进行如下检查: - 检查源码包是否包含由于包含不必要文件,致使tarball过于庞大 - 存在`LICENSE`和`NOTICE`文件 @@ -373,8 +352,7 @@ cd ../ #### 检查二进制包的文件内容 -解压缩`apache-dolphinscheduler--src.tar.gz`和`apache-dolphinscheduler-python--bin.tar.gz` -进行如下检查: +解压缩`apache-dolphinscheduler--src.tar.gz`进行如下检查: - 存在`LICENSE`和`NOTICE`文件 - 所有文本文件开头都有ASF许可证 diff --git a/dolphinscheduler-api/pom.xml b/dolphinscheduler-api/pom.xml index cc34dcba1e..facd4e4e69 100644 --- a/dolphinscheduler-api/pom.xml +++ b/dolphinscheduler-api/pom.xml @@ -163,7 +163,7 @@ - + net.sf.py4j py4j diff --git a/dolphinscheduler-dist/pom.xml b/dolphinscheduler-dist/pom.xml index b202cdd281..ee4c85589e 100644 --- a/dolphinscheduler-dist/pom.xml +++ b/dolphinscheduler-dist/pom.xml @@ -73,11 +73,6 @@ 
org.apache.dolphinscheduler dolphinscheduler-tools - - - org.apache.dolphinscheduler - dolphinscheduler-python - @@ -126,35 +121,5 @@ - - - python - - - - maven-assembly-plugin - - - - python - - single - - package - - - python - false - - src/main/assembly/dolphinscheduler-python-api.xml - - - - - - - - - diff --git a/dolphinscheduler-dist/src/main/assembly/dolphinscheduler-python-api.xml b/dolphinscheduler-dist/src/main/assembly/dolphinscheduler-python-api.xml deleted file mode 100644 index cd37acee62..0000000000 --- a/dolphinscheduler-dist/src/main/assembly/dolphinscheduler-python-api.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - - python-api - - dir - - false - - - - ${basedir}/../dolphinscheduler-python/pydolphinscheduler/dist - . - - - diff --git a/dolphinscheduler-dist/src/main/assembly/dolphinscheduler-src.xml b/dolphinscheduler-dist/src/main/assembly/dolphinscheduler-src.xml index 05d54871c7..3ccc60ef0a 100644 --- a/dolphinscheduler-dist/src/main/assembly/dolphinscheduler-src.xml +++ b/dolphinscheduler-dist/src/main/assembly/dolphinscheduler-src.xml @@ -57,13 +57,6 @@ **/dolphinscheduler-ui/node/** **/dolphinscheduler-ui/node_modules/** - - **/dolphinscheduler-python/pydolphinscheduler/.pytest_cache/** - **/dolphinscheduler-python/pydolphinscheduler/build/** - **/dolphinscheduler-python/pydolphinscheduler/dist/** - **/dolphinscheduler-python/pydolphinscheduler/dist/** - **/dolphinscheduler-python/pydolphinscheduler/htmlcov/** - **/.settings/** **/.project diff --git a/dolphinscheduler-python/pom.xml b/dolphinscheduler-python/pom.xml deleted file mode 100644 index a3133a52e7..0000000000 --- a/dolphinscheduler-python/pom.xml +++ /dev/null @@ -1,165 +0,0 @@ - - - - 4.0.0 - - org.apache.dolphinscheduler - dolphinscheduler - dev-SNAPSHOT - - dolphinscheduler-python - jar - ${project.artifactId} - - - - release - - false - - - - python - - - - org.codehaus.mojo - exec-maven-plugin - - - python-api-prepare - - exec - - prepare-package - - python - 
${project.basedir}/pydolphinscheduler - - -m - pip - install - --upgrade - pip - .[build] - - - - - python-api-clean - - exec - - prepare-package - - python - ${project.basedir}/pydolphinscheduler - - setup.py - pre_clean - - - - - python-api-build - - exec - - prepare-package - - python - ${project.basedir}/pydolphinscheduler - - -m - build - - - - - - python-pkg-rename-tar - - exec - - prepare-package - - bash - ${project.basedir}/pydolphinscheduler - - -c - mv dist/apache-dolphinscheduler-*.tar.gz dist/apache-dolphinscheduler-python-${project.version}.tar.gz - - - - - python-pkg-rename-whl - - exec - - prepare-package - - bash - ${project.basedir}/pydolphinscheduler - - -c - mv dist/apache_dolphinscheduler-*py3-none-any.whl dist/apache_dolphinscheduler-python-${project.version}-py3-none-any.whl - - - - - sign-source - - exec - - prepare-package - - ${python.sign.skip} - bash - ${project.basedir}/pydolphinscheduler - - -c - - gpg --armor --detach-sign --digest-algo=SHA512 dist/*.tar.gz - - - - - sign-wheel - - exec - - prepare-package - - ${python.sign.skip} - bash - ${project.basedir}/pydolphinscheduler - - -c - - gpg --armor --detach-sign --digest-algo=SHA512 dist/*.whl - - - - - - - - - - diff --git a/dolphinscheduler-python/pydolphinscheduler/.coveragerc b/dolphinscheduler-python/pydolphinscheduler/.coveragerc deleted file mode 100644 index 16205094c2..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/.coveragerc +++ /dev/null @@ -1,34 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[run] -command_line = -m pytest -omit = - # Ignore all test cases in tests/ - tests/* - # Ignore examples directory - */pydolphinscheduler/examples/* - # TODO. Temporary ignore java_gateway file, because we could not find good way to test it. - */pydolphinscheduler/java_gateway.py - -[report] -# Don’t report files that are 100% covered -skip_covered = True -show_missing = True -precision = 2 -# Report will fail when coverage under 90.00% -fail_under = 90 diff --git a/dolphinscheduler-python/pydolphinscheduler/.isort.cfg b/dolphinscheduler-python/pydolphinscheduler/.isort.cfg deleted file mode 100644 index 70fa2e05bd..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/.isort.cfg +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -[settings] -profile=black diff --git a/dolphinscheduler-python/pydolphinscheduler/DEVELOP.md b/dolphinscheduler-python/pydolphinscheduler/DEVELOP.md deleted file mode 100644 index eac4b3678a..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/DEVELOP.md +++ /dev/null @@ -1,265 +0,0 @@ - - -# Develop - -pydolphinscheduler is python API for Apache DolphinScheduler, it just defines what workflow look like instead of -store or execute it. We here use [py4j][py4j] to dynamically access Java Virtual Machine. - -## Setup Develop Environment - -**PyDolphinScheduler** use GitHub to hold all source code, you should clone the code before you do same change. - -```shell -git clone git@github.com:apache/dolphinscheduler.git -``` - -Now, we should install all dependence to make sure we could run test or check code style locally - -```shell -cd dolphinscheduler/dolphinscheduler-python/pydolphinscheduler -python -m pip install -e '.[dev]' -``` - -Next, we have to open pydolphinscheduler project in you editor. We recommend you use [pycharm][pycharm] -instead of [IntelliJ IDEA][idea] to open it. And you could just open directory -`dolphinscheduler-python/pydolphinscheduler` instead of `dolphinscheduler-python`. - -## Brief Concept - -Apache DolphinScheduler is design to define workflow by UI, and pydolphinscheduler try to define it by code. When -define by code, user usually do not care user, tenant, or queue exists or not. All user care about is created -a new workflow by the code his/her definition. So we have some **side object** in `pydolphinscheduler/side` -directory, their only check object exists or not, and create them if not exists. - -### Process Definition - -pydolphinscheduler workflow object name, process definition is also same name as Java object(maybe would be change to -other word for more simple). - -### Tasks - -pydolphinscheduler tasks object, we use tasks to define exact job we want DolphinScheduler do for us. 
For now, -we only support `shell` task to execute shell task. [This link][all-task] list all tasks support in DolphinScheduler -and would be implemented in the further. - -## Test Your Code - -Linting and tests is very important for open source project, so we pay more attention to it. We have continuous -integration service run by GitHub Action to test whether the patch is good or not, which you could jump to -section [With GitHub Action](#with-github-action) see more detail. - -And to make more convenience to local tests, we also have the way to run your [test automated with tox](#automated-testing-with-tox) -locally(*run all tests except integrate test with need docker environment*). It is helpful when your try to find out the -detail when continuous integration in GitHub Action failed, or you have a great patch and want to test local first. - -Besides [automated testing with tox](#automated-testing-with-tox) locally, we also have a [manual way](#manually) -run tests. And it is scattered commands to reproduce each step of the integration test we told about. - -* Remote - * [With GitHub Action](#with-github-action) -* Local - * [Automated Testing With tox](#automated-testing-with-tox)(including all but integrate test) - * [Manually](#manually)(with integrate test) - -### With GitHub Action - -GitHub Action test in various environment for pydolphinscheduler, including different python version in -`3.6|3.7|3.8|3.9` and operating system `linux|macOS|windows`. It will trigger and run automatically when you -submit pull requests to `apache/dolphinscheduler`. - -### Automated Testing With tox - -[tox](https://tox.wiki) is a package aims to automate and standardize testing in Python, both our continuous -integration and local test use it to run actual task. 
To use it, you should install it first - -```shell -python -m pip install --upgrade tox -``` - -After installation, you could run a single command to run all the tests, it is almost like test in GitHub Action -but not so much different environment. - -```shell -tox -e local-ci -``` - -It will take a while when you run it the first time, because it has to install dependencies and make some prepare, -and the next time you run it will be faster. - -If you failed section `lint` when you run command `tox -e local-ci`, you could try to run command `tox -e auto-lint` -which we provider fix as many lints as possible. When I finish, you could run command `tox -e local-ci` to see -whether the linter pass or not, you have to fix it by yourself if linter still fail. - -### Manually - -#### Code Style - -We use [isort][isort] to automatically keep Python imports alphabetically, and use [Black][black] for code -formatter and [Flake8][flake8] for pep8 checker. If you use [pycharm][pycharm]or [IntelliJ IDEA][idea], -maybe you could follow [Black-integration][black-editor] to configure them in your environment. - -Our Python API CI would automatically run code style checker and unittest when you submit pull request in -GitHub, you could also run static check locally. - -We recommend [pre-commit](https://pre-commit.com/) to do the checker mentioned above before you develop locally. -You should install `pre-commit` by running - -```shell -python -m pip install pre-commit -``` - -in your development environment and then run `pre-commit install` to set up the git hooks scripts. After finish -above steps, each time you run `git commit` or `git push` would run pre-commit check to make basic check before -you create pull requests in GitHub. - -```shell -# We recommend you run isort and Black before Flake8, because Black could auto fix some code style issue -# but Flake8 just hint when code style not match pep8 - -# Run Isort -python -m isort . - -# Run Black -python -m black . 
- -# Run Flake8 -python -m flake8 -``` - -#### Testing - -## Build Document - -We use [sphinx][sphinx] to build docs. Dolphinscheduler Python API CI would automatically build docs when you submit pull request in -GitHub. You may locally ensure docs could be built successfully in case the failure blocks CI, you can build by tox or manual. - -### Build Document Automatically with tox - -We integrated document build process into tox, you can build the latest document and all document(including history documents) via -single command - -```shell -# Build the latest document in dev branch -tox -e doc-build -# Build all documents, which including the latest and all history documents -tox -e doc-build-multi -``` - -### Build Document Manually - -To build docs locally, install sphinx and related python modules first via: - -```shell -python -m pip install '.[doc]' -``` - -Then go to document directory and execute the build command - -```shell -cd pydolphinscheduler/docs/ -make clean && make html -``` - -> NOTE: We support build multiple versions of documents with [sphinx-multiversion](https://holzhaus.github.io/sphinx-multiversion/master/index.html), -> you can build with command `git fetch --tags && make clean && make multiversion` - -## Testing - -pydolphinscheduler using [pytest][pytest] to test our codebase. GitHub Action will run our test when you create -pull request or commit to dev branch, with python version `3.6|3.7|3.8|3.9` and operating system `linux|macOS|windows`. - -pydolphinscheduler using [pytest][pytest] to run all tests in directory `tests`. You could run tests by the commands - -```shell -python -m pytest --cov=pydolphinscheduler --cov-config=.coveragerc tests/ -``` - -Besides run tests, it will also check the unit test [coverage][coverage] threshold, for now when test cover less than 90% -will fail the coverage, as well as our GitHub Action. - -The command above will check test coverage automatically, and you could also test the coverage by command. 
- -```shell -python -m coverage run && python -m coverage report -``` - -It would not only run unit test but also show each file coverage which cover rate less than 100%, and `TOTAL` -line show you total coverage of you code. If your CI failed with coverage you could go and find some reason by -this command output. - -### Integrate Test - -Integrate Test can not run when you execute command `tox -e local-ci` because it needs external environment -including [Docker](https://docs.docker.com/get-docker/) and specific image build by [maven](https://maven.apache.org/install.html). -Here we would show you the step to run integrate test in directory `dolphinscheduler-python/pydolphinscheduler/tests/integration`. -There are two ways to run integrate tests. - -#### Method 1: Launch Docker Container Locally - -```shell -# Go to project root directory and build Docker image -cd ../../ - -# Build Docker image -./mvnw -B clean install \ - -Dmaven.test.skip \ - -Dmaven.javadoc.skip \ - -Dmaven.checkstyle.skip \ - -Pdocker,release -Ddocker.tag=ci \ - -pl dolphinscheduler-standalone-server -am - -# Go to pydolphinscheduler root directory and run integrate tests -tox -e integrate-test -``` - -#### Method 2: Start Standalone Server in IntelliJ IDEA - -```shell -# Start the standalone server in IDEA - -# Go to pydolphinscheduler root directory and run integrate tests -tox -e local-integrate-test -``` - -## Add LICENSE When New Dependencies Adding - -When you add a new package in pydolphinscheduler, you should also add the package's LICENSE to directory -`dolphinscheduler-dist/release-docs/licenses/python-api-licenses`, and also add a short description to -`dolphinscheduler-dist/release-docs/LICENSE`. - -## Update `UPDATING.md` when public class, method or interface is be changed - -When you change public class, method or interface, you should change the [UPDATING.md](./UPDATING.md) to notice -users who may use it in other way. 
- -## Reference - -[py4j]: https://www.py4j.org/index.html -[pycharm]: https://www.jetbrains.com/pycharm -[idea]: https://www.jetbrains.com/idea/ -[all-task]: https://dolphinscheduler.apache.org/en-us/docs/dev/user_doc/guide/task/shell.html -[pytest]: https://docs.pytest.org/en/latest/ -[black]: https://black.readthedocs.io/en/stable/index.html -[flake8]: https://flake8.pycqa.org/en/latest/index.html -[black-editor]: https://black.readthedocs.io/en/stable/integrations/editors.html#pycharm-intellij-idea -[coverage]: https://coverage.readthedocs.io/en/stable/ -[isort]: https://pycqa.github.io/isort/index.html -[sphinx]: https://www.sphinx-doc.org/en/master - diff --git a/dolphinscheduler-python/pydolphinscheduler/LICENSE b/dolphinscheduler-python/pydolphinscheduler/LICENSE deleted file mode 100644 index a7359fad35..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/LICENSE +++ /dev/null @@ -1,228 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - -============================================================================ -Apache DolphinScheduler Python API SUBCOMPONENTS: - -The Apache DolphinScheduler Python API project contains subcomponents -with separate copyright notices and license terms. Your use of the source -code for the these subcomponents is subject to the terms and conditions -of the following licenses. - -======================================================================== -BSD licenses -======================================================================== - -The following components are provided under a BSD license. See project link for details. -The text of each license is also included at licenses/LICENSE-[project].txt. - - py4j v0.10 (https://github.com/py4j/py4j) - click v8.0 (https://github.com/pallets/click) - -======================================================================== -MIT licenses -======================================================================== - -The following components are provided under the MIT License. See project link for details. -The text of each license is also included at licenses/LICENSE-[project].txt. 
- - ruamel.yaml v0.17 (https://sourceforge.net/projects/ruamel-yaml/) diff --git a/dolphinscheduler-python/pydolphinscheduler/NOTICE b/dolphinscheduler-python/pydolphinscheduler/NOTICE deleted file mode 100644 index 61acdab5d8..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/NOTICE +++ /dev/null @@ -1,5 +0,0 @@ -Apache DolphinScheduler -Copyright 2017-2022 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). diff --git a/dolphinscheduler-python/pydolphinscheduler/README.md b/dolphinscheduler-python/pydolphinscheduler/README.md deleted file mode 100644 index 7fc73d6a29..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/README.md +++ /dev/null @@ -1,90 +0,0 @@ - - -# pydolphinscheduler - -[![PyPi Version](https://img.shields.io/pypi/v/apache-dolphinscheduler.svg?style=flat-square&logo=PyPi)](https://pypi.org/project/apache-dolphinscheduler/) -[![PyPi Python Versions](https://img.shields.io/pypi/pyversions/apache-dolphinscheduler.svg?style=flat-square&logo=python)](https://pypi.org/project/apache-dolphinscheduler/) -[![PyPi License](https://img.shields.io/pypi/l/apache-dolphinscheduler.svg?style=flat-square)](https://pypi.org/project/apache-dolphinscheduler/) -[![PyPi Status](https://img.shields.io/pypi/status/apache-dolphinscheduler.svg?style=flat-square)](https://pypi.org/project/apache-dolphinscheduler/) -[![PyPi Downloads](https://img.shields.io/pypi/dm/apache-dolphinscheduler?style=flat-square)](https://pypi.org/project/apache-dolphinscheduler/) - -[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg?style=flat-square)](https://github.com/psf/black) -[![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat-square&labelColor=ef8336)](https://pycqa.github.io/isort) -[![GitHub 
Build](https://github.com/apache/dolphinscheduler/actions/workflows/py-ci.yml/badge.svg?branch=dev)](https://github.com/apache/dolphinscheduler/actions?query=workflow%3A%22Python+API%22) - -**PyDolphinScheduler** is python API for Apache DolphinScheduler, which allow you definition -your workflow by python code, aka workflow-as-codes. - -## Quick Start - -### Installation - -```shell -# Install -python -m pip install apache-dolphinscheduler - -# Verify installation is successful, it will show the version of apache-dolphinscheduler, here we use 0.1.0 as example -pydolphinscheduler version -# 0.1.0 -``` - -> NOTE: package apache-dolphinscheduler not work on above Python version 3.10(including itself) in Window operating system -> due to dependence [py4j](https://pypi.org/project/py4j/) not work on those environments. - -Here we show you how to install and run a simple example of pydolphinscheduler - -### Start Server And Run Example - -Before you run an example, you have to start backend server. You could follow -[development setup](../../docs/docs/en/contribute/development-environment-setup.md) -section "DolphinScheduler Standalone Quick Start" to set up developer environment. You have to start backend -and frontend server in this step, which mean that you could view DolphinScheduler UI in your browser with URL -http://localhost:12345/dolphinscheduler - -After backend server is being start, all requests from `pydolphinscheduler` would be sent to backend server. -And for now we could run a simple example by: - - - -```shell -# Please make sure your terminal could -curl https://raw.githubusercontent.com/apache/dolphinscheduler/dev/dolphinscheduler-python/pydolphinscheduler/examples/tutorial.py -o ./tutorial.py -python ./tutorial.py -``` - -> **_NOTICE:_** Since Apache DolphinScheduler's tenant is requests while running command, you might need to change -> tenant value in `example/tutorial.py`. 
For now the value is `tenant_exists`, please change it to username exists -> in you environment. - -After command execute, you could see a new project with single process definition named *tutorial* in the -[UI-project list](https://dolphinscheduler.apache.org/en-us/docs/latest/user_doc/guide/project/project-list.html). - -## Develop - -Until now, we finish quick start by an example of pydolphinscheduler and run it. If you want to inspect or join -pydolphinscheduler develop, you could take a look at [develop](./DEVELOP.md) - -## Release - -If you are interested in how to release **PyDolphinScheduler**, you could go and see at [release](./RELEASE.md) - -## What's more - -For more detail information, please go to see **PyDolphinScheduler** latest(unreleased) [document](https://dolphinscheduler.apache.org/python/dev/index.html) diff --git a/dolphinscheduler-python/pydolphinscheduler/RELEASE.md b/dolphinscheduler-python/pydolphinscheduler/RELEASE.md deleted file mode 100644 index e00ef05beb..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/RELEASE.md +++ /dev/null @@ -1,35 +0,0 @@ - - -# Release - -**PyDolphinScheduler** office release is in [ASF Distribution Directory](https://downloads.apache.org/dolphinscheduler/), -and it should be released together with [apache-dolphinscheduler](https://github.com/apache/dolphinscheduler). - -## To ASF Distribution Directory - -You could release to [ASF Distribution Directory](https://downloads.apache.org/dolphinscheduler/) according to -[release guide](../../docs/docs/en/contribute/release/release-prepare.md) in DolphinScheduler -website. - -## To PyPi - -[PyPI](https://pypi.org), Python Package Index, is a repository of software for the Python programming language. -User could install Python package from it. 
Release to PyPi make user easier to install and try PyDolphinScheduler, -There is an official way to package project from [PyPA](https://packaging.python.org/en/latest/tutorials/packaging-projects) diff --git a/dolphinscheduler-python/pydolphinscheduler/UPDATING.md b/dolphinscheduler-python/pydolphinscheduler/UPDATING.md deleted file mode 100644 index b298c3b1ad..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/UPDATING.md +++ /dev/null @@ -1,40 +0,0 @@ - - -# UPDATING - -Updating is try to document non-backward compatible updates which notice users the detail changes about pydolphinscheduler. -It started after version 2.0.5 released - -## dev - -* Remove parameter ``task_location`` in process definition and Java Gateway service ([#11681](https://github.com/apache/dolphinscheduler/pull/11681)) -* Remove the spark version of spark task ([#11860](https://github.com/apache/dolphinscheduler/pull/11860)). - -## 3.0.0 - -* Integrate Python gateway server into Dolphinscheduler API server, and you could start Python gateway service by command - `./bin/dolphinscheduler-daemon.sh start api-server` instead of independent command - `./bin/dolphinscheduler-daemon.sh start python-gateway-server`. -* Remove parameter `queue` from class `ProcessDefinition` to avoid confuse user when it change but not work -* Change `yaml_parser.py` method `to_string` to magic method `__str__` make it more pythonic. -* Use package ``ruamel.yaml`` replace ``pyyaml`` for write yaml file with comment. -* Change variable about where to keep pydolphinscheduler configuration from ``PYDOLPHINSCHEDULER_HOME`` to - ``PYDS_HOME`` which is same as other environment variable name. 
- diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/Makefile b/dolphinscheduler-python/pydolphinscheduler/docs/Makefile deleted file mode 100644 index ff2c4ebb44..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/Makefile +++ /dev/null @@ -1,44 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. - -# Add opts `turn warnings into errors` strict sphinx-build behavior -SPHINXOPTS ?= -W -SPHINXBUILD ?= sphinx-build -SPHINXMULTIVERSION ?= sphinx-multiversion -SOURCEDIR = source -BUILDDIR = build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
-%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -# Create multiple version of docs -multiversion: - @$(SPHINXMULTIVERSION) "$(SOURCEDIR)" "$(BUILDDIR)/html" diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/make.bat b/dolphinscheduler-python/pydolphinscheduler/docs/make.bat deleted file mode 100644 index feac4c92c0..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/make.bat +++ /dev/null @@ -1,54 +0,0 @@ -REM Licensed to the Apache Software Foundation (ASF) under one -REM or more contributor license agreements. See the NOTICE file -REM distributed with this work for additional information -REM regarding copyright ownership. The ASF licenses this file -REM to you under the Apache License, Version 2.0 (the -REM "License"); you may not use this file except in compliance -REM with the License. You may obtain a copy of the License at -REM -REM http://www.apache.org/licenses/LICENSE-2.0 -REM -REM Unless required by applicable law or agreed to in writing, -REM software distributed under the License is distributed on an -REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -REM KIND, either express or implied. See the License for the -REM specific language governing permissions and limitations -REM under the License. - -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=source -set BUILDDIR=build -REM Add opts `turn warnings into errors` strict sphinx-build behavior -set SPHINXOPTS=-W - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. 
- echo.If you don't have Sphinx installed, grab it from - echo.https://www.sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/_static/.gitkeep b/dolphinscheduler-python/pydolphinscheduler/docs/source/_static/.gitkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/_templates/versioning.html b/dolphinscheduler-python/pydolphinscheduler/docs/source/_templates/versioning.html deleted file mode 100644 index 47136c45cf..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/_templates/versioning.html +++ /dev/null @@ -1,27 +0,0 @@ -{# - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. -#} - -{% if versions %} -

{{ _('Versions') }}

- -{% endif %} diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/_templates/versions.html b/dolphinscheduler-python/pydolphinscheduler/docs/source/_templates/versions.html deleted file mode 100644 index 51b7271e9c..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/_templates/versions.html +++ /dev/null @@ -1,46 +0,0 @@ -{# - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. -#} - -{%- if current_version %} -
- - Other Versions - v: {{ current_version.name }} - - -
- {%- if versions.tags %} -
-
Tags
- {%- for item in versions.tags %} -
{{ item.name }}
- {%- endfor %} -
- {%- endif %} - {%- if versions.branches %} -
-
Branches
- {%- for item in versions.branches %} -
{{ item.name }}
- {%- endfor %} -
- {%- endif %} -
-
-{%- endif %} diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/api.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/api.rst deleted file mode 100644 index b170b6f870..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/api.rst +++ /dev/null @@ -1,47 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -API -=== - -Core ----- - -.. automodule:: pydolphinscheduler.core - :inherited-members: - -Models ------- - -.. automodule:: pydolphinscheduler.models - :inherited-members: - -Tasks ------ - -.. automodule:: pydolphinscheduler.tasks - :inherited-members: - -Constants ---------- - -.. automodule:: pydolphinscheduler.constants - -Exceptions ----------- - -.. automodule:: pydolphinscheduler.exceptions diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/cli.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/cli.rst deleted file mode 100644 index 60e8231abf..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/cli.rst +++ /dev/null @@ -1,36 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. 
See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Command Line Interface -====================== - -*PyDolphinScheduler* have mechanism call CLI(command line interface) to help user control it in Shell. - -Prepare -------- - -You have to :ref:`install PyDolphinScheduler ` first before you using -its CLI - -Usage ------ - -Here is basic usage about the command line of *PyDolphinScheduler* - -.. click:: pydolphinscheduler.cli.commands:cli - :prog: pydolphinscheduler - :nested: full diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/concept.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/concept.rst deleted file mode 100644 index 9a9527df1d..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/concept.rst +++ /dev/null @@ -1,151 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. 
Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Concepts -======== - -In this section, you would know the core concepts of *PyDolphinScheduler*. - -Process Definition ------------------- - -Process definition describe the whole things except `tasks`_ and `tasks dependence`_, which including -name, schedule interval, schedule start time and end time. You would know scheduler - -Process definition could be initialized in normal assign statement or in context manger. - -.. code-block:: python - - # Initialization with assign statement - pd = ProcessDefinition(name="my first process definition") - - # Or context manger - with ProcessDefinition(name="my first process definition") as pd: - pd.submit() - -Process definition is the main object communicate between *PyDolphinScheduler* and DolphinScheduler daemon. -After process definition and task is be declared, you could use `submit` and `run` notify server your definition. - -If you just want to submit your definition and create workflow, without run it, you should use attribute `submit`. -But if you want to run the workflow after you submit it, you could use attribute `run`. - -.. code-block:: python - - # Just submit definition, without run it - pd.submit() - - # Both submit and run definition - pd.run() - -Schedule -~~~~~~~~ - -We use parameter `schedule` determine the schedule interval of workflow, *PyDolphinScheduler* support seven -asterisks expression, and each of the meaning of position as below - -.. 
code-block:: text - - * * * * * * * - ┬ ┬ ┬ ┬ ┬ ┬ ┬ - │ │ │ │ │ │ │ - │ │ │ │ │ │ └─── year - │ │ │ │ │ └───── day of week (0 - 7) (0 to 6 are Sunday to Saturday, or use names; 7 is Sunday, the same as 0) - │ │ │ │ └─────── month (1 - 12) - │ │ │ └───────── day of month (1 - 31) - │ │ └─────────── hour (0 - 23) - │ └───────────── min (0 - 59) - └─────────────── second (0 - 59) - -Here we add some example crontab: - -- `0 0 0 * * ? *`: Workflow execute every day at 00:00:00. -- `10 2 * * * ? *`: Workflow execute hourly day at ten pass two. -- `10,11 20 0 1,2 * ? *`: Workflow execute first and second day of month at 00:20:10 and 00:20:11. - -Tenant -~~~~~~ - -Tenant is the user who run task command in machine or in virtual machine. it could be assign by simple string. - -.. code-block:: python - - # - pd = ProcessDefinition(name="process definition tenant", tenant="tenant_exists") - -.. note:: - - Make should tenant exists in target machine, otherwise it will raise an error when you try to run command - -Tasks ------ - -Task is the minimum unit running actual job, and it is nodes of DAG, aka directed acyclic graph. You could define -what you want to in the task. It have some required parameter to make uniqueness and definition. - -Here we use :py:meth:`pydolphinscheduler.tasks.Shell` as example, parameter `name` and `command` is required and must be provider. Parameter -`name` set name to the task, and parameter `command` declare the command you wish to run in this task. - -.. code-block:: python - - # We named this task as "shell", and just run command `echo shell task` - shell_task = Shell(name="shell", command="echo shell task") - -If you want to see all type of tasks, you could see :doc:`tasks/index`. - -Tasks Dependence -~~~~~~~~~~~~~~~~ - -You could define many tasks in on single `Process Definition`_. If all those task is in parallel processing, -then you could leave them alone without adding any additional information. 
But if there have some tasks should -not be run unless pre task in workflow have be done, we should set task dependence to them. Set tasks dependence -have two mainly way and both of them is easy. You could use bitwise operator `>>` and `<<`, or task attribute -`set_downstream` and `set_upstream` to do it. - -.. code-block:: python - - # Set task1 as task2 upstream - task1 >> task2 - # You could use attribute `set_downstream` too, is same as `task1 >> task2` - task1.set_downstream(task2) - - # Set task1 as task2 downstream - task1 << task2 - # It is same as attribute `set_upstream` - task1.set_upstream(task2) - - # Beside, we could set dependence between task and sequence of tasks, - # we set `task1` is upstream to both `task2` and `task3`. It is useful - # for some tasks have same dependence. - task1 >> [task2, task3] - -Task With Process Definition -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -In most of data orchestration cases, you should assigned attribute `process_definition` to task instance to -decide workflow of task. You could set `process_definition` in both normal assign or in context manger mode - -.. code-block:: python - - # Normal assign, have to explicit declaration and pass `ProcessDefinition` instance to task - pd = ProcessDefinition(name="my first process definition") - shell_task = Shell(name="shell", command="echo shell task", process_definition=pd) - - # Context manger, `ProcessDefinition` instance pd would implicit declaration to task - with ProcessDefinition(name="my first process definition") as pd: - shell_task = Shell(name="shell", command="echo shell task", - -With both `Process Definition`_, `Tasks`_ and `Tasks Dependence`_, we could build a workflow with multiple tasks. 
diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/conf.py b/dolphinscheduler-python/pydolphinscheduler/docs/source/conf.py deleted file mode 100644 index 23fc117fb7..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/conf.py +++ /dev/null @@ -1,121 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. 
- -import os -import sys -from pathlib import Path - -# For sphinx-multiversion, we need to build API docs of the corresponding package version, related issue: -# https://github.com/Holzhaus/sphinx-multiversion/issues/42 -pkg_src_dir = ( - Path(os.environ.get("SPHINX_MULTIVERSION_SOURCEDIR", default=".")) - .joinpath("../../src") - .resolve() -) -sys.path.insert(0, str(pkg_src_dir)) -# Debug to uncomment this to see the source path -# print("=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=") -# print(pkg_src_dir) -# [print(p) for p in sys.path] -# print("=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=") - - -# -- Project information ----------------------------------------------------- - -project = "pydolphinscheduler" -copyright = "2022, apache" -author = "apache dolphinscheduler contributors" - -# The full version, including alpha/beta/rc tags -release = "0.0.1" - - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - # Measures durations of Sphinx processing - "sphinx.ext.duration", - # Semi-automatic make docstrings to document - "sphinx.ext.autodoc", - "sphinx.ext.viewcode", - "sphinx.ext.autosectionlabel", - "sphinx_rtd_theme", - # Documenting command line interface - "sphinx_click.ext", - # Add inline tabbed content - "sphinx_inline_tabs", - "sphinx_copybutton", - "sphinx_multiversion", -] - -# Add any paths that contain templates here, relative to this directory. 
-templates_path = ["_templates"] - -# sphinx_multiversion configuration -html_sidebars = { - "**": [ - "versioning.html", - ], -} -# Match all exists tag for pydolphinscheduler expect version 2.0.4(not release apache dolphinscheduler) -smv_tag_whitelist = r"^(?!2.0.4)\d+\.\d+\.\d+$" -smv_branch_whitelist = "dev" -smv_remote_whitelist = r"^(origin|upstream)$" -smv_released_pattern = "^refs/tags/.*$" -smv_outputdir_format = "versions/{ref.name}" - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [] - -autodoc_default_options = { - "members": True, - "show-inheritance": True, - "private-members": True, - "undoc-members": True, - "member-order": "groupwise", -} - -autosectionlabel_prefix_document = True - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = "sphinx_rtd_theme" - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/config.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/config.rst deleted file mode 100644 index 29a143d713..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/config.rst +++ /dev/null @@ -1,218 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. 
The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Configuration -============= - -pydolphinscheduler has a built-in module setting necessary configuration to start and run your workflow code. -You could directly use them if you only want to run a quick start or for a simple job like POC. But if you -want to deep use pydolphinscheduler and even use it in production. You should probably need to modify and -change the built-in configuration. - -We have two ways to modify the configuration: - -- `Using Environment Variables`_: The more lightweight way to modify the configuration. it is useful in - containerization scenarios, like docker and k8s, or when you like to temporarily override configs in the - configuration file. -- `Using Configuration File`_: The more general way to modify the configuration. It is useful when you want - to persist and manage configuration files in one single file. - -Using Environment Variables ---------------------------- - -You could change the configuration by adding or modifying the operating system's environment variables. No -matter what way you used, as long as you can successfully modify the environment variables. We use two common -ways, `Bash `_ and `Python OS Module `_, as examples: - -By Bash -^^^^^^^ - -Setting environment variables via `Bash` is the most straightforward and easiest way. We give some examples about -how to change them by Bash. - -.. 
code-block:: bash - - # Modify Java Gateway Address - export PYDS_JAVA_GATEWAY_ADDRESS="192.168.1.1" - - # Modify Workflow Default User - export PYDS_WORKFLOW_USER="custom-user" - -After executing the commands above, both ``PYDS_JAVA_GATEWAY_ADDRESS`` and ``PYDS_WORKFLOW_USER`` will be changed. -The next time you execute and submit your workflow, it will submit to host `192.168.1.1`, and with workflow's user -named `custom-user`. - -By Python OS Module -^^^^^^^^^^^^^^^^^^^ - -pydolphinscheduler is a Python API for Apache DolphinScheduler, and you could modify or add system environment -variables via Python ``os`` module. In this example, we change variables as the same value as we change in -`Bash `_. It will take effect the next time you run your workflow, and call workflow ``run`` or ``submit`` -method next to ``os.environ`` statement. - -.. code-block:: python - - import os - # Modify Java Gateway Address - os.environ["PYDS_JAVA_GATEWAY_ADDRESS"] = "192.168.1.1" - - # Modify Workflow Default User - os.environ["PYDS_WORKFLOW_USER"] = "custom-user" - -All Configurations in Environment Variables -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -All environment variables as below, and you could modify their value via `Bash `_ or `Python OS Module `_ - -+------------------+------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| Variable Section | Variable Name | description | -+==================+====================================+====================================================================================================================+ -| | ``PYDS_JAVA_GATEWAY_ADDRESS`` | Default Java gateway address, will use its value when it is set. 
| -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| Java Gateway | ``PYDS_JAVA_GATEWAY_PORT`` | Default Java gateway port, will use its value when it is set. | -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_JAVA_GATEWAY_AUTO_CONVERT`` | Default boolean Java gateway auto convert, will use its value when it is set. | -+------------------+------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_USER_NAME`` | Default user name, will use when user's ``name`` when does not specify. | -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_USER_PASSWORD`` | Default user password, will use when user's ``password`` when does not specify. | -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| Default User | ``PYDS_USER_EMAIL`` | Default user email, will use when user's ``email`` when does not specify. | -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_USER_PHONE`` | Default user phone, will use when user's ``phone`` when does not specify. | -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_USER_STATE`` | Default user state, will use when user's ``state`` when does not specify. 
| -+------------------+------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_WORKFLOW_PROJECT`` | Default workflow project name, will use its value when workflow does not specify the attribute ``project``. | -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_WORKFLOW_TENANT`` | Default workflow tenant, will use its value when workflow does not specify the attribute ``tenant``. | -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| Default Workflow | ``PYDS_WORKFLOW_USER`` | Default workflow user, will use its value when workflow does not specify the attribute ``user``. | -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_WORKFLOW_QUEUE`` | Default workflow queue, will use its value when workflow does not specify the attribute ``queue``. | -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_WORKFLOW_WORKER_GROUP`` | Default workflow worker group, will use its value when workflow does not specify the attribute ``worker_group``. | -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_WORKFLOW_RELEASE_STATE`` | Default workflow release state, will use its value when workflow does not specify the attribute ``release_state``. 
| -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_WORKFLOW_TIME_ZONE`` | Default workflow worker group, will use its value when workflow does not specify the attribute ``timezone``. | -+ +------------------------------------+--------------------------------------------------------------------------------------------------------------------+ -| | ``PYDS_WORKFLOW_WARNING_TYPE`` | Default workflow warning type, will use its value when workflow does not specify the attribute ``warning_type``. | -+------------------+------------------------------------+--------------------------------------------------------------------------------------------------------------------+ - -.. note:: - - The scope of setting configuration via environment variable is in the workflow, and it will not change the - value of the configuration file. The :doc:`CLI ` command ``config --get`` and ``config --set`` operate - the value of the configuration file, so the command ``config --get`` may return a different value from what - you set in the environment variable, and command ``config --get`` will never change your environment variable. - -Using Configuration File ------------------------- - -If you want to persist and manage configuration in a file instead of environment variables, or maybe you want -want to save your configuration file to a version control system, like Git or SVN, and the way to change -configuration by file is the best choice. - -Export Configuration File -^^^^^^^^^^^^^^^^^^^^^^^^^ - -pydolphinscheduler allows you to change the built-in configurations via CLI or editor you like. pydolphinscheduler -integrated built-in configurations in its package, but you could also export it locally by CLI - -.. code-block:: bash - - pydolphinscheduler config --init - -And it will create a new YAML file in the path `~/pydolphinscheduler/config.yaml` by default. 
If you want to export -it to another path, you should set `PYDS_HOME` before you run command :code:`pydolphinscheduler config --init`. - -.. code-block:: bash - - export PYDS_HOME= - pydolphinscheduler config --init - -After that, your configuration file will export into `/config.yaml` instead of the default path. - -Change Configuration -^^^^^^^^^^^^^^^^^^^^ - -In section `export configuration file`_ you export the configuration file locally, and as a local file, you could -edit it with any editor you like. After you save your change in your editor, the latest configuration will work -when you run your workflow code. - -You could also query or change the configuration via CLI :code:`config --get ` or :code:`config --get `. -Both `--get` and `--set` could be called one or more times in single command, and you could only set the leaf -node of the configuration but could get the parent configuration, there are simple examples below: - -.. code-block:: bash - - # Get single configuration in the leaf node, - # The output look like below: - # java_gateway.address = 127.0.0.1 - pydolphinscheduler config --get java_gateway.address - - # Get multiple configuration in the leaf node, - # The output look like below: - # java_gateway.address = 127.0.0.1 - # java_gateway.port = 25333 - pydolphinscheduler config --get java_gateway.address --get java_gateway.port - - - # Get parent configuration which contain multiple leaf nodes, - # The output look like below: - # java_gateway = ordereddict([('address', '127.0.0.1'), ('port', 25333), ('auto_convert', True)]) - pydolphinscheduler config --get java_gateway - - # Set single configuration, - # The output look like below: - # Set configuration done. - pydolphinscheduler config --set java_gateway.address 192.168.1.1 - - # Set multiple configuration - # The output look like below: - # Set configuration done. 
- pydolphinscheduler config --set java_gateway.address 192.168.1.1 --set java_gateway.port 25334 - - # Set configuration not in leaf node will fail - # The output look like below: - # Raise error. - pydolphinscheduler config --set java_gateway 192.168.1.1,25334,True - -For more information about our CLI, you could see document :doc:`cli`. - -All Configurations in File -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Here are all our configurations for pydolphinscheduler. - -.. literalinclude:: ../../src/pydolphinscheduler/default_config.yaml - :language: yaml - :lines: 18- - -Priority --------- - -We have two ways to modify the configuration and there is a built-in config in pydolphinscheduler too. It is -very important to understand the priority of the configuration when you use them. The overview of configuration -priority is. - -``Environment Variables > Configurations File > Built-in Configurations`` - -This means that your setting in environment variables or configurations file will overwrite the built-in one. -And you could temporarily modify configurations by setting environment variables without modifying the global -config in the configuration file. diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/howto/index.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/howto/index.rst deleted file mode 100644 index a0b3c29c0c..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/howto/index.rst +++ /dev/null @@ -1,30 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. 
Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -HOWTOs -====== - -pydolphinscheduler HOWTOs are documents that cover a single, specific topic, and attempt to cover it fairly -completely. This collection is an effort to foster documentation that is more detailed than the :doc:`../concept` -and :doc:`../tutorial`. - -Currently, the HOWTOs are: - -.. toctree:: - :maxdepth: 2 - - remote-submit diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/howto/remote-submit.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/howto/remote-submit.rst deleted file mode 100644 index b7efdf4fc0..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/howto/remote-submit.rst +++ /dev/null @@ -1,51 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. 
- -Submit Your Code from Different machine -======================================= - -Generally, we use pydolphinscheduler as a client to DolphinScheduler, and consider we may change our workflow -code frequently, the best practice is running :ref:`python gateway service ` -in your server machine and submit the workflow code from your development machine, like a laptop or PC. This behavior -is supported by pydolphinscheduler out of box with one or two single command lines. - -Export Configuration File -------------------------- - -.. code-block:: bash - - pydolphinscheduler config --init - -your could find more detail in :ref:`configuration exporting ` - -Run API Server in Other Host ----------------------------- - -.. code-block:: bash - - pydolphinscheduler config --set java_gateway.address - -your could find more detail in :ref:`configuration setting ` - -Run API Server in Other Port ----------------------------- - -.. code-block:: bash - - pydolphinscheduler config --set java_gateway.port - -your could find more detail in :ref:`configuration setting ` diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/index.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/index.rst deleted file mode 100644 index 4dc0a949c9..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/index.rst +++ /dev/null @@ -1,46 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. 
Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -PyDolphinScheduler -================== - -**PyDolphinScheduler** is Python API for `Apache DolphinScheduler `_, -which allow you definition your workflow by Python code, aka workflow-as-codes. - -I could go and find how to :ref:`install ` the project. Or if you want to see simply example -then go and see :doc:`tutorial` for more detail. - - -.. toctree:: - :maxdepth: 2 - - start - tutorial - concept - tasks/index - howto/index - cli - config - api - resources_plugin/index - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/develop.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/develop.rst deleted file mode 100644 index e7d90ea03c..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/develop.rst +++ /dev/null @@ -1,46 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. 
See the License for the - specific language governing permissions and limitations - under the License. - -How to develop -============== - -When you want to create a new resource plugin, you need to add a new class in the module `resources_plugin`. - -The resource plugin class needs to inherit the abstract class `ResourcePlugin` and implement its abstract method `read_file` function. - -The parameter of the `__init__` function of `ResourcePlugin` is the prefix of STR type. You can override this function when necessary. - -The `read_file` function parameter of `ResourcePlugin` is the file suffix of STR type, and its return value is the file content, if it exists and is readable. - - -Example -------- -- Method `__init__`: Initiation method with `param`:`prefix` - -.. literalinclude:: ../../../src/pydolphinscheduler/resources_plugin/local.py - :start-after: [start init_method] - :end-before: [end init_method] - -- Method `read_file`: Get content from the given URI, The function parameter is the suffix of the file path. - -The file prefix has been initialized in init of the resource plugin. - -The prefix plus suffix is the absolute path of the file in this resource. - -.. literalinclude:: ../../../src/pydolphinscheduler/resources_plugin/local.py - :start-after: [start read_file_method] - :end-before: [end read_file_method] diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/github.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/github.rst deleted file mode 100644 index b3023377de..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/github.rst +++ /dev/null @@ -1,35 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. 
The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -GitHub -====== - -`GitHub` is a github resource plugin for pydolphinscheduler. - -When using a github resource plugin, you only need to add the `resource_plugin` parameter in the task subclass or workflow definition, -such as `resource_plugin=GitHub(prefix="https://github.com/xxx", access_token="ghpxx")`. -The token parameter is optional. You need to add it when your repository is a private repository. - -You can view this `document `_ -when creating a token. - -For the specific use of resource plugins, you can see `How to use` in :doc:`resource-plugin` - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.resources_plugin.github \ No newline at end of file diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/gitlab.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/gitlab.rst deleted file mode 100644 index fdf43c9d2f..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/gitlab.rst +++ /dev/null @@ -1,46 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. 
You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -GitLab -====== - -`GitLab` is a gitlab resource plugin for pydolphinscheduler. - -When using a gitlab resource plugin, you only need to add the `resource_plugin` parameter in the task subclass or workflow definition, -such as `resource_plugin=GitLab(prefix="xxx")`, if it is a public repository. - -If it is a private or Internal repository, you can use three ways to obtain authentication. - -The first is `Personal Access Tokens`, using `resource_plugin=GitLab(prefix="xxx", private_token="xxx")`. - -The second method is to obtain authentication through `username` and `password`: - -using `resource_plugin=GitLab(prefix="xxx", username="username", password="pwd")`. - -The third method is to obtain authentication through `OAuth Token`: - -using `resource_plugin=GitLab(prefix="xxx", oauth_token="xx")`. - -You can view this `document `_ -when creating a `Personal Access Tokens`. - -For the specific use of resource plugins, you can see `How to use` in :doc:`resource-plugin` - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.resources_plugin.gitlab \ No newline at end of file diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/index.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/index.rst deleted file mode 100644 index c984f06048..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/index.rst +++ /dev/null @@ -1,32 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. 
See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Resources_plugin -================ - -In this section - -.. toctree:: - :maxdepth: 1 - - develop - resource-plugin - local - github - gitlab - oss - s3 \ No newline at end of file diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/local.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/local.rst deleted file mode 100644 index 5da025a5c7..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/local.rst +++ /dev/null @@ -1,32 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. 
- -Local -===== - -`Local` is a local resource plugin for pydolphinscheduler. - -When using a local resource plugin, you only need to add the `resource_plugin` parameter in the task subclass or workflow definition, -such as `resource_plugin=Local("/tmp")`. - - -For the specific use of resource plugins, you can see `How to use` in :doc:`./resource-plugin` - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.resources_plugin.local \ No newline at end of file diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/oss.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/oss.rst deleted file mode 100644 index fbb6785d1d..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/oss.rst +++ /dev/null @@ -1,44 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -OSS -=== - -`OSS` is a Aliyun OSS resource plugin for pydolphinscheduler. - -When using a OSS resource plugin, you only need to add the `resource_plugin` parameter in the task subclass or workflow definition, -such as `resource_plugin=OSS(prefix="xxx")`, if the file is publicly readable. 
- -When the file is private, using `resource_plugin=OSS(prefix="xxx", access_key_id="xxx", access_key_secret="xxx")` - -Notice -The read permission of files in a bucket is inherited from the bucket by default. In other words, if the bucket is private, -the files in it are also private. - -But the read permission of the files in the bucket can be changed, in other words, the files in the private bucket can also be read publicly. - -So whether the `AccessKey` is needed depends on whether the file is private or not. - -You can view this `document `_ -when creating a pair `AccessKey`. - -For the specific use of resource plugins, you can see `How to use` in :doc:`resource-plugin` - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.resources_plugin.OSS diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/resource-plugin.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/resource-plugin.rst deleted file mode 100644 index 2a32526208..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/resource-plugin.rst +++ /dev/null @@ -1,75 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. 
- -ResourcePlugin -============== - -`ResourcePlugin` is an abstract class of resource plug-in parameters of task subclass and workflow. -All resource plugins need to inherit and override its abstract methods. - -Code ----- -.. literalinclude:: ../../../src/pydolphinscheduler/core/resource_plugin.py - :start-after: [start resource_plugin_definition] - :end-before: [end resource_plugin_definition] - -Dive Into ---------- -It has the following key functions. - -- Method `__init__`: The `__init__` function has STR type parameter `prefix`, which means the prefix of the resource. - -You can rewrite this function if necessary. - -.. literalinclude:: ../../../src/pydolphinscheduler/core/resource_plugin.py - :start-after: [start init_method] - :end-before: [end init_method] - -- Method `read_file`: Get content from the given URI, The function parameter is the suffix of the file path. - -The file prefix has been initialized in init of the resource plug-in. - -The prefix plus suffix is the absolute path of the file in this resource. - -It is an abstract function. You must rewrite it - -.. literalinclude:: ../../../src/pydolphinscheduler/core/resource_plugin.py - :start-after: [start abstractmethod read_file] - :end-before: [end abstractmethod read_file] - -.. automodule:: pydolphinscheduler.core.resource_plugin - -How to use ----------- -Resource plugin can be used in task subclasses and workflows. You can use the resource plugin by adding the `resource_plugin` parameter when they are initialized. -For example, local resource plugin, add `resource_plugin = Local("/tmp")`. - -The resource plugin we currently support are `local`, `github`, `gitlab`, `OSS`, `S3`. - -Here is an example. - -.. 
literalinclude:: ../../../src/pydolphinscheduler/examples/tutorial_resource_plugin.py - :start-after: [start workflow_declare] - :end-before: [end task_declare] - -When the resource_plugin parameter is defined in both the task subclass and the workflow, the resource_plugin defined in the task subclass is used first. - -If the task subclass does not define resource_plugin, but the resource_plugin is defined in the workflow, the resource_plugin in the workflow is used. - -Of course, if neither the task subclass nor the workflow specifies resource_plugin, the command at this time will be executed as a script, - -in other words, we are forward compatible. \ No newline at end of file diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/s3.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/s3.rst deleted file mode 100644 index f5bc1d37fe..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/s3.rst +++ /dev/null @@ -1,36 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -S3 -== - -`S3` is a Amazon S3 resource plugin for pydolphinscheduler. 
- -When using a Amazon S3 resource plugin, you only need to add the `resource_plugin` parameter in the task subclass or workflow definition, -such as `resource_plugin=S3(prefix="xxx")`, if the file is publicly readable. - -When the file is private, using `resource_plugin=S3(prefix="xxx", access_key_id="xxx", access_key_secret="xxx")` - -You can view this `document `_ -when creating a pair `AccessKey`. - -For the specific use of resource plugins, you can see `How to use` in :doc:`resource-plugin` - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.resources_plugin.S3 diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/start.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/start.rst deleted file mode 100644 index 270b5b855d..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/start.rst +++ /dev/null @@ -1,171 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Getting Started -=============== - -To get started with *PyDolphinScheduler* you must ensure python and pip -installed on your machine, if you're already set up, you can skip straight -to `Installing PyDolphinScheduler`_, otherwise please continue with -`Installing Python`_. 
- -Installing Python ------------------ - -How to install `python` and `pip` depends on what operating system -you're using. The python wiki provides up to date -`instructions for all platforms here`_. When you entering the website -and choice your operating system, you would be offered the choice and -select python version. *PyDolphinScheduler* recommend use version above -Python 3.6 and we highly recommend you install *Stable Releases* instead -of *Pre-releases*. - -After you have download and installed Python, you should open your terminal, -typing and running :code:`python --version` to check whether the installation -is correct or not. If all thing good, you could see the version in console -without error(here is a example after Python 3.8.7 installed) - -.. code-block:: bash - - python --version - -Will see detail of Python version, such as *Python 3.8.7* - -Installing PyDolphinScheduler ------------------------------ - -After Python is already installed on your machine following section -`installing Python`_, it easy to *PyDolphinScheduler* by pip. - -.. code-block:: bash - - python -m pip install apache-dolphinscheduler - -The latest version of *PyDolphinScheduler* would be installed after you run above -command in your terminal. You could go and `start Python Gateway Service`_ to finish -the prepare, and then go to :doc:`tutorial` to make your hand dirty. But if you -want to install the unreleased version of *PyDolphinScheduler*, you could go and see -section `installing PyDolphinScheduler in dev branch`_ for more detail. - -.. note:: - - Currently, we released multiple pre-release package in PyPI, you can see all released package - including pre-release in `release history `_. - You can fix the the package version if you want to install pre-release package, for example if - you want to install version `3.0.0-beta-2` package, you can run command - :code:`python -m pip install apache-dolphinscheduler==3.0.0b2`. 
- -Installing PyDolphinScheduler In DEV Branch -------------------------------------------- - -Because the project is developing and some of the features still not release. -If you want to try some thing unreleased you could install from the source code -which we hold in GitHub - -.. code-block:: bash - - # Clone Apache DolphinScheduler repository - git clone git@github.com:apache/dolphinscheduler.git - # Install PyDolphinScheduler in develop mode - cd dolphinscheduler-python/pydolphinscheduler && python -m pip install -e . - -After you installed *PyDolphinScheduler*, please remember `start Python Gateway Service`_ -which waiting for *PyDolphinScheduler*'s workflow definition require. - -Above command will clone whole dolphinscheduler source code to local, maybe you want to install latest pydolphinscheduler -package directly and do not care about other code(including Python gateway service code), you can execute command - -.. code-block:: bash - - # Must escape the '&' character by adding '\' - pip install -e "git+https://github.com/apache/dolphinscheduler.git#egg=apache-dolphinscheduler&subdirectory=dolphinscheduler-python/pydolphinscheduler" - -Start Python Gateway Service ----------------------------- - -Since **PyDolphinScheduler** is Python API for `Apache DolphinScheduler`_, it -could define workflow and tasks structure, but could not run it unless you -`install Apache DolphinScheduler`_ and start its API server which including -Python gateway service in it. We only and some key steps here and you could -go `install Apache DolphinScheduler`_ for more detail - -.. code-block:: bash - - # Start DolphinScheduler api-server which including python gateway service - ./bin/dolphinscheduler-daemon.sh start api-server - -To check whether the server is alive or not, you could run :code:`jps`. And -the server is health if keyword `ApiApplicationServer` in the console. - -.. code-block:: bash - - jps - # .... - # 201472 ApiApplicationServer - # .... - -.. 
note:: - - Please make sure you already enabled started Python gateway service along with `api-server`. The configuration is in - yaml config path `python-gateway.enabled : true` in api-server's configuration path in `api-server/conf/application.yaml`. - The default value is true and Python gateway service start when api server is been started. - -Run an Example --------------- - -Before run an example for pydolphinscheduler, you should get the example code from it source code. You could run -single bash command to get it - -.. code-block:: bash - - wget https://raw.githubusercontent.com/apache/dolphinscheduler/dev/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial.py - -or you could copy-paste the content from `tutorial source code`_. And then you could run the example in your -terminal - -.. code-block:: bash - - python tutorial.py - -If you want to submit your workflow to a remote API server, which means that your workflow script is different -from the API server, you should first change pydolphinscheduler configuration and then submit the workflow script - -.. code-block:: bash - - pydolphinscheduler config --init - pydolphinscheduler config --set java_gateway.address - python tutorial.py - -.. note:: - - You could see more information in :doc:`config` about all the configurations pydolphinscheduler supported. - -After that, you could go and see your DolphinScheduler web UI to find out a new workflow created by pydolphinscheduler, -and the path of web UI is `Project -> Workflow -> Workflow Definition`. - - -What's More ------------ - -If you do not familiar with *PyDolphinScheduler*, you could go to :doc:`tutorial` and see how it works. But -if you already know the basic usage or concept of *PyDolphinScheduler*, you could go and play with all -:doc:`tasks/index` *PyDolphinScheduler* supports, or see our :doc:`howto/index` about useful cases. - -.. 
_`instructions for all platforms here`: https://wiki.python.org/moin/BeginnersGuide/Download -.. _`Apache DolphinScheduler`: https://dolphinscheduler.apache.org -.. _`install Apache DolphinScheduler`: https://dolphinscheduler.apache.org/en-us/docs/latest/user_doc/guide/installation/standalone.html -.. _`tutorial source code`: https://raw.githubusercontent.com/apache/dolphinscheduler/dev/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial.py diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/condition.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/condition.rst deleted file mode 100644 index f6d7e6ad8f..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/condition.rst +++ /dev/null @@ -1,40 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Condition -========= - -A condition task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_condition_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. 
automodule:: pydolphinscheduler.tasks.condition - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Condition.yaml - :start-after: # under the License. - :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/datax.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/datax.rst deleted file mode 100644 index cb67a2fa9e..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/datax.rst +++ /dev/null @@ -1,46 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Datax -===== - -A DataX task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_datax_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.datax - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/DataX.yaml - :start-after: # under the License. - :language: yaml - - -example_datax.json: - -.. 
literalinclude:: ../../../examples/yaml_define/example_datax.json - :language: json diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/dependent.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/dependent.rst deleted file mode 100644 index d8e1599b2d..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/dependent.rst +++ /dev/null @@ -1,47 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Dependent -========= - -A dependent task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_dependent_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.dependent - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Dependent.yaml - :start-after: # under the License. - :language: yaml - -Dependent_External.yaml: - -.. literalinclude:: ../../../examples/yaml_define/Dependent_External.yaml - :start-after: # under the License. 
- :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/dvc.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/dvc.rst deleted file mode 100644 index 0127a982f3..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/dvc.rst +++ /dev/null @@ -1,41 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -DVC -=== - -A DVC task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_dvc_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.dvc - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Dvc.yaml - :start-after: # under the License. - :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/flink.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/flink.rst deleted file mode 100644 index 76eb484718..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/flink.rst +++ /dev/null @@ -1,40 +0,0 @@ -.. 
Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Flink -===== - -A flink task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_flink_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.flink - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Flink.yaml - :start-after: # under the License. - :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/func_wrap.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/func_wrap.rst deleted file mode 100644 index a4a2972933..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/func_wrap.rst +++ /dev/null @@ -1,33 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. 
The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Python Function Wrapper -======================= - -A decorator covert Python function into pydolphinscheduler's task. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/tutorial_decorator.py - :start-after: [start tutorial] - :end-before: [end tutorial] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.func_wrap diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/http.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/http.rst deleted file mode 100644 index 4e138c9989..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/http.rst +++ /dev/null @@ -1,29 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. 
See the License for the - specific language governing permissions and limitations - under the License. - -HTTP -==== - -.. automodule:: pydolphinscheduler.tasks.http - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Http.yaml - :start-after: # under the License. - :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/index.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/index.rst deleted file mode 100644 index 3f83f92675..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/index.rst +++ /dev/null @@ -1,48 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Tasks -===== - -In this section - -.. 
toctree:: - :maxdepth: 1 - - func_wrap - shell - sql - python - http - - switch - condition - dependent - - spark - flink - map_reduce - procedure - - datax - sub_process - - sagemaker - mlflow - openmldb - pytorch - dvc diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/map_reduce.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/map_reduce.rst deleted file mode 100644 index 7356880b26..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/map_reduce.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Map Reduce -========== - - -A Map Reduce task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_map_reduce_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.map_reduce - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/MapReduce.yaml - :start-after: # under the License. 
- :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/mlflow.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/mlflow.rst deleted file mode 100644 index b83903c26f..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/mlflow.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -MLflow -========= - - -A MLflow task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_mlflow_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.mlflow - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/mlflow.yaml - :start-after: # under the License. - :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/openmldb.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/openmldb.rst deleted file mode 100644 index 125313dc21..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/openmldb.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. 
Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -OpenMLDB -========= - - -A OpenMLDB task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_openmldb_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.openmldb - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/OpenMLDB.yaml - :start-after: # under the License. - :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/procedure.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/procedure.rst deleted file mode 100644 index 2f28efc526..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/procedure.rst +++ /dev/null @@ -1,29 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. 
The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Procedure -========= - -.. automodule:: pydolphinscheduler.tasks.procedure - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Procedure.yaml - :start-after: # under the License. - :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/python.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/python.rst deleted file mode 100644 index 1bf6210018..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/python.rst +++ /dev/null @@ -1,29 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Python -====== - -.. 
automodule:: pydolphinscheduler.tasks.python - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Python.yaml - :start-after: # under the License. - :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/pytorch.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/pytorch.rst deleted file mode 100644 index 4c7a5521fb..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/pytorch.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Pytorch -======= - - -A Pytorch task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_pytorch_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.pytorch - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Pytorch.yaml - :start-after: # under the License. 
- :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sagemaker.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sagemaker.rst deleted file mode 100644 index 36880d91d2..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sagemaker.rst +++ /dev/null @@ -1,46 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -SageMaker -========= - - -A SageMaker task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_sagemaker_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.sagemaker - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Sagemaker.yaml - :start-after: # under the License. - :language: yaml - -example_sagemaker_params.json: - -.. 
literalinclude:: ../../../examples/yaml_define/example_sagemaker_params.json - :language: json diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/shell.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/shell.rst deleted file mode 100644 index 2dd106a3b8..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/shell.rst +++ /dev/null @@ -1,41 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Shell -===== - -A shell task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/tutorial.py - :start-after: [start workflow_declare] - :end-before: [end task_relation_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.shell - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Shell.yaml - :start-after: # under the License. 
- :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/spark.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/spark.rst deleted file mode 100644 index d5a51db91a..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/spark.rst +++ /dev/null @@ -1,41 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Spark -===== - -A spark task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_spark_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.spark - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Spark.yaml - :start-after: # under the License. - :language: yaml diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sql.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sql.rst deleted file mode 100644 index 52df042b74..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sql.rst +++ /dev/null @@ -1,35 +0,0 @@ -.. 
Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -SQL -=== - -.. automodule:: pydolphinscheduler.tasks.sql - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Sql.yaml - :start-after: # under the License. - :language: yaml - -example_sql.sql: - -.. literalinclude:: ../../../examples/yaml_define/example_sql.sql - :start-after: */ - :language: sql diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sub_process.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sub_process.rst deleted file mode 100644 index 894dd0fbad..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/sub_process.rst +++ /dev/null @@ -1,38 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. 
Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Sub Process -=========== - -.. automodule:: pydolphinscheduler.tasks.sub_process - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/SubProcess.yaml - :start-after: # under the License. - :language: yaml - - - -example_subprocess.yaml: - -.. literalinclude:: ../../../examples/yaml_define/example_sub_workflow.yaml - :start-after: # under the License. - :language: yaml - diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/switch.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/switch.rst deleted file mode 100644 index 2fef589efb..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tasks/switch.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Switch -====== - -A switch task type's example and dive into information of **PyDolphinScheduler**. - -Example -------- - -.. 
literalinclude:: ../../../src/pydolphinscheduler/examples/task_switch_example.py - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -Dive Into ---------- - -.. automodule:: pydolphinscheduler.tasks.switch - - -YAML file example ------------------ - -.. literalinclude:: ../../../examples/yaml_define/Switch.yaml - :start-after: # under the License. - :language: yaml - diff --git a/dolphinscheduler-python/pydolphinscheduler/docs/source/tutorial.rst b/dolphinscheduler-python/pydolphinscheduler/docs/source/tutorial.rst deleted file mode 100644 index 57d21b2d29..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/docs/source/tutorial.rst +++ /dev/null @@ -1,319 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Tutorial -======== - -This tutorial shows you the basic concept of *PyDolphinScheduler* and tells all -things you should know before you submit or run your first workflow. If you -still have not installed *PyDolphinScheduler* and start DolphinScheduler, you -could go and see :ref:`how to getting start PyDolphinScheduler ` firstly. 
- -Overview of Tutorial --------------------- - -Here have an overview of our tutorial, and it looks a little complex but does not -worry about that because we explain this example below as detail as possible. - -There are two types of tutorials: traditional and task decorator. - -- **Traditional Way**: More general, support many :doc:`built-in task types `, it is convenient - when you build your workflow at the beginning. -- **Task Decorator**: A Python decorator allow you to wrap your function into pydolphinscheduler's task. Less - versatility to the traditional way because it only supported Python functions and without build-in tasks - supported. But it is helpful if your workflow is all built with Python or if you already have some Python - workflow code and want to migrate them to pydolphinscheduler. -- **YAML File**: We can use pydolphinscheduler CLI to create process using YAML file: :code:`pydolphinscheduler yaml -f tutorial.yaml`. - We can find more YAML file examples in `examples/yaml_define `_ - -.. tab:: Tradition - - .. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial.py - :dedent: 0 - :start-after: [start tutorial] - :end-before: [end tutorial] - -.. tab:: Task Decorator - - .. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial_decorator.py - :dedent: 0 - :start-after: [start tutorial] - :end-before: [end tutorial] - -.. tab:: YAML File - - .. literalinclude:: ../../examples/yaml_define/tutorial.yaml - :start-after: # under the License. - :language: yaml - -Import Necessary Module ------------------------ - -First of all, we should import the necessary module which we would use later just like other Python packages. - -.. tab:: Tradition - - .. 
literalinclude:: ../../src/pydolphinscheduler/examples/tutorial.py - :dedent: 0 - :start-after: [start package_import] - :end-before: [end package_import] - - In tradition tutorial we import :class:`pydolphinscheduler.core.process_definition.ProcessDefinition` and - :class:`pydolphinscheduler.tasks.shell.Shell`. - - If you want to use other task type you could click and :doc:`see all tasks we support ` - -.. tab:: Task Decorator - - .. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial_decorator.py - :dedent: 0 - :start-after: [start package_import] - :end-before: [end package_import] - - In task decorator tutorial we import :class:`pydolphinscheduler.core.process_definition.ProcessDefinition` and - :func:`pydolphinscheduler.tasks.func_wrap.task`. - -Process Definition Declaration ------------------------------- - -We should instantiate :class:`pydolphinscheduler.core.process_definition.ProcessDefinition` object after we -import them from `import necessary module`_. Here we declare basic arguments for process definition(aka, workflow). -We define the name of :code:`ProcessDefinition`, using `Python context manager`_ and it **the only required argument** -for `ProcessDefinition`. Besides, we also declare three arguments named :code:`schedule` and :code:`start_time` -which setting workflow schedule interval and schedule start_time, and argument :code:`tenant` defines which tenant -will be running this task in the DolphinScheduler worker. See :ref:`section tenant ` in -*PyDolphinScheduler* :doc:`concept` for more information. - -.. tab:: Tradition - - .. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial.py - :dedent: 0 - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -.. tab:: Task Decorator - - .. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial_decorator.py - :dedent: 0 - :start-after: [start workflow_declare] - :end-before: [end workflow_declare] - -.. tab:: YAML File - - .. 
literalinclude:: ../../examples/yaml_define/tutorial.yaml - :start-after: # under the License. - :end-before: # Define the tasks under the workflow - :language: yaml - -We could find more detail about :code:`ProcessDefinition` in :ref:`concept about process definition ` -if you are interested in it. For all arguments of object process definition, you could find in the -:class:`pydolphinscheduler.core.process_definition` API documentation. - -Task Declaration ----------------- - -.. tab:: Tradition - - We declare four tasks to show how to create tasks, and both of them are simple tasks of - :class:`pydolphinscheduler.tasks.shell` which runs `echo` command in the terminal. Besides the argument - `command` with :code:`echo` command, we also need to set the argument `name` for each task - *(not only shell task, `name` is required for each type of task)*. - - .. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial.py - :dedent: 0 - :start-after: [start task_declare] - :end-before: [end task_declare] - - Besides shell task, *PyDolphinScheduler* supports multiple tasks and you could find in :doc:`tasks/index`. - -.. tab:: Task Decorator - - We declare four tasks to show how to create tasks, and both of them are created by the task decorator which - using :func:`pydolphinscheduler.tasks.func_wrap.task`. All we have to do is add a decorator named - :code:`@task` to existing Python function, and then use them inside :class:`pydolphinscheduler.core.process_definition` - - .. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial_decorator.py - :dedent: 0 - :start-after: [start task_declare] - :end-before: [end task_declare] - - It makes our workflow more Pythonic, but be careful that when we use task decorator mode mean we only use - Python function as a task and could not use the :doc:`built-in tasks ` most of the cases. - -.. tab:: YAML File - - .. 
literalinclude:: ../../examples/yaml_define/tutorial.yaml - :start-after: # Define the tasks under the workflow - :language: yaml - -Setting Task Dependence ------------------------ - -After we declare both process definition and task, we have four tasks that are independent and will be running -in parallel. If you want to start one task until some task is finished, you have to set dependence on those -tasks. - -Set task dependence is quite easy by task's attribute :code:`set_downstream` and :code:`set_upstream` or by -bitwise operators :code:`>>` and :code:`<<` - -In this tutorial, task `task_parent` is the leading task of the whole workflow, then task `task_child_one` and -task `task_child_two` are its downstream tasks. Task `task_union` will not run unless both task `task_child_one` -and task `task_child_two` was done, because both two task is `task_union`'s upstream. - -.. tab:: Tradition - - .. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial.py - :dedent: 0 - :start-after: [start task_relation_declare] - :end-before: [end task_relation_declare] - -.. tab:: Task Decorator - - .. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial_decorator.py - :dedent: 0 - :start-after: [start task_relation_declare] - :end-before: [end task_relation_declare] - -.. tab:: YAML File - - We can use :code:`deps:[]` to set task dependence - - .. literalinclude:: ../../examples/yaml_define/tutorial.yaml - :start-after: # Define the tasks under the workflow - :language: yaml - -.. note:: - - We could set task dependence in batch mode if they have the same downstream or upstream by declaring those - tasks as task groups. In tutorial, We declare task `task_child_one` and `task_child_two` as task group named - `task_group`, then set `task_group` as downstream of task `task_parent`. You could see more detail in - :ref:`concept:Tasks Dependence` for more detail about how to set task dependence. 
- -Submit Or Run Workflow ----------------------- - -After that, we finish our workflow definition, with four tasks and task dependence, but all these things are -local, we should let the DolphinScheduler daemon know how the definition of workflow. So the last thing we -have to do is submit the workflow to the DolphinScheduler daemon. - -Fortunately, we have a convenient method to submit workflow via `ProcessDefinition` attribute :code:`run` which -will create workflow definition as well as workflow schedule. - -.. tab:: Tradition - - .. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial.py - :dedent: 0 - :start-after: [start submit_or_run] - :end-before: [end submit_or_run] - -.. tab:: Task Decorator - - .. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial_decorator.py - :dedent: 0 - :start-after: [start submit_or_run] - :end-before: [end submit_or_run] - -.. tab:: YAML File - - pydolphinscheduler YAML CLI always submit workflow. We can run the workflow if we set :code:`run: true` - - .. code-block:: yaml - - # Define the workflow - workflow: - name: "tutorial" - run: true - -At last, we could execute this workflow code in your terminal like other Python scripts, running -:code:`python tutorial.py` to trigger and execute it. - -.. note:: - - If you do not start your DolphinScheduler API server, you could find how to start it in - :ref:`start:start Python gateway service` for more detail. Besides attribute :code:`run`, we have attribute - :code:`submit` for object `ProcessDefinition` which just submits workflow to the daemon but does not set - the workflow schedule information. For more detail, you could see :ref:`concept:process definition`. - -DAG Graph After Tutorial Run ----------------------------- - -After we run the tutorial code, you could log in DolphinScheduler web UI, go and see the -`DolphinScheduler project page`_. 
There is a new process definition created by *PyDolphinScheduler* and it is -named "tutorial" or "tutorial_decorator". The task graph of the workflow is like below: - -.. literalinclude:: ../../src/pydolphinscheduler/examples/tutorial.py - :language: text - :lines: 24-28 - -Create Process Using YAML File ------------------------------- - -We can use pydolphinscheduler CLI to create process using YAML file - -.. code-block:: bash - - pydolphinscheduler yaml -f Shell.yaml - -We can use the following four special grammars to define workflows more flexibly. - -- :code:`$FILE{"file_name"}`: Read the file (:code:`file_name`) contents and replace them to that location. -- :code:`$WORKFLOW{"other_workflow.yaml"}`: Refer to another process defined using YAML file (:code:`other_workflow.yaml`) and replace the process name in this location. -- :code:`$ENV{env_name}`: Read the environment variable (:code:`env_name`) and replace it to that location. -- :code:`${CONFIG.key_name}`: Read the configuration value of key (:code:`key_name`) and replace it to that location. - - -In addition, when loading the file path use :code:`$FILE{"file_name"}` or :code:`$WORKFLOW{"other_workflow.yaml"}`, pydolphinscheduler will search in the path of the YAML file if the file does not exist. - -For example, our file directory structure is as follows: - -.. code-block:: bash - - . - └── yaml_define - ├── Condition.yaml - ├── DataX.yaml - ├── Dependent_External.yaml - ├── Dependent.yaml - ├── example_datax.json - ├── example_sql.sql - ├── example_subprocess.yaml - ├── Flink.yaml - ├── Http.yaml - ├── MapReduce.yaml - ├── MoreConfiguration.yaml - ├── Procedure.yaml - ├── Python.yaml - ├── Shell.yaml - ├── Spark.yaml - ├── Sql.yaml - ├── SubProcess.yaml - └── Switch.yaml - -After we run - -.. 
code-block:: bash - - pydolphinscheduler yaml -f yaml_define/SubProcess.yaml - - -the :code:`$WORKFLOW{"example_sub_workflow.yaml"}` will be set to :code:`$WORKFLOW{"yaml_define/example_sub_workflow.yaml"}`, because :code:`./example_sub_workflow.yaml` does not exist and :code:`yaml_define/example_sub_workflow.yaml` does. - -Furthermore, this feature supports recursion all the way down. - - -.. _`DolphinScheduler project page`: https://dolphinscheduler.apache.org/en-us/docs/latest/user_doc/guide/project.html -.. _`Python context manager`: https://docs.python.org/3/library/stdtypes.html#context-manager-types diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Condition.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Condition.yaml deleted file mode 100644 index c65b8c7aeb..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Condition.yaml +++ /dev/null @@ -1,43 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "Condition" - -# Define the tasks under the workflow -tasks: - - { "task_type": "Shell", "name": "pre_task_1", "command": "echo pre_task_1" } - - { "task_type": "Shell", "name": "pre_task_2", "command": "echo pre_task_2" } - - { "task_type": "Shell", "name": "pre_task_3", "command": "echo pre_task_3" } - - { "task_type": "Shell", "name": "success_branch", "command": "echo success_branch" } - - { "task_type": "Shell", "name": "fail_branch", "command": "echo fail_branch" } - - - name: condition - task_type: Condition - success_task: success_branch - failed_task: fail_branch - op: AND - groups: - - op: AND - groups: - - task: pre_task_1 - flag: true - - task: pre_task_2 - flag: true - - task: pre_task_3 - flag: false diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/DataX.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/DataX.yaml deleted file mode 100644 index 00ecd54685..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/DataX.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "DataX" - -# Define the tasks under the workflow -tasks: - - name: task - task_type: DataX - datasource_name: db - datatarget_name: db - sql: show tables; - target_table: table_test - - - name: task_custon_config - task_type: CustomDataX - json: $FILE{"example_datax.json"} diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dependent.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dependent.yaml deleted file mode 100644 index d69fac05da..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dependent.yaml +++ /dev/null @@ -1,76 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -workflow: - name: "Dependent" - -# Define the tasks under the workflow -tasks: - - name: dependent - task_type: Dependent - denpendence: - op: and - groups: - - op: or - groups: - - project_name: pydolphin - process_definition_name: task_dependent_external - dependent_task_name: task_1 - - - project_name: pydolphin - process_definition_name: task_dependent_external - dependent_task_name: task_2 - - - op: and - groups: - - project_name: pydolphin - process_definition_name: task_dependent_external - dependent_task_name: task_1 - dependent_date: LAST_WEDNESDAY - - - project_name: pydolphin - process_definition_name: task_dependent_external - dependent_task_name: task_2 - dependent_date: last24Hours - - - name: dependent_var - task_type: Dependent - denpendence: - op: and - groups: - - op: or - # we can use ${CONFIG.WORKFLOW_PROJECT} to set the value to configuration.WORKFLOW_PROJECT - # we can use $WORKFLOW{"Dependent_External.yaml"} to create or update a workflow from dependent_external.yaml and set the value to that workflow name - groups: - - project_name: ${CONFIG.WORKFLOW_PROJECT} - process_definition_name: $WORKFLOW{"Dependent_External.yaml"} - dependent_task_name: task_1 - - - project_name: ${CONFIG.WORKFLOW_PROJECT} - process_definition_name: $WORKFLOW{"Dependent_External.yaml"} - dependent_task_name: task_2 - - op: and - groups: - - project_name: ${CONFIG.WORKFLOW_PROJECT} - process_definition_name: $WORKFLOW{"Dependent_External.yaml"} - dependent_task_name: task_1 - dependent_date: LAST_WEDNESDAY - - - project_name: ${CONFIG.WORKFLOW_PROJECT} - process_definition_name: $WORKFLOW{"Dependent_External.yaml"} - dependent_task_name: task_2 - dependent_date: last24Hours diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dependent_External.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dependent_External.yaml deleted file mode 100644 index 577ff6a807..0000000000 --- 
a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dependent_External.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Define the workflow -workflow: - name: "task_dependent_external" - -# Define the tasks under the workflow -tasks: - - { "task_type": "Shell", "name": "task_1", "command": "echo task 1" } - - { "task_type": "Shell", "name": "task_2", "command": "echo task 2" } - - { "task_type": "Shell", "name": "task_3", "command": "echo task 3" } diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dvc.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dvc.yaml deleted file mode 100644 index a6ec18c372..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Dvc.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Define variable `repository` -repository: &repository "git@github.com:/dvc-data-repository-example.git" - -# Define the workflow -workflow: - name: "DVC" - release_state: "offline" - -# Define the tasks under the process -tasks: - - name: init_dvc - task_type: DVCInit - repository: *repository - store_url: ~/dvc_data - - - name: upload_data - task_type: DVCUpload - repository: *repository - data_path_in_dvc_repository: "iris" - data_path_in_worker: ~/source/iris - version: v1 - message: upload iris data v1 - - - name: download_data - task_type: DVCDownload - repository: *repository - data_path_in_dvc_repository: "iris" - data_path_in_worker: ~/target/iris - version: v1 diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Flink.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Flink.yaml deleted file mode 100644 index 2449d435a3..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Flink.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Define the workflow -workflow: - name: "Flink" - -# Define the tasks under the workflow -tasks: - - name: task - task_type: Flink - main_class: org.apache.flink.streaming.examples.wordcount.WordCount - main_package: test_java.jar - program_type: JAVA - deploy_mode: local diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Http.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Http.yaml deleted file mode 100644 index 1483aeb3d8..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Http.yaml +++ /dev/null @@ -1,37 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "Http" - -# Define the tasks under the workflow -tasks: - - name: task - task_type: Http - url: "https://httpbin.org/get" - http_method: "GET" - http_params: - - { "prop": "a", "httpParametersType": "PARAMETER", "value": "1" } - - { "prop": "b", "httpParametersType": "PARAMETER", "value": "2" } - - { - "prop": "Content-Type", - "httpParametersType": "header", - "value": "test", - } - http_check_condition: "STATUS_CODE_CUSTOM" - condition: "404" diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/MapReduce.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/MapReduce.yaml deleted file mode 100644 index e1a2b5709c..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/MapReduce.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "MapReduce" - -# Define the tasks under the workflow -tasks: - - name: task - task_type: MR - main_class: wordcount - main_package: test_java.jar - program_type: SCALA - main_args: /dolphinscheduler/tenant_exists/resources/file.txt /output/ds diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/MoreConfiguration.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/MoreConfiguration.yaml deleted file mode 100644 index 258aa33433..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/MoreConfiguration.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "MoreConfiguration" - param: - n: 1 - -# Define the tasks under the workflow -tasks: - - name: shell_0 - task_type: Shell - description: "yaml define task" - flag: "YES" - command: | - echo "$ENV{HOME}" - echo "${n}" - task_priority: "HIGH" - delay_time: 20 - fail_retry_times: 30 - fail_retry_interval: 5 - timeout_flag: "CLOSE" - timeout: 60 - local_params: - - { "prop": "n", "direct": "IN", "type": "VARCHAR", "value": "${n}" } diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/OpenMLDB.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/OpenMLDB.yaml deleted file mode 100644 index b455cb0768..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/OpenMLDB.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "OpenMLDB" - -# Define the tasks under the workflow -tasks: - - name: OpenMLDB - task_type: OpenMLDB - zookeeper: "127.0.0.1:2181" - zookeeper_path: "/openmldb" - execute_mode: "online" - sql: | - USE demo_db; - set @@job_timeout=200000; - LOAD DATA INFILE 'file:///tmp/train_sample.csv' - INTO TABLE talkingdata OPTIONS(mode='overwrite'); diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Procedure.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Procedure.yaml deleted file mode 100644 index 829a961c1a..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Procedure.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "Procedure" - -# Define the tasks under the workflow -tasks: - - name: task - task_type: Procedure - datasource_name: db - method: show tables; diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Python.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Python.yaml deleted file mode 100644 index 728b5c928e..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Python.yaml +++ /dev/null @@ -1,30 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Define the workflow -workflow: - name: "Python" - -# Define the tasks under the workflow -tasks: - - name: python - task_type: Python - definition: | - import os - print(os) - print("1") - print("2") diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Pytorch.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Pytorch.yaml deleted file mode 100644 index 8706824245..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Pytorch.yaml +++ /dev/null @@ -1,53 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Define the workflow -workflow: - name: "Pytorch" - -# Define the tasks under the workflow -tasks: - - # run project with existing environment - - name: task_existing_env - task_type: pytorch - script: main.py - script_params: --dry-run --no-cuda - project_path: https://github.com/pytorch/examples#mnist - python_command: /home/anaconda3/envs/pytorch/bin/python3 - - - # run project with creating conda environment - - name: task_conda_env - task_type: pytorch - script: main.py - script_params: --dry-run --no-cuda - project_path: https://github.com/pytorch/examples#mnist - is_create_environment: True - python_env_tool: conda - requirements: requirements.txt - conda_python_version: 3.7 - - # run project with creating virtualenv environment - - name: task_virtualenv_env - task_type: pytorch - script: main.py - script_params: --dry-run --no-cuda - project_path: https://github.com/pytorch/examples#mnist - is_create_environment: True - python_env_tool: virtualenv - requirements: requirements.txt diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Sagemaker.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Sagemaker.yaml deleted file mode 100644 index 9f77a3caa8..0000000000 --- 
a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Sagemaker.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Define the workflow -workflow: - name: "Sagemaker" - release_state: "offline" - -# Define the tasks under the process -tasks: - - name: sagemaker - task_type: Sagemaker - sagemaker_request_json: $FILE{"example_sagemaker_params.json"} - diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Shell.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Shell.yaml deleted file mode 100644 index fdbe126327..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Shell.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Define the workflow -workflow: - name: "Shell" - release_state: "offline" - run: true - -# Define the tasks under the process -tasks: - - name: task_parent - task_type: Shell - command: | - echo hello pydolphinscheduler - echo run task parent - - - name: task_child_one - task_type: Shell - deps: [task_parent] - command: echo "child one" - - - name: task_child_two - task_type: Shell - deps: [task_parent] - command: echo "child two" diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Spark.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Spark.yaml deleted file mode 100644 index e45514bbf1..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Spark.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "Spark" - -# Define the tasks under the workflow -tasks: - - name: task - task_type: Spark - main_class: org.apache.spark.examples.SparkPi - main_package: test_java.jar - program_type: SCALA - deploy_mode: local diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Sql.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Sql.yaml deleted file mode 100644 index c3c7e88ee1..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Sql.yaml +++ /dev/null @@ -1,45 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "Sql" - -# Define the tasks under the workflow -tasks: - - name: task_base - task_type: Sql - datasource_name: "db" - sql: show tables; - - - name: task_multi_line - task_type: Sql - datasource_name: "db" - sql: | - show tables; - select id from version where id=1; - - - name: task_file - task_type: Sql - datasource_name: "db" - sql: $FILE{"example_sql.sql"} - - # Or you can define task "task_union" it with one line - - { "task_type": "Sql", "name": "task_base_one_line", "datasource_name": "db", "sql": "select id from version where id=1;"} - - # Or you can define task "task_union" it with one line - - { "task_type": "Sql", "name": "task_file_one_line", "datasource_name": "db", "sql": '$FILE{"example_sql.sql"}'} diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/SubProcess.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/SubProcess.yaml deleted file mode 100644 index 0ea7549db4..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/SubProcess.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "SubWorkflow" - -tasks: - - name: example_workflow - task_type: SubProcess - process_definition_name: $WORKFLOW{"example_sub_workflow.yaml"} - - - { "task_type": "Shell", "deps": [example_workflow], "name": "task_3", "command": "echo task 3" } diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Switch.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Switch.yaml deleted file mode 100644 index 33ed68813e..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/Switch.yaml +++ /dev/null @@ -1,39 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "Switch" - param: - var: 1 - -# Define the tasks under the workflow -tasks: - - name: switch_child_1 - task_type: Shell - command: echo switch_child_1 - - - name: switch_child_2 - task_type: Shell - command: echo switch_child_2 - - - name: switch - task_type: Switch - condition: - - task: switch_child_1 - condition: "${var} > 1" - - task: switch_child_2 diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_datax.json b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_datax.json deleted file mode 100644 index 3db8092cb6..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_datax.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "job": { - "content": [ - { - "reader": { - "name": "mysqlreader", - "parameter": { - "username": "usr", - "password": "pwd", - "column": [ - "id", - "name", - "code", - "description" - ], - "splitPk": "id", - "connection": [ - { - "table": [ - "source_table" - ], - "jdbcUrl": [ - "jdbc:mysql://127.0.0.1:3306/source_db" - ] - } - ] - } - }, - "writer": { - "name": "mysqlwriter", - "parameter": { - "writeMode": "insert", - "username": "usr", - "password": "pwd", - "column": [ - "id", - "name" - ], - "connection": [ - { - "jdbcUrl": "jdbc:mysql://127.0.0.1:3306/target_db", - "table": [ - "target_table" - ] - } - ] - } - } - } - ], - "setting": { - "errorLimit": { - "percentage": 0, - "record": 0 - }, - "speed": { - "channel": 1, - "record": 1000 - } - } - } -} diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sagemaker_params.json b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sagemaker_params.json deleted file mode 100644 index 9403320355..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sagemaker_params.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "ParallelismConfiguration":{ - "MaxParallelExecutionSteps":1 - }, - 
"PipelineExecutionDescription":"run pipeline using ds", - "PipelineExecutionDisplayName":"ds-sagemaker-pipeline", - "PipelineName":"DsSagemakerPipeline", - "PipelineParameters":[ - { - "Name":"InputData", - "Value": "s3://sagemaker/dataset/dataset.csv" - }, - { - "Name":"InferenceData", - "Value": "s3://sagemaker/dataset/inference.csv" - } - ] -} diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sql.sql b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sql.sql deleted file mode 100644 index 06b5c4c16c..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sql.sql +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
-*/ - -select id from version where id=1; -select id from version where id=2; -select id from version where id=3; -select id from version where id=4; -select id from version where id=5; diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sub_workflow.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sub_workflow.yaml deleted file mode 100644 index af3a863da9..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/example_sub_workflow.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# Define the workflow -workflow: - name: "example_workflow_for_sub_workflow" - -# Define the tasks under the workflow -tasks: - - { "task_type": "Shell", "name": "task_1", "command": "echo task 1" } - - { "task_type": "Shell", "deps": [task_1], "name": "task_2", "command": "echo task 2" } - - { "task_type": "Shell", "deps": [task_2], "name": "task_3", "command": "echo task 3" } diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/mlflow.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/mlflow.yaml deleted file mode 100644 index 45e56726e1..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/mlflow.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- - -# Define variable `mlflow_tracking_uri` -mlflow_tracking_uri: &mlflow_tracking_uri "http://127.0.0.1:5000" - -# Define the workflow -workflow: - name: "MLflow" - -# Define the tasks under the workflow -tasks: - - name: train_xgboost_native - task_type: MLFlowProjectsCustom - repository: https://github.com/mlflow/mlflow#examples/xgboost/xgboost_native - mlflow_tracking_uri: *mlflow_tracking_uri - parameters: -P learning_rate=0.2 -P colsample_bytree=0.8 -P subsample=0.9 - experiment_name: xgboost - - - name: train_automl - task_type: MLFlowProjectsAutoML - mlflow_tracking_uri: *mlflow_tracking_uri - parameters: time_budget=30;estimator_list=['lgbm'] - experiment_name: automl_iris - model_name: iris_A - automl_tool: flaml - data_path: /data/examples/iris - - - name: deploy_docker - task_type: MLflowModels - deps: [train_automl] - model_uri: models:/iris_A/Production - mlflow_tracking_uri: *mlflow_tracking_uri - deploy_mode: DOCKER - port: 7002 - - - name: train_basic_algorithm - task_type: MLFlowProjectsBasicAlgorithm - mlflow_tracking_uri: *mlflow_tracking_uri - parameters: n_estimators=200;learning_rate=0.2 - experiment_name: basic_algorithm_iris - model_name: iris_B - algorithm: lightgbm - data_path: /data/examples/iris - search_params: max_depth=[5, 10];n_estimators=[100, 200] - - - name: deploy_mlflow - deps: [train_basic_algorithm] - task_type: MLflowModels - model_uri: models:/iris_B/Production - mlflow_tracking_uri: *mlflow_tracking_uri - deploy_mode: MLFLOW - port: 7001 - diff --git a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/tutorial.yaml b/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/tutorial.yaml deleted file mode 100644 index 104a8c367b..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/examples/yaml_define/tutorial.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Define the workflow -workflow: - name: "tutorial" - schedule: "0 0 0 * * ? *" - start_time: "2021-01-01" - tenant: "tenant_exists" - release_state: "offline" - run: true - -# Define the tasks under the workflow -tasks: - - name: task_parent - task_type: Shell - command: echo hello pydolphinscheduler - - - name: task_child_one - task_type: Shell - deps: [task_parent] - command: echo "child one" - - - name: task_child_two - task_type: Shell - deps: [task_parent] - command: echo "child two" - - - name: task_union - task_type: Shell - deps: [task_child_one, task_child_two] - command: echo "union" diff --git a/dolphinscheduler-python/pydolphinscheduler/pytest.ini b/dolphinscheduler-python/pydolphinscheduler/pytest.ini deleted file mode 100644 index b1aa850346..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/pytest.ini +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -[pytest] -# add path here to skip pytest scan it -norecursedirs = - tests/testing - # Integration test run seperated which do not calculate coverage, it will run in `tox -e integrate-test` - tests/integration diff --git a/dolphinscheduler-python/pydolphinscheduler/setup.cfg b/dolphinscheduler-python/pydolphinscheduler/setup.cfg deleted file mode 100644 index 13a83393a9..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/setup.cfg +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
diff --git a/dolphinscheduler-python/pydolphinscheduler/setup.py b/dolphinscheduler-python/pydolphinscheduler/setup.py deleted file mode 100644 index 66a1ffc86c..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/setup.py +++ /dev/null @@ -1,198 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""The script for setting up pydolphinscheduler.""" -import logging -import os -import sys -from distutils.dir_util import remove_tree -from os.path import dirname, join -from typing import List - -from setuptools import Command, find_packages, setup - -if sys.version_info[0] < 3: - raise Exception( - "pydolphinscheduler does not support Python 2. Please upgrade to Python 3." 
- ) - -logger = logging.getLogger(__name__) - -version = "dev" - -# Start package required -prod = [ - "boto3>=1.23.10", - "oss2>=2.16.0", - "python-gitlab>=2.10.1", - "click>=8.0.0", - "py4j~=0.10", - "ruamel.yaml", -] - -build = [ - "build", - "setuptools>=42", - "wheel", -] - -doc = [ - "sphinx>=4.3", - "sphinx_rtd_theme>=1.0", - "sphinx-click>=3.0", - "sphinx-inline-tabs", - "sphinx-copybutton>=0.4.0", - # Unreleased package have a feature we want(use correct version package for API ref), so we install from - # GitHub directly, see also: - # https://github.com/Holzhaus/sphinx-multiversion/issues/42#issuecomment-1210539786 - "sphinx-multiversion @ git+https://github.com/Holzhaus/sphinx-multiversion#egg=sphinx-multiversion", -] - -test = [ - "pytest>=6.2", - "freezegun>=1.1", - "coverage>=6.1", - "pytest-cov>=3.0", - "docker>=5.0.3", -] - -style = [ - "flake8>=4.0", - "flake8-docstrings>=1.6", - "flake8-black>=0.2", - "isort>=5.10", - "autoflake>=1.4", -] - -dev = style + test + doc + build - -all_dep = prod + dev -# End package required - - -def read(*names, **kwargs): - """Read file content from given file path.""" - return open( - join(dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8") - ).read() - - -class CleanCommand(Command): - """Command to clean up python api before setup by running `python setup.py pre_clean`.""" - - description = "Clean up project root" - user_options: List[str] = [] - clean_list = [ - "build", - "htmlcov", - "dist", - ".pytest_cache", - ".coverage", - ] - - def initialize_options(self) -> None: - """Set default values for options.""" - - def finalize_options(self) -> None: - """Set final values for options.""" - - def run(self) -> None: - """Run and remove temporary files.""" - for cl in self.clean_list: - if not os.path.exists(cl): - logger.info("Path %s do not exists.", cl) - elif os.path.isdir(cl): - remove_tree(cl) - else: - os.remove(cl) - logger.info("Finish pre_clean process.") - - -setup( - 
name="apache-dolphinscheduler", - version=version, - license="Apache License 2.0", - description="Apache DolphinScheduler Python API", - long_description=read("README.md"), - # Make sure pypi is expecting markdown - long_description_content_type="text/markdown", - author="Apache Software Foundation", - author_email="dev@dolphinscheduler.apache.org", - url="https://dolphinscheduler.apache.org/", - python_requires=">=3.6", - keywords=[ - "dolphinscheduler", - "workflow", - "scheduler", - "taskflow", - ], - project_urls={ - "Homepage": "https://dolphinscheduler.apache.org", - "Documentation": "https://dolphinscheduler.apache.org/python/dev/index.html", - "Source": "https://github.com/apache/dolphinscheduler/tree/dev/dolphinscheduler-python/" - "pydolphinscheduler", - "Issue Tracker": "https://github.com/apache/dolphinscheduler/issues?" - "q=is%3Aissue+is%3Aopen+label%3APython", - "Discussion": "https://github.com/apache/dolphinscheduler/discussions", - "Twitter": "https://twitter.com/dolphinschedule", - }, - packages=find_packages(where="src"), - package_dir={"": "src"}, - include_package_data=True, - package_data={ - "pydolphinscheduler": ["default_config.yaml"], - }, - platforms=["any"], - classifiers=[ - # complete classifier list: http://pypi.python.org/pypi?%3Aaction=list_classifiers - "Development Status :: 4 - Beta", - "Environment :: Console", - "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", - "Operating System :: Unix", - "Operating System :: POSIX", - "Operating System :: Microsoft :: Windows", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: 
Python :: Implementation :: PyPy", - "Topic :: Software Development :: User Interfaces", - ], - install_requires=prod, - extras_require={ - "all": all_dep, - "dev": dev, - "style": style, - "test": test, - "doc": doc, - "build": build, - }, - cmdclass={ - "pre_clean": CleanCommand, - }, - entry_points={ - "console_scripts": [ - "pydolphinscheduler = pydolphinscheduler.cli.commands:cli", - ], - }, -) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/__init__.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/__init__.py deleted file mode 100644 index 2a7b55430c..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Init root of pydolphinscheduler.""" - -from pkg_resources import get_distribution - -__version__ = get_distribution("apache-dolphinscheduler").version diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/cli/__init__.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/cli/__init__.py deleted file mode 100644 index 5f30c83241..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/cli/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Commands line interface of pydolphinscheduler.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/cli/commands.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/cli/commands.py deleted file mode 100644 index 8d923f1406..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/cli/commands.py +++ /dev/null @@ -1,106 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Commands line interface's command of pydolphinscheduler.""" - -import click -from click import echo - -import pydolphinscheduler -from pydolphinscheduler.configuration import ( - get_single_config, - init_config_file, - set_single_config, -) -from pydolphinscheduler.core.yaml_process_define import create_process_definition - -version_option_val = ["major", "minor", "micro"] - - -@click.group() -def cli(): - """Apache DolphinScheduler Python API's command line interface.""" - - -@cli.command() -@click.option( - "--part", - "-p", - required=False, - type=click.Choice(version_option_val, case_sensitive=False), - multiple=False, - help="The part of version your want to get.", -) -def version(part: str) -> None: - """Show current version of pydolphinscheduler.""" - if part: - idx = version_option_val.index(part) - echo(f"{pydolphinscheduler.__version__.split('.')[idx]}") - else: - echo(f"{pydolphinscheduler.__version__}") - - -@cli.command() -@click.option( - "--init", - "-i", - is_flag=True, - help="Initialize and create configuration file to `PYDS_HOME`.", -) -@click.option( - "--set", - "-s", - "setter", - multiple=True, - type=click.Tuple([str, str]), - help="Set specific setting to config file." - "Use multiple ``--set `` options to set multiple configs", -) -@click.option( - "--get", - "-g", - "getter", - multiple=True, - type=str, - help="Get specific setting from config file." 
- "Use multiple ``--get `` options to get multiple configs", -) -def config(getter, setter, init) -> None: - """Manage the configuration for pydolphinscheduler.""" - if init: - init_config_file() - elif getter: - click.echo("The configuration query as below:\n") - configs_kv = [f"{key} = {get_single_config(key)}" for key in getter] - click.echo("\n".join(configs_kv)) - elif setter: - for key, val in setter: - set_single_config(key, val) - click.echo("Set configuration done.") - - -@cli.command() -@click.option( - "--yaml_file", - "-f", - required=True, - help="YAML file path", - type=click.Path(exists=True), -) -def yaml(yaml_file) -> None: - """Create process definition using YAML file.""" - create_process_definition(yaml_file) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/configuration.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/configuration.py deleted file mode 100644 index 860f9869f3..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/configuration.py +++ /dev/null @@ -1,193 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Configuration module for pydolphinscheduler.""" -import os -from pathlib import Path -from typing import Any - -from pydolphinscheduler.exceptions import PyDSConfException -from pydolphinscheduler.utils import file -from pydolphinscheduler.utils.yaml_parser import YamlParser - -BUILD_IN_CONFIG_PATH = Path(__file__).resolve().parent.joinpath("default_config.yaml") - - -def config_path() -> Path: - """Get the path of pydolphinscheduler configuration file.""" - pyds_home = os.environ.get("PYDS_HOME", "~/pydolphinscheduler") - config_file_path = Path(pyds_home).joinpath("config.yaml").expanduser() - return config_file_path - - -def get_configs() -> YamlParser: - """Get all configuration settings from configuration file. - - Will use custom configuration file first if it exists, otherwise default configuration file in - default path. - """ - path = str(config_path()) if config_path().exists() else BUILD_IN_CONFIG_PATH - with open(path, mode="r") as f: - return YamlParser(f.read()) - - -def init_config_file() -> None: - """Initialize configuration file by default configs.""" - if config_path().exists(): - raise PyDSConfException( - "Initialize configuration false to avoid overwrite configure by accident, file already exists " - "in %s, if you wan to overwrite the exists configure please remove the exists file manually.", - str(config_path()), - ) - file.write(content=str(get_configs()), to_path=str(config_path())) - - -def get_single_config(key: str) -> Any: - """Get single config to configuration file. - - Support get from nested keys by delimiter ``.``. - - For example, yaml config as below: - - .. code-block:: yaml - - one: - two1: - three: value1 - two2: value2 - - you could get ``value1`` and ``value2`` by nested path - - .. code-block:: python - - value1 = get_single_config("one.two1.three") - value2 = get_single_config("one.two2") - - :param key: The config key want to get it value. 
- """ - config = get_configs() - if key not in config: - raise PyDSConfException( - "Configuration path %s do not exists. Can not get configuration.", key - ) - return config[key] - - -def set_single_config(key: str, value: Any) -> None: - """Change single config to configuration file. - - For example, yaml config as below: - - .. code-block:: yaml - - one: - two1: - three: value1 - two2: value2 - - you could change ``value1`` to ``value3``, also change ``value2`` to ``value4`` by nested path assigned - - .. code-block:: python - - set_single_config["one.two1.three"] = "value3" - set_single_config["one.two2"] = "value4" - - :param key: The config key want change. - :param value: The new value want to set. - """ - config = get_configs() - if key not in config: - raise PyDSConfException( - "Configuration path %s do not exists. Can not set configuration.", key - ) - config[key] = value - file.write(content=str(config), to_path=str(config_path()), overwrite=True) - - -def get_int(val: Any) -> int: - """Covert value to int.""" - return int(val) - - -def get_bool(val: Any) -> bool: - """Covert value to boolean.""" - if isinstance(val, str): - return val.lower() in {"true", "t"} - elif isinstance(val, int): - return val == 1 - else: - return bool(val) - - -# Start Common Configuration Settings - -# Add configs as module variables to avoid read configuration multiple times when -# Get common configuration setting -# Set or get multiple configs in single time -configs: YamlParser = get_configs() - -# Java Gateway Settings -JAVA_GATEWAY_ADDRESS = os.environ.get( - "PYDS_JAVA_GATEWAY_ADDRESS", configs.get("java_gateway.address") -) -JAVA_GATEWAY_PORT = get_int( - os.environ.get("PYDS_JAVA_GATEWAY_PORT", configs.get("java_gateway.port")) -) -JAVA_GATEWAY_AUTO_CONVERT = get_bool( - os.environ.get( - "PYDS_JAVA_GATEWAY_AUTO_CONVERT", configs.get("java_gateway.auto_convert") - ) -) - -# User Settings -USER_NAME = os.environ.get("PYDS_USER_NAME", configs.get("default.user.name")) 
-USER_PASSWORD = os.environ.get( - "PYDS_USER_PASSWORD", configs.get("default.user.password") -) -USER_EMAIL = os.environ.get("PYDS_USER_EMAIL", configs.get("default.user.email")) -USER_PHONE = str(os.environ.get("PYDS_USER_PHONE", configs.get("default.user.phone"))) -USER_STATE = get_int( - os.environ.get("PYDS_USER_STATE", configs.get("default.user.state")) -) - -# Workflow Settings -WORKFLOW_PROJECT = os.environ.get( - "PYDS_WORKFLOW_PROJECT", configs.get("default.workflow.project") -) -WORKFLOW_TENANT = os.environ.get( - "PYDS_WORKFLOW_TENANT", configs.get("default.workflow.tenant") -) -WORKFLOW_USER = os.environ.get( - "PYDS_WORKFLOW_USER", configs.get("default.workflow.user") -) -WORKFLOW_QUEUE = os.environ.get( - "PYDS_WORKFLOW_QUEUE", configs.get("default.workflow.queue") -) -WORKFLOW_RELEASE_STATE = os.environ.get( - "PYDS_WORKFLOW_RELEASE_STATE", configs.get("default.workflow.release_state") -) -WORKFLOW_WORKER_GROUP = os.environ.get( - "PYDS_WORKFLOW_WORKER_GROUP", configs.get("default.workflow.worker_group") -) -WORKFLOW_TIME_ZONE = os.environ.get( - "PYDS_WORKFLOW_TIME_ZONE", configs.get("default.workflow.time_zone") -) -WORKFLOW_WARNING_TYPE = os.environ.get( - "PYDS_WORKFLOW_WARNING_TYPE", configs.get("default.workflow.warning_type") -) - -# End Common Configuration Setting diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/constants.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/constants.py deleted file mode 100644 index bedbbf2f5e..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/constants.py +++ /dev/null @@ -1,122 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Constants for pydolphinscheduler.""" - - -class TaskPriority(str): - """Constants for task priority.""" - - HIGHEST = "HIGHEST" - HIGH = "HIGH" - MEDIUM = "MEDIUM" - LOW = "LOW" - LOWEST = "LOWEST" - - -class TaskFlag(str): - """Constants for task flag.""" - - YES = "YES" - NO = "NO" - - -class TaskTimeoutFlag(str): - """Constants for task timeout flag.""" - - CLOSE = "CLOSE" - - -class TaskType(str): - """Constants for task type, it will also show you which kind we support up to now.""" - - SHELL = "SHELL" - HTTP = "HTTP" - PYTHON = "PYTHON" - SQL = "SQL" - SUB_PROCESS = "SUB_PROCESS" - PROCEDURE = "PROCEDURE" - DATAX = "DATAX" - DEPENDENT = "DEPENDENT" - CONDITIONS = "CONDITIONS" - SWITCH = "SWITCH" - FLINK = "FLINK" - SPARK = "SPARK" - MR = "MR" - SAGEMAKER = "SAGEMAKER" - MLFLOW = "MLFLOW" - OPENMLDB = "OPENMLDB" - PYTORCH = "PYTORCH" - DVC = "DVC" - - -class DefaultTaskCodeNum(str): - """Constants and default value for default task code number.""" - - DEFAULT = 1 - - -class JavaGatewayDefault(str): - """Constants and default value for java gateway.""" - - RESULT_MESSAGE_KEYWORD = "msg" - RESULT_MESSAGE_SUCCESS = "success" - - RESULT_STATUS_KEYWORD = "status" - RESULT_STATUS_SUCCESS = "SUCCESS" - - RESULT_DATA = "data" - - -class Delimiter(str): - """Constants for delimiter.""" - - BAR = "-" - DASH = "/" - COLON = ":" - UNDERSCORE = "_" - DIRECTION = "->" - - -class Time(str): - 
"""Constants for date.""" - - FMT_STD_DATE = "%Y-%m-%d" - LEN_STD_DATE = 10 - - FMT_DASH_DATE = "%Y/%m/%d" - - FMT_SHORT_DATE = "%Y%m%d" - LEN_SHORT_DATE = 8 - - FMT_STD_TIME = "%H:%M:%S" - FMT_NO_COLON_TIME = "%H%M%S" - - -class ResourceKey(str): - """Constants for key of resource.""" - - ID = "id" - - -class Symbol(str): - """Constants for symbol.""" - - SLASH = "/" - POINT = "." - COMMA = "," - UNDERLINE = "_" diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/__init__.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/__init__.py deleted file mode 100644 index b997c3e9de..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Init pydolphinscheduler.core package.""" - -from pydolphinscheduler.core.database import Database -from pydolphinscheduler.core.engine import Engine -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.core.task import Task - -__all__ = [ - "Database", - "Engine", - "ProcessDefinition", - "Task", -] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/database.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/database.py deleted file mode 100644 index 4a93f22f3f..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/database.py +++ /dev/null @@ -1,62 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Module database.""" - -from typing import Dict - -from py4j.protocol import Py4JJavaError - -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.java_gateway import JavaGate - - -class Database(dict): - """database object, get information about database. - - You provider database_name contain connection information, it decisions which - database type and database instance would run task. 
- """ - - def __init__(self, database_name: str, type_key, database_key, *args, **kwargs): - super().__init__(*args, **kwargs) - self._database = {} - self.database_name = database_name - self[type_key] = self.database_type - self[database_key] = self.database_id - - @property - def database_type(self) -> str: - """Get database type from java gateway, a wrapper for :func:`get_database_info`.""" - return self.get_database_info(self.database_name).get("type") - - @property - def database_id(self) -> str: - """Get database id from java gateway, a wrapper for :func:`get_database_info`.""" - return self.get_database_info(self.database_name).get("id") - - def get_database_info(self, name) -> Dict: - """Get database info from java gateway, contains database id, type, name.""" - if self._database: - return self._database - else: - try: - self._database = JavaGate().get_datasource_info(name) - # Handler database source do not exists error, for now we just terminate the process. - except Py4JJavaError as ex: - raise PyDSParamException(str(ex.java_exception)) - return self._database diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/engine.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/engine.py deleted file mode 100644 index 41021ed474..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/engine.py +++ /dev/null @@ -1,94 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Module engine.""" - -from typing import Dict, Optional - -from py4j.protocol import Py4JJavaError - -from pydolphinscheduler.core.task import Task -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.java_gateway import JavaGate - - -class ProgramType(str): - """Type of program engine runs, for now it just contain `JAVA`, `SCALA` and `PYTHON`.""" - - JAVA = "JAVA" - SCALA = "SCALA" - PYTHON = "PYTHON" - - -class Engine(Task): - """Task engine object, declare behavior for engine task to dolphinscheduler. - - This is the parent class of spark, flink and mr tasks, - and is used to provide the programType, mainClass and mainJar task parameters for reuse. - """ - - def __init__( - self, - name: str, - task_type: str, - main_class: str, - main_package: str, - program_type: Optional[ProgramType] = ProgramType.SCALA, - *args, - **kwargs - ): - super().__init__(name, task_type, *args, **kwargs) - self.main_class = main_class - self.main_package = main_package - self.program_type = program_type - self._resource = {} - - def get_resource_info(self, program_type, main_package): - """Get resource info from java gateway, contains resource id, name.""" - if self._resource: - return self._resource - else: - try: - self._resource = JavaGate().get_resources_file_info( - program_type, main_package - ) - # Handler source do not exists error, for now we just terminate the process. 
- except Py4JJavaError as ex: - raise PyDSParamException(str(ex.java_exception)) - return self._resource - - def get_jar_id(self) -> int: - """Get jar id from java gateway, a wrapper for :func:`get_resource_info`.""" - return self.get_resource_info(self.program_type, self.main_package).get("id") - - @property - def task_params(self, camel_attr: bool = True, custom_attr: set = None) -> Dict: - """Override Task.task_params for engine children task. - - children task have some specials attribute for task_params, and is odd if we - directly set as python property, so we Override Task.task_params here. - """ - params = super().task_params - custom_params = { - "programType": self.program_type, - "mainClass": self.main_class, - "mainJar": { - "id": self.get_jar_id(), - }, - } - params.update(custom_params) - return params diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/process_definition.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/process_definition.py deleted file mode 100644 index 62de7ed1b4..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/process_definition.py +++ /dev/null @@ -1,424 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Module process definition, core class for workflow define.""" - -import json -from datetime import datetime -from typing import Any, Dict, List, Optional, Set - -from pydolphinscheduler import configuration -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.resource import Resource -from pydolphinscheduler.core.resource_plugin import ResourcePlugin -from pydolphinscheduler.exceptions import PyDSParamException, PyDSTaskNoFoundException -from pydolphinscheduler.java_gateway import JavaGate -from pydolphinscheduler.models import Base, Project, Tenant, User -from pydolphinscheduler.utils.date import MAX_DATETIME, conv_from_str, conv_to_schedule - - -class ProcessDefinitionContext: - """Class process definition context, use when task get process definition from context expression.""" - - _context_managed_process_definition: Optional["ProcessDefinition"] = None - - @classmethod - def set(cls, pd: "ProcessDefinition") -> None: - """Set attribute self._context_managed_process_definition.""" - cls._context_managed_process_definition = pd - - @classmethod - def get(cls) -> Optional["ProcessDefinition"]: - """Get attribute self._context_managed_process_definition.""" - return cls._context_managed_process_definition - - @classmethod - def delete(cls) -> None: - """Delete attribute self._context_managed_process_definition.""" - cls._context_managed_process_definition = None - - -class ProcessDefinition(Base): - """process definition object, will define process definition attribute, task, relation. - - TODO: maybe we should rename this class, currently use DS object name. - - :param user: The user for current process definition. Will create a new one if it do not exists. If your - parameter ``project`` already exists but project's create do not belongs to ``user``, will grant - ``project`` to ``user`` automatically. 
- :param project: The project for current process definition. You could see the workflow in this project - thought Web UI after it :func:`submit` or :func:`run`. It will create a new project belongs to - ``user`` if it does not exists. And when ``project`` exists but project's create do not belongs - to ``user``, will grant `project` to ``user`` automatically. - :param resource_list: Resource files required by the current process definition.You can create and modify - resource files from this field. When the process definition is submitted, these resource files are - also submitted along with it. - """ - - # key attribute for identify ProcessDefinition object - _KEY_ATTR = { - "name", - "project", - "tenant", - "release_state", - "param", - } - - _DEFINE_ATTR = { - "name", - "description", - "_project", - "_tenant", - "worker_group", - "warning_type", - "warning_group_id", - "timeout", - "release_state", - "param", - "tasks", - "task_definition_json", - "task_relation_json", - "resource_list", - } - - def __init__( - self, - name: str, - description: Optional[str] = None, - schedule: Optional[str] = None, - start_time: Optional[str] = None, - end_time: Optional[str] = None, - timezone: Optional[str] = configuration.WORKFLOW_TIME_ZONE, - user: Optional[str] = configuration.WORKFLOW_USER, - project: Optional[str] = configuration.WORKFLOW_PROJECT, - tenant: Optional[str] = configuration.WORKFLOW_TENANT, - worker_group: Optional[str] = configuration.WORKFLOW_WORKER_GROUP, - warning_type: Optional[str] = configuration.WORKFLOW_WARNING_TYPE, - warning_group_id: Optional[int] = 0, - timeout: Optional[int] = 0, - release_state: Optional[str] = configuration.WORKFLOW_RELEASE_STATE, - param: Optional[Dict] = None, - resource_plugin: Optional[ResourcePlugin] = None, - resource_list: Optional[List[Resource]] = None, - ): - super().__init__(name, description) - self.schedule = schedule - self._start_time = start_time - self._end_time = end_time - self.timezone = timezone - 
self._user = user - self._project = project - self._tenant = tenant - self.worker_group = worker_group - self.warning_type = warning_type - if warning_type.strip().upper() not in ("FAILURE", "SUCCESS", "ALL", "NONE"): - raise PyDSParamException( - "Parameter `warning_type` with unexpect value `%s`", warning_type - ) - else: - self.warning_type = warning_type.strip().upper() - self.warning_group_id = warning_group_id - self.timeout = timeout - self._release_state = release_state - self.param = param - self.tasks: dict = {} - self.resource_plugin = resource_plugin - # TODO how to fix circle import - self._task_relations: set["TaskRelation"] = set() # noqa: F821 - self._process_definition_code = None - self.resource_list = resource_list or [] - - def __enter__(self) -> "ProcessDefinition": - ProcessDefinitionContext.set(self) - return self - - def __exit__(self, exc_type, exc_val, exc_tb) -> None: - ProcessDefinitionContext.delete() - - @property - def tenant(self) -> Tenant: - """Get attribute tenant.""" - return Tenant(self._tenant) - - @tenant.setter - def tenant(self, tenant: Tenant) -> None: - """Set attribute tenant.""" - self._tenant = tenant.name - - @property - def project(self) -> Project: - """Get attribute project.""" - return Project(self._project) - - @project.setter - def project(self, project: Project) -> None: - """Set attribute project.""" - self._project = project.name - - @property - def user(self) -> User: - """Get user object. - - For now we just get from python models but not from java gateway models, so it may not correct. 
- """ - return User(name=self._user, tenant=self._tenant) - - @staticmethod - def _parse_datetime(val: Any) -> Any: - if val is None or isinstance(val, datetime): - return val - elif isinstance(val, str): - return conv_from_str(val) - else: - raise PyDSParamException("Do not support value type %s for now", type(val)) - - @property - def start_time(self) -> Any: - """Get attribute start_time.""" - return self._parse_datetime(self._start_time) - - @start_time.setter - def start_time(self, val) -> None: - """Set attribute start_time.""" - self._start_time = val - - @property - def end_time(self) -> Any: - """Get attribute end_time.""" - return self._parse_datetime(self._end_time) - - @end_time.setter - def end_time(self, val) -> None: - """Set attribute end_time.""" - self._end_time = val - - @property - def release_state(self) -> int: - """Get attribute release_state.""" - rs_ref = { - "online": 1, - "offline": 0, - } - if self._release_state not in rs_ref: - raise PyDSParamException( - "Parameter release_state only support `online` or `offline` but get %", - self._release_state, - ) - return rs_ref[self._release_state] - - @release_state.setter - def release_state(self, val: str) -> None: - """Set attribute release_state.""" - self._release_state = val.lower() - - @property - def param_json(self) -> Optional[List[Dict]]: - """Return param json base on self.param.""" - # Handle empty dict and None value - if not self.param: - return [] - return [ - { - "prop": k, - "direct": "IN", - "type": "VARCHAR", - "value": v, - } - for k, v in self.param.items() - ] - - @property - def task_definition_json(self) -> List[Dict]: - """Return all tasks definition in list of dict.""" - if not self.tasks: - return [self.tasks] - else: - return [task.get_define() for task in self.tasks.values()] - - @property - def task_relation_json(self) -> List[Dict]: - """Return all relation between tasks pair in list of dict.""" - if not self.tasks: - return [self.tasks] - else: - 
self._handle_root_relation() - return [tr.get_define() for tr in self._task_relations] - - @property - def schedule_json(self) -> Optional[Dict]: - """Get schedule parameter json object. This is requests from java gateway interface.""" - if not self.schedule: - return None - else: - start_time = conv_to_schedule( - self.start_time if self.start_time else datetime.now() - ) - end_time = conv_to_schedule( - self.end_time if self.end_time else MAX_DATETIME - ) - return { - "startTime": start_time, - "endTime": end_time, - "crontab": self.schedule, - "timezoneId": self.timezone, - } - - @property - def task_list(self) -> List["Task"]: # noqa: F821 - """Return list of tasks objects.""" - return list(self.tasks.values()) - - def _handle_root_relation(self): - """Handle root task property :class:`pydolphinscheduler.core.task.TaskRelation`. - - Root task in DAG do not have dominant upstream node, but we have to add an exactly default - upstream task with task_code equal to `0`. This is requests from java gateway interface. 
- """ - from pydolphinscheduler.core.task import TaskRelation - - post_relation_code = set() - for relation in self._task_relations: - post_relation_code.add(relation.post_task_code) - for task in self.task_list: - if task.code not in post_relation_code: - root_relation = TaskRelation(pre_task_code=0, post_task_code=task.code) - self._task_relations.add(root_relation) - - def add_task(self, task: "Task") -> None: # noqa: F821 - """Add a single task to process definition.""" - self.tasks[task.code] = task - task._process_definition = self - - def add_tasks(self, tasks: List["Task"]) -> None: # noqa: F821 - """Add task sequence to process definition, it a wrapper of :func:`add_task`.""" - for task in tasks: - self.add_task(task) - - def get_task(self, code: str) -> "Task": # noqa: F821 - """Get task object from process definition by given code.""" - if code not in self.tasks: - raise PyDSTaskNoFoundException( - "Task with code %s can not found in process definition %", - (code, self.name), - ) - return self.tasks[code] - - # TODO which tying should return in this case - def get_tasks_by_name(self, name: str) -> Set["Task"]: # noqa: F821 - """Get tasks object by given name, if will return all tasks with this name.""" - find = set() - for task in self.tasks.values(): - if task.name == name: - find.add(task) - return find - - def get_one_task_by_name(self, name: str) -> "Task": # noqa: F821 - """Get exact one task from process definition by given name. - - Function always return one task even though this process definition have more than one task with - this name. - """ - tasks = self.get_tasks_by_name(name) - if not tasks: - raise PyDSTaskNoFoundException(f"Can not find task with name {name}.") - return tasks.pop() - - def run(self): - """Submit and Start ProcessDefinition instance. - - Shortcut for function :func:`submit` and function :func:`start`. Only support manual start workflow - for now, and schedule run will coming soon. 
- :return: - """ - self.submit() - self.start() - - def _ensure_side_model_exists(self): - """Ensure process definition models model exists. - - For now, models object including :class:`pydolphinscheduler.models.project.Project`, - :class:`pydolphinscheduler.models.tenant.Tenant`, :class:`pydolphinscheduler.models.user.User`. - If these model not exists, would create default value in - :class:`pydolphinscheduler.constants.ProcessDefinitionDefault`. - """ - # TODO used metaclass for more pythonic - self.user.create_if_not_exists() - # Project model need User object exists - self.project.create_if_not_exists(self._user) - - def _pre_submit_check(self): - """Check specific condition satisfy before. - - This method should be called before process definition submit to java gateway - For now, we have below checker: - * `self.param` or at least one local param of task should be set if task `switch` in this workflow. - """ - if ( - any([task.task_type == TaskType.SWITCH for task in self.tasks.values()]) - and self.param is None - and all([len(task.local_params) == 0 for task in self.tasks.values()]) - ): - raise PyDSParamException( - "Parameter param or at least one local_param of task must " - "be provider if task Switch in process definition." 
- ) - - def submit(self) -> int: - """Submit ProcessDefinition instance to java gateway.""" - self._ensure_side_model_exists() - self._pre_submit_check() - - self._process_definition_code = JavaGate().create_or_update_process_definition( - self._user, - self._project, - self.name, - str(self.description) if self.description else "", - json.dumps(self.param_json), - self.warning_type, - self.warning_group_id, - self.timeout, - self.worker_group, - self._tenant, - self.release_state, - # TODO add serialization function - json.dumps(self.task_relation_json), - json.dumps(self.task_definition_json), - json.dumps(self.schedule_json) if self.schedule_json else None, - None, - None, - ) - if len(self.resource_list) > 0: - for res in self.resource_list: - res.user_name = self._user - res.create_or_update_resource() - return self._process_definition_code - - def start(self) -> None: - """Create and start ProcessDefinition instance. - - which post to `start-process-instance` to java gateway - """ - JavaGate().exec_process_instance( - self._user, - self._project, - self.name, - "", - self.worker_group, - self.warning_type, - self.warning_group_id, - 24 * 3600, - ) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/resource.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/resource.py deleted file mode 100644 index ea811915e2..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/resource.py +++ /dev/null @@ -1,73 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Module resource.""" - -from typing import Optional - -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.java_gateway import JavaGate -from pydolphinscheduler.models import Base - - -class Resource(Base): - """resource object, will define the resources that you want to create or update. - - :param name: The fullname of resource.Includes path and suffix. - :param content: The description of resource. - :param description: The description of resource. - :param user_name: The user name of resource. - """ - - _DEFINE_ATTR = {"name", "content", "description", "user_name"} - - def __init__( - self, - name: str, - content: Optional[str] = None, - description: Optional[str] = None, - user_name: Optional[str] = None, - ): - super().__init__(name, description) - self.content = content - self.user_name = user_name - self._resource_code = None - - def get_info_from_database(self): - """Get resource info from java gateway, contains resource id, name.""" - if not self.user_name: - raise PyDSParamException( - "`user_name` is required when querying resources from python gate." - ) - return JavaGate().query_resources_file_info(self.user_name, self.name) - - def get_id_from_database(self): - """Get resource id from java gateway.""" - return self.get_info_from_database().getId() - - def create_or_update_resource(self): - """Create or update resource via java gateway.""" - if not self.content or not self.user_name: - raise PyDSParamException( - "`user_name` and `content` are required when create or update resource from python gate." 
- ) - JavaGate().create_or_update_resource( - self.user_name, - self.name, - self.content, - self.description, - ) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/resource_plugin.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/resource_plugin.py deleted file mode 100644 index 8b500d165f..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/resource_plugin.py +++ /dev/null @@ -1,58 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""DolphinScheduler ResourcePlugin object.""" - -from abc import ABCMeta, abstractmethod - -from pydolphinscheduler.exceptions import PyResPluginException - - -# [start resource_plugin_definition] -class ResourcePlugin(object, metaclass=ABCMeta): - """ResourcePlugin object, declare resource plugin for task and workflow to dolphinscheduler. - - :param prefix: A string representing the prefix of ResourcePlugin. - - """ - - # [start init_method] - def __init__(self, prefix: str, *args, **kwargs): - self.prefix = prefix - - # [end init_method] - - # [start abstractmethod read_file] - @abstractmethod - def read_file(self, suf: str): - """Get the content of the file. 
- - The address of the file is the prefix of the resource plugin plus the parameter suf. - """ - - # [end abstractmethod read_file] - - def get_index(self, s: str, x, n): - """Find the subscript of the nth occurrence of the X character in the string s.""" - if n <= s.count(x): - all_index = [key for key, value in enumerate(s) if value == x] - return all_index[n - 1] - else: - raise PyResPluginException("Incomplete path.") - - -# [end resource_plugin_definition] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/task.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/task.py deleted file mode 100644 index 3fec31fd67..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/task.py +++ /dev/null @@ -1,384 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""DolphinScheduler Task and TaskRelation object.""" -import copy -import types -from logging import getLogger -from typing import Dict, List, Optional, Sequence, Set, Tuple, Union - -from pydolphinscheduler import configuration -from pydolphinscheduler.constants import ( - Delimiter, - ResourceKey, - Symbol, - TaskFlag, - TaskPriority, - TaskTimeoutFlag, -) -from pydolphinscheduler.core.process_definition import ( - ProcessDefinition, - ProcessDefinitionContext, -) -from pydolphinscheduler.core.resource import Resource -from pydolphinscheduler.core.resource_plugin import ResourcePlugin -from pydolphinscheduler.exceptions import PyDSParamException, PyResPluginException -from pydolphinscheduler.java_gateway import JavaGate -from pydolphinscheduler.models import Base - -logger = getLogger(__name__) - - -class TaskRelation(Base): - """TaskRelation object, describe the relation of exactly two tasks.""" - - # Add attr `_KEY_ATTR` to overwrite :func:`__eq__`, it is make set - # `Task.process_definition._task_relations` work correctly. 
class Task(Base):
    """Task object, parent class for all exactly task type.

    Holds the attributes shared by every concrete task type and wires the
    task into its :class:`ProcessDefinition`: ``code``/``version`` are
    fetched from the Java gateway at construction time, and the
    ``<<``/``>>`` operators maintain upstream/downstream relations.
    """

    # Attributes serialized into the task definition (see Base._DEFINE_ATTR usage).
    _DEFINE_ATTR = {
        "name",
        "code",
        "version",
        "task_type",
        "task_params",
        "description",
        "flag",
        "task_priority",
        "worker_group",
        "environment_code",
        "delay_time",
        "fail_retry_times",
        "fail_retry_interval",
        "timeout_flag",
        "timeout_notify_strategy",
        "timeout",
    }

    # task default attribute will into `task_params` property
    _task_default_attr = {
        "local_params",
        "resource_list",
        "dependence",
        "wait_start_timeout",
        "condition_result",
    }
    # task attribute ignore from _task_default_attr and will not into `task_params` property
    _task_ignore_attr: set = set()
    # task custom attribute define in sub class and will append to `task_params` property
    _task_custom_attr: set = set()

    # File suffixes a subclass accepts when `ext_attr` holds a file path (e.g. {".sh"}).
    ext: set = None
    # Name of the attribute whose value may be a file path loaded through a resource
    # plugin (see `get_content`). The annotation also allows a FunctionType, but
    # `get_content` only handles the str case — TODO confirm the function case upstream.
    ext_attr: Union[str, types.FunctionType] = None

    DEFAULT_CONDITION_RESULT = {"successNode": [""], "failedNode": [""]}

    def __init__(
        self,
        name: str,
        task_type: str,
        description: Optional[str] = None,
        flag: Optional[str] = TaskFlag.YES,
        task_priority: Optional[str] = TaskPriority.MEDIUM,
        worker_group: Optional[str] = configuration.WORKFLOW_WORKER_GROUP,
        environment_name: Optional[str] = None,
        delay_time: Optional[int] = 0,
        fail_retry_times: Optional[int] = 0,
        fail_retry_interval: Optional[int] = 1,
        timeout_flag: Optional[int] = TaskTimeoutFlag.CLOSE,
        timeout_notify_strategy: Optional[str] = None,
        timeout: Optional[int] = 0,
        process_definition: Optional[ProcessDefinition] = None,
        local_params: Optional[List] = None,
        resource_list: Optional[List] = None,
        dependence: Optional[Dict] = None,
        wait_start_timeout: Optional[Dict] = None,
        condition_result: Optional[Dict] = None,
        resource_plugin: Optional[ResourcePlugin] = None,
    ):

        super().__init__(name, description)
        self.task_type = task_type
        self.flag = flag
        self.task_priority = task_priority
        self.worker_group = worker_group
        self._environment_name = environment_name
        self.fail_retry_times = fail_retry_times
        self.fail_retry_interval = fail_retry_interval
        self.delay_time = delay_time
        self.timeout_flag = timeout_flag
        self.timeout_notify_strategy = timeout_notify_strategy
        self.timeout = timeout
        self._process_definition = None
        # Fall back to the context-managed definition (`with ProcessDefinition(...)`)
        # when no explicit process_definition is passed.
        self.process_definition: ProcessDefinition = (
            process_definition or ProcessDefinitionContext.get()
        )
        self._upstream_task_codes: Set[int] = set()
        self._downstream_task_codes: Set[int] = set()
        self._task_relation: Set[TaskRelation] = set()
        # move attribute code and version after _process_definition and process_definition declare
        self.code, self.version = self.gen_code_and_version()
        # Add task to process definition, maybe we could put into property process_definition latter

        if (
            self.process_definition is not None
            and self.code not in self.process_definition.tasks
        ):
            self.process_definition.add_task(self)
        else:
            # NOTE(review): this branch is also reached when process_definition is
            # None, in which case the "already in process definition" message is
            # misleading — confirm whether a None process definition can occur here.
            logger.warning(
                "Task code %d already in process definition, prohibit re-add task.",
                self.code,
            )

        # Attribute for task param
        self.local_params = local_params or []
        self._resource_list = resource_list or []
        self.dependence = dependence or {}
        self.wait_start_timeout = wait_start_timeout or {}
        self._condition_result = condition_result or self.DEFAULT_CONDITION_RESULT
        self.resource_plugin = resource_plugin
        self.get_content()

    @property
    def process_definition(self) -> Optional[ProcessDefinition]:
        """Get attribute process_definition."""
        return self._process_definition

    @process_definition.setter
    def process_definition(self, process_definition: Optional[ProcessDefinition]):
        """Set attribute process_definition."""
        self._process_definition = process_definition

    @property
    def resource_list(self) -> List:
        """Get task define attribute `resource_list`.

        Accepts two element shapes in ``_resource_list``: a plain str resource
        path (resolved to an id via the database) or a dict already carrying a
        resource id (deprecated, see warning below). Returns a list of
        ``{ResourceKey.ID: id}`` dicts.
        """
        resources = set()
        for res in self._resource_list:
            # NOTE(review): exact-type check; a str subclass would be skipped —
            # isinstance() would be the conventional test here.
            if type(res) == str:
                resources.add(
                    Resource(name=res, user_name=self.user_name).get_id_from_database()
                )
            elif type(res) == dict and res.get(ResourceKey.ID) is not None:
                logger.warning(
                    """`resource_list` should be defined using List[str] with resource paths,
                    the use of ids to define resources will be remove in version 3.2.0.
                    """
                )
                resources.add(res.get(ResourceKey.ID))
        return [{ResourceKey.ID: r} for r in resources]

    @property
    def user_name(self) -> Optional[str]:
        """Return user name of process definition."""
        if self.process_definition:
            return self.process_definition.user.name
        else:
            raise PyDSParamException("`user_name` cannot be empty.")

    @property
    def condition_result(self) -> Dict:
        """Get attribute condition_result."""
        return self._condition_result

    @condition_result.setter
    def condition_result(self, condition_result: Optional[Dict]):
        """Set attribute condition_result."""
        self._condition_result = condition_result

    def _get_attr(self) -> Set[str]:
        """Get final task task_params attribute.

        Base on `_task_default_attr`, append attribute from `_task_custom_attr` and subtract attribute from
        `_task_ignore_attr`.
        """
        attr = copy.deepcopy(self._task_default_attr)
        attr -= self._task_ignore_attr
        attr |= self._task_custom_attr
        return attr

    @property
    def task_params(self) -> Optional[Dict]:
        """Get task parameter object.

        Will get result to combine _task_custom_attr and custom_attr.
        """
        custom_attr = self._get_attr()
        return self.get_define_custom(custom_attr=custom_attr)

    def get_plugin(self):
        """Return the resource plug-in.

        according to parameter resource_plugin and parameter
        process_definition.resource_plugin.
        """
        if self.resource_plugin is None:
            if self.process_definition.resource_plugin is not None:
                return self.process_definition.resource_plugin
            else:
                raise PyResPluginException(
                    "The execution command of this task is a file, but the resource plugin is empty"
                )
        else:
            return self.resource_plugin

    def get_content(self):
        """Get the file content according to the resource plugin.

        When the attribute named by ``ext_attr`` holds a path ending in a
        supported suffix (``ext``), read the file through the resource plugin
        and store its content on the non-underscored attribute name;
        otherwise store the raw value (raising for unsupported suffixes when
        a plugin is configured).
        """
        if self.ext_attr is None and self.ext is None:
            return
        # assumes ext_attr is a str attribute name here — TODO confirm; a
        # FunctionType value (allowed by the annotation) would fail below.
        _ext_attr = getattr(self, self.ext_attr)
        if _ext_attr is not None:
            if isinstance(_ext_attr, str) and _ext_attr.endswith(tuple(self.ext)):
                res = self.get_plugin()
                content = res.read_file(_ext_attr)
                # NOTE: lstrip removes ALL leading underscores, not just one.
                setattr(self, self.ext_attr.lstrip(Symbol.UNDERLINE), content)
            else:
                if self.resource_plugin is not None or (
                    self.process_definition is not None
                    and self.process_definition.resource_plugin is not None
                ):
                    # a plugin is configured but the suffix is unsupported -> reject
                    index = _ext_attr.rfind(Symbol.POINT)
                    if index != -1:
                        raise ValueError(
                            "This task does not support files with suffix {}, only supports {}".format(
                                _ext_attr[index:],
                                Symbol.COMMA.join(str(suf) for suf in self.ext),
                            )
                        )
                setattr(self, self.ext_attr.lstrip(Symbol.UNDERLINE), _ext_attr)

    def __hash__(self):
        # Tasks are identified by their gateway-assigned code.
        return hash(self.code)

    def __lshift__(self, other: Union["Task", Sequence["Task"]]):
        """Implement Task << Task."""
        self.set_upstream(other)
        return other

    def __rshift__(self, other: Union["Task", Sequence["Task"]]):
        """Implement Task >> Task."""
        self.set_downstream(other)
        return other

    def __rrshift__(self, other: Union["Task", Sequence["Task"]]):
        """Call for Task >> [Task] because list don't have __rshift__ operators."""
        self.__lshift__(other)
        return self

    def __rlshift__(self, other: Union["Task", Sequence["Task"]]):
        """Call for Task << [Task] because list don't have __lshift__ operators."""
        self.__rshift__(other)
        return self

    def _set_deps(
        self, tasks: Union["Task", Sequence["Task"]], upstream: bool = True
    ) -> None:
        """
        Set parameter tasks dependent to current task.

        it is a wrapper for :func:`set_upstream` and :func:`set_downstream`.
        """
        if not isinstance(tasks, Sequence):
            tasks = [tasks]

        for task in tasks:
            if upstream:
                self._upstream_task_codes.add(task.code)
                task._downstream_task_codes.add(self.code)

                if self._process_definition:
                    task_relation = TaskRelation(
                        pre_task_code=task.code,
                        post_task_code=self.code,
                        name=f"{task.name} {Delimiter.DIRECTION} {self.name}",
                    )
                    self.process_definition._task_relations.add(task_relation)
            else:
                self._downstream_task_codes.add(task.code)
                task._upstream_task_codes.add(self.code)

                if self._process_definition:
                    task_relation = TaskRelation(
                        pre_task_code=self.code,
                        post_task_code=task.code,
                        name=f"{self.name} {Delimiter.DIRECTION} {task.name}",
                    )
                    self.process_definition._task_relations.add(task_relation)

    def set_upstream(self, tasks: Union["Task", Sequence["Task"]]) -> None:
        """Set parameter tasks as upstream to current task."""
        self._set_deps(tasks, upstream=True)

    def set_downstream(self, tasks: Union["Task", Sequence["Task"]]) -> None:
        """Set parameter tasks as downstream to current task."""
        self._set_deps(tasks, upstream=False)

    # TODO code should better generate in bulk mode when :ref: processDefinition run submit or start
    def gen_code_and_version(self) -> Tuple:
        """
        Generate task code and version from java gateway.

        If task name do not exists in process definition before, if will generate new code and version id
        equal to 0 by java gateway, otherwise if will return the exists code and version.
        """
        # TODO get code from specific project process definition and task name
        result = JavaGate().get_code_and_version(
            self.process_definition._project, self.process_definition.name, self.name
        )
        # result = gateway.entry_point.genTaskCodeList(DefaultTaskCodeNum.DEFAULT)
        # gateway_result_checker(result)
        return result.get("code"), result.get("version")

    @property
    def environment_code(self) -> Optional[str]:
        """Convert environment name to code via the Java gateway (None when unset)."""
        if self._environment_name is None:
            return None
        return JavaGate().query_environment_info(self._environment_name)
class ParseTool:
    """Enhanced parsing tools.

    Stateless helpers that expand special tokens inside YAML string values:
    ``$FILE{"path"}`` (file content), ``$ENV{NAME}`` (environment variable)
    and ``${CONFIG.name}`` (pydolphinscheduler configuration).
    """

    @staticmethod
    def parse_string_param_if_file(string_param: str, **kwargs):
        """Use $FILE{"data_path"} to load file from "data_path".

        :param string_param: raw value from the YAML file.
        :param kwargs: may carry ``base_folder`` used to resolve relative paths
            (defaults to the current directory).
        """
        if string_param.startswith("$FILE"):
            path = re.findall(r"\$FILE\{\"(.*?)\"\}", string_param)[0]
            base_folder = kwargs.get("base_folder", ".")
            path = ParseTool.get_possible_path(path, base_folder)
            # fix: read with an explicit encoding instead of the platform default,
            # and use read() rather than joining the line iterator
            with open(path, "r", encoding="utf-8") as read_file:
                string_param = read_file.read()
        return string_param

    @staticmethod
    def parse_string_param_if_env(string_param: str, **kwargs):
        """Use $ENV{env_name} to load environment variable "env_name".

        An unset variable is substituted with the literal ``$env_name`` so the
        missing value stays visible in the produced definition.
        """
        if "$ENV" in string_param:
            key = re.findall(r"\$ENV\{(.*?)\}", string_param)[0]
            env_value = os.environ.get(key, "$%s" % key)
            string_param = string_param.replace("$ENV{%s}" % key, env_value)
        return string_param

    @staticmethod
    def parse_string_param_if_config(string_param: str, **kwargs):
        """Use ${CONFIG.var_name} to load variable "var_name" from configuration.

        Prefers a module-level attribute of :mod:`pydolphinscheduler.configuration`;
        falls back to ``configuration.get_single_config``.
        """
        if "${CONFIG" in string_param:
            key = re.findall(r"\$\{CONFIG\.(.*?)\}", string_param)[0]
            if hasattr(configuration, key):
                string_param = getattr(configuration, key)
            else:
                string_param = configuration.get_single_config(key)

        return string_param

    @staticmethod
    def get_possible_path(file_path, base_folder):
        """Get file possible path.

        Return new path if file_path is not exists, but base_folder + file_path exists
        """
        possible_path = file_path
        if not Path(file_path).exists():
            new_path = Path(base_folder).joinpath(file_path)
            if new_path.exists():
                possible_path = new_path
                # fix: lazy %-style logging arguments instead of an f-string
                logger.info("%s not exists, convert to %s", file_path, possible_path)

        return possible_path


def get_task_cls(task_type) -> "Type[Task]":
    """Get the task class object by task_type (case compatible).

    ``task_type`` is matched case-insensitively against ``tasks.__all__`` via
    ``str.capitalize`` on both sides.

    :raises PyDSTaskNoFoundException: when no task class matches.
    """
    # only get task class from tasks.__all__
    all_task_types = {type_.capitalize(): type_ for type_ in tasks.__all__}
    task_type_cap = task_type.capitalize()
    if task_type_cap not in all_task_types:
        # fix: "cant not find" typo in the error message
        raise PyDSTaskNoFoundException("cannot find task %s" % task_type)

    standard_name = all_task_types[task_type_cap]
    return getattr(tasks, standard_name)
    # Rules applied in order to every string value found in the YAML file
    # (file inclusion, env-var substitution, config lookup).
    _parse_rules = [
        ParseTool.parse_string_param_if_file,
        ParseTool.parse_string_param_if_env,
        ParseTool.parse_string_param_if_config,
    ]

    def __init__(self, yaml_file: str):
        """Read the YAML file, resolve $WORKFLOW references, and feed YamlParser."""
        with open(yaml_file, "r") as f:
            content = f.read()

        self._base_folder = Path(yaml_file).parent
        content = self.prepare_refer_process(content)
        super().__init__(content)

    def create_process_definition(self):
        """Create process main function.

        Builds the :class:`ProcessDefinition` from the ``workflow`` section,
        creates every task under ``tasks``, wires up dependencies declared via
        ``deps``, submits the definition, and optionally runs it when the
        workflow's ``run`` flag is true. Returns the process name.
        """
        # get process parameters with key "workflow"
        process_params = self[KEY_PROCESS]

        # pop "run" parameter, used at the end
        is_run = process_params.pop("run", False)

        # use YamlProcess._parse_rules to parse special value of yaml file
        process_params = self.parse_params(process_params)

        process_name = process_params["name"]
        logger.info(f"Create Process: {process_name}")
        with ProcessDefinition(**process_params) as pd:

            # save dependencies between tasks
            dependencies = {}

            # save name and task mapping
            name2task = {}

            # get task datas with key "tasks"
            for task_data in self[KEY_TASK]:
                task = self.parse_task(task_data, name2task)

                deps = task_data.get(KEY_DEPS, [])
                if deps:
                    dependencies[task.name] = deps
                name2task[task.name] = task

            # build dependencies between task
            for downstream_task_name, deps in dependencies.items():
                downstream_task = name2task[downstream_task_name]
                for upstream_task_name in deps:
                    upstream_task = name2task[upstream_task_name]
                    upstream_task >> downstream_task

            pd.submit()
            # if set is_run, run the process after submit
            if is_run:
                logger.info(f"run workflow: {pd}")
                pd.run()

        return process_name

    def parse_params(self, params: Any):
        """Recursively resolves the parameter values.

        The function operates params only when it encounters a string; other
        types continue recursively. Lists and dicts are updated IN PLACE and
        also returned.
        """
        if isinstance(params, str):
            for parse_rule in self._parse_rules:
                params_ = params
                params = parse_rule(params, base_folder=self._base_folder)
                if params_ != params:
                    logger.info(f"parse {params_} -> {params}")

        elif isinstance(params, list):
            for index in range(len(params)):
                params[index] = self.parse_params(params[index])

        elif isinstance(params, dict):
            for key, value in params.items():
                params[key] = self.parse_params(value)

        return params

    @classmethod
    def parse(cls, yaml_file: str):
        """Create the process defined by ``yaml_file`` and return its name.

        One-shot convenience wrapper equivalent to
        ``cls(yaml_file).create_process_definition()``.
        """
        process_name = cls(yaml_file).create_process_definition()
        return process_name

    def prepare_refer_process(self, content):
        """Allow YAML files to reference process derived from other YAML files.

        Every ``$WORKFLOW{"path"}`` token is replaced by the name of the
        process created from that referenced YAML file (created recursively
        via :meth:`parse`).
        """
        process_paths = re.findall(r"\$WORKFLOW\{\"(.*?)\"\}", content)
        for process_path in process_paths:
            # NOTE(review): "form" is a typo for "from" in this log message;
            # left unchanged here since it is runtime output.
            logger.info(
                f"find special token {process_path}, load process form {process_path}"
            )
            possible_path = ParseTool.get_possible_path(process_path, self._base_folder)
            process_name = YamlProcess.parse(possible_path)
            content = content.replace('$WORKFLOW{"%s"}' % process_path, process_name)

        return content
- """ - task_type = task_data["task_type"] - # get params without special key - task_params = {k: v for k, v in task_data.items() if k not in TASK_SPECIAL_KEYS} - - task_cls = get_task_cls(task_type) - - # use YamlProcess._parse_rules to parse special value of yaml file - task_params = self.parse_params(task_params) - - if task_cls == tasks.Switch: - task = self.parse_switch(task_params, name2task) - - elif task_cls == tasks.Condition: - task = self.parse_condition(task_params, name2task) - - elif task_cls == tasks.Dependent: - task = self.parse_dependent(task_params, name2task) - - else: - task = task_cls(**task_params) - logger.info(task_type, task) - return task - - def parse_switch(self, task_params, name2task): - """Parse Switch Task. - - This is an example Yaml fragment of task_params - - name: switch - condition: - - ["${var} > 1", switch_child_1] - - switch_child_2 - """ - from pydolphinscheduler.tasks.switch import ( - Branch, - Default, - Switch, - SwitchCondition, - ) - - condition_datas = task_params["condition"] - conditions = [] - for condition_data in condition_datas: - assert "task" in condition_data, "task must be in %s" % condition_data - task_name = condition_data["task"] - condition_string = condition_data.get("condition", None) - - # if condition_string is None, for example: {"task": "switch_child_2"}, set it to Default branch - if condition_string is None: - conditions.append(Default(task=name2task.get(task_name))) - - # if condition_string is not None, for example: - # {"task": "switch_child_2", "condition": "${var} > 1"} set it to Branch - else: - conditions.append( - Branch(condition_string, task=name2task.get(task_name)) - ) - - switch = Switch( - name=task_params["name"], condition=SwitchCondition(*conditions) - ) - return switch - - def parse_condition(self, task_params, name2task): - """Parse Condition Task. 
- - This is an example Yaml fragment of task_params - - name: condition - success_task: success_branch - failed_task: fail_branch - OP: AND - groups: - - - OP: AND - groups: - - [pre_task_1, true] - - [pre_task_2, true] - - [pre_task_3, false] - - - OP: AND - groups: - - [pre_task_1, false] - - [pre_task_2, true] - - [pre_task_3, true] - - """ - from pydolphinscheduler.tasks.condition import ( - FAILURE, - SUCCESS, - And, - Condition, - Or, - ) - - def get_op_cls(op): - cls = None - if op.lower() == "and": - cls = And - elif op.lower() == "or": - cls = Or - else: - raise Exception("OP must be in And or Or, but get: %s" % op) - return cls - - second_cond_ops = [] - for first_group in task_params["groups"]: - second_op = first_group["op"] - task_ops = [] - for condition_data in first_group["groups"]: - assert "task" in condition_data, "task must be in %s" % condition_data - assert "flag" in condition_data, "flag must be in %s" % condition_data - task_name = condition_data["task"] - flag = condition_data["flag"] - task = name2task[task_name] - - # for example: task = pre_task_1, flag = true - if flag: - task_ops.append(SUCCESS(task)) - else: - task_ops.append(FAILURE(task)) - - second_cond_ops.append(get_op_cls(second_op)(*task_ops)) - - first_op = task_params["op"] - cond_operator = get_op_cls(first_op)(*second_cond_ops) - - condition = Condition( - name=task_params["name"], - condition=cond_operator, - success_task=name2task[task_params["success_task"]], - failed_task=name2task[task_params["failed_task"]], - ) - return condition - - def parse_dependent(self, task_params, name2task): - """Parse Dependent Task. 
- - This is an example Yaml fragment of task_params - - name: dependent - denpendence: - OP: AND - groups: - - - OP: Or - groups: - - [pydolphin, task_dependent_external, task_1] - - [pydolphin, task_dependent_external, task_2] - - - OP: And - groups: - - [pydolphin, task_dependent_external, task_1, LAST_WEDNESDAY] - - [pydolphin, task_dependent_external, task_2, last24Hours] - - """ - from pydolphinscheduler.tasks.dependent import ( - And, - Dependent, - DependentDate, - DependentItem, - Or, - ) - - def process_dependent_date(dependent_date): - """Parse dependent date (Compatible with key and value of DependentDate).""" - dependent_date_upper = dependent_date.upper() - if hasattr(DependentDate, dependent_date_upper): - dependent_date = getattr(DependentDate, dependent_date_upper) - return dependent_date - - def get_op_cls(op): - cls = None - if op.lower() == "and": - cls = And - elif op.lower() == "or": - cls = Or - else: - raise Exception("OP must be in And or Or, but get: %s" % op) - return cls - - def create_dependent_item(source_items): - """Parse dependent item. 
- - project_name: pydolphin - process_definition_name: task_dependent_external - dependent_task_name: task_1 - dependent_date: LAST_WEDNESDAY - """ - project_name = source_items["project_name"] - process_definition_name = source_items["process_definition_name"] - dependent_task_name = source_items["dependent_task_name"] - dependent_date = source_items.get("dependent_date", DependentDate.TODAY) - dependent_item = DependentItem( - project_name=project_name, - process_definition_name=process_definition_name, - dependent_task_name=dependent_task_name, - dependent_date=process_dependent_date(dependent_date), - ) - - return dependent_item - - second_dependences = [] - for first_group in task_params["groups"]: - second_op = first_group[KEY_OP] - dependence_items = [] - for source_items in first_group["groups"]: - dependence_items.append(create_dependent_item(source_items)) - - second_dependences.append(get_op_cls(second_op)(*dependence_items)) - - first_op = task_params[KEY_OP] - dependence = get_op_cls(first_op)(*second_dependences) - - task = Dependent( - name=task_params["name"], - dependence=dependence, - ) - return task - - -def create_process_definition(yaml_file): - """CLI.""" - YamlProcess.parse(yaml_file) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/default_config.yaml b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/default_config.yaml deleted file mode 100644 index 98d7b99fdc..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/default_config.yaml +++ /dev/null @@ -1,58 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
# Setting about Java gateway server
java_gateway:
  # The address the Python gateway server listens on. Set its value to `0.0.0.0` if your Python API runs
  # on a different host than the Python gateway server. It could also be a specific address like
  # `127.0.0.1` or `localhost`
  address: 127.0.0.1

  # The port of Python gateway server start. Define which port you could connect to Python gateway server from
  # Python API models.
  port: 25333

  # Whether to automatically convert Python objects to Java objects. Default value is ``True``. There is some
  # performance lost when set to ``True`` but for now pydolphinscheduler does not handle the conversion issue
  # between Java and Python, mark it as TODO item in the future.
  auto_convert: true

# Setting about dolphinscheduler default value, will use the value set below if property is not set, which
# including ``user``, ``workflow``
default:
  # Default value for dolphinscheduler's user object
  user:
    name: userPythonGateway
    password: userPythonGateway
    email: userPythonGateway@dolphinscheduler.com
    tenant: tenant_pydolphin
    phone: 11111111111
    state: 1
  # Default value for dolphinscheduler's workflow object
  workflow:
    project: project-pydolphin
    tenant: tenant_pydolphin
    user: userPythonGateway
    queue: queuePythonGateway
    worker_group: default
    # Release state of workflow, default value is ``online`` which means setting workflow online when it submits
    # to Java gateway, if you want to set workflow offline set its value to ``offline``
    release_state: online
    time_zone: Asia/Shanghai
    # Warning type of the workflow, default value is ``NONE`` which means do not warn user in any cases of
    # workflow state, change to ``FAILURE`` if you want to warn users when workflow failed. All available enum
    # values are ``NONE``, ``SUCCESS``, ``FAILURE``, ``ALL``
    warning_type: NONE
"""
This example shows you how to create workflows in batch mode.

After this example runs, we will create NUM_WORKFLOWS workflows named `workflow:<n>`,
each with NUM_TASKS tasks named `task:<m>-workflow:<n>`. Task shape as below

task:1-workflow:1 -> task:2-workflow:1 -> task:3-workflow:1 -> ...

Each workflow is linear since we set `IS_CHAIN=True`, you could change task to parallel by set it to `False`.
"""

from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.tasks.shell import Shell

NUM_WORKFLOWS = 10
NUM_TASKS = 5
# Make sure your tenant exists in your operator system
TENANT = "exists_tenant"
# Whether task should dependent on pre one or not
# False will create workflow with independent task, while True task will dependent on pre-task and dependence
# link like `pre_task -> current_task -> next_task`, default True
IS_CHAIN = True

for wf in range(0, NUM_WORKFLOWS):
    workflow_name = f"workflow:{wf}"

    with ProcessDefinition(name=workflow_name, tenant=TENANT) as pd:
        for t in range(0, NUM_TASKS):
            task_name = f"task:{t}-{workflow_name}"
            command = f"echo This is task {task_name}"
            task = Shell(name=task_name, command=command)

            if IS_CHAIN and t > 0:
                # chain onto the previous task: pre_task >> current task
                pre_task_name = f"task:{t-1}-{workflow_name}"
                pd.get_one_task_by_name(pre_task_name) >> task

        # We just submit workflow and task definition without set schedule time or run it manually
        pd.submit()
# [start workflow_declare]
r"""
An example workflow for task condition.

This example will create five tasks in a single workflow, with four shell tasks and one condition task.
The condition task has one upstream which we declare explicitly with syntax `parent >> condition`, and three
downstream tasks automatically set as dependencies by the condition task by passing parameter `condition`.
The graph of this workflow is like:
pre_task_1 ->                     -> success_branch
             \                   /
pre_task_2 ->  -> conditions ->
             /                   \
pre_task_3 ->                     -> fail_branch
.
"""

from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.tasks.condition import FAILURE, SUCCESS, And, Condition
from pydolphinscheduler.tasks.shell import Shell

with ProcessDefinition(name="task_condition_example", tenant="tenant_exists") as pd:
    pre_task_1 = Shell(name="pre_task_1", command="echo pre_task_1")
    pre_task_2 = Shell(name="pre_task_2", command="echo pre_task_2")
    pre_task_3 = Shell(name="pre_task_3", command="echo pre_task_3")
    # condition: (pre_task_1 AND pre_task_2 succeed) AND pre_task_3 fails
    cond_operator = And(
        And(
            SUCCESS(pre_task_1, pre_task_2),
            FAILURE(pre_task_3),
        ),
    )

    success_branch = Shell(name="success_branch", command="echo success_branch")
    fail_branch = Shell(name="fail_branch", command="echo fail_branch")

    condition = Condition(
        name="condition",
        condition=cond_operator,
        success_task=success_branch,
        failed_task=fail_branch,
    )
    pd.submit()
# [end workflow_declare]
# [start workflow_declare]
"""
An example workflow for task datax.

This example will create a workflow named `task_datax_example`.
`task_datax_example` is the true workflow which defines and runs task task_datax.
You can create data sources `first_mysql` and `second_mysql` through UI.
It creates a task to synchronize datax from the source database to the target database.
"""

from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.tasks.datax import CustomDataX, DataX

# datax json template
JSON_TEMPLATE = {
    "job": {
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "username": "usr",
                        "password": "pwd",
                        "column": ["id", "name", "code", "description"],
                        "splitPk": "id",
                        "connection": [
                            {
                                "table": ["source_table"],
                                "jdbcUrl": ["jdbc:mysql://127.0.0.1:3306/source_db"],
                            }
                        ],
                    },
                },
                "writer": {
                    "name": "mysqlwriter",
                    "parameter": {
                        "writeMode": "insert",
                        "username": "usr",
                        "password": "pwd",
                        "column": ["id", "name"],
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:mysql://127.0.0.1:3306/target_db",
                                "table": ["target_table"],
                            }
                        ],
                    },
                },
            }
        ],
        "setting": {
            "errorLimit": {"percentage": 0, "record": 0},
            "speed": {"channel": 1, "record": 1000},
        },
    }
}

with ProcessDefinition(
    name="task_datax_example",
    tenant="tenant_exists",
) as pd:
    # This task synchronizes the data in `t_ds_project`
    # of `first_mysql` database to `target_project` of `second_mysql` database.
    # You have to make sure data source named `first_mysql` and `second_mysql` exists
    # in your environment.
    task1 = DataX(
        name="task_datax",
        datasource_name="first_mysql",
        datatarget_name="second_mysql",
        sql="select id, name, code, description from source_table",
        target_table="target_table",
    )

    # You can custom json_template of datax to sync data. This task create a new
    # datax job same as task1, transfer record from `first_mysql` to `second_mysql`
    task2 = CustomDataX(name="task_custom_datax", json=str(JSON_TEMPLATE))
    pd.run()
# [end workflow_declare]
-`task_dependent` is true workflow define and run task dependent, while `task_dependent_external` -define outside workflow and task from dependent. - -After this script submit, we would get workflow as below: - -task_dependent_external: - -task_1 -task_2 -task_3 - -task_dependent: - -task_dependent(this task dependent on task_dependent_external.task_1 and task_dependent_external.task_2). -""" -from pydolphinscheduler import configuration -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.dependent import And, Dependent, DependentItem, Or -from pydolphinscheduler.tasks.shell import Shell - -with ProcessDefinition( - name="task_dependent_external", - tenant="tenant_exists", -) as pd: - task_1 = Shell(name="task_1", command="echo task 1") - task_2 = Shell(name="task_2", command="echo task 2") - task_3 = Shell(name="task_3", command="echo task 3") - pd.submit() - -with ProcessDefinition( - name="task_dependent_example", - tenant="tenant_exists", -) as pd: - task = Dependent( - name="task_dependent", - dependence=And( - Or( - DependentItem( - project_name=configuration.WORKFLOW_PROJECT, - process_definition_name="task_dependent_external", - dependent_task_name="task_1", - ), - DependentItem( - project_name=configuration.WORKFLOW_PROJECT, - process_definition_name="task_dependent_external", - dependent_task_name="task_2", - ), - ) - ), - ) - pd.submit() -# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_dvc_example.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_dvc_example.py deleted file mode 100644 index 2b93cd14b7..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_dvc_example.py +++ /dev/null @@ -1,52 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# [start workflow_declare] -"""A example workflow for task dvc.""" - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks import DVCDownload, DVCInit, DVCUpload - -repository = "git@github.com:/dvc-data-repository-example.git" - -with ProcessDefinition( - name="task_dvc_example", - tenant="tenant_exists", -) as pd: - init_task = DVCInit(name="init_dvc", repository=repository, store_url="~/dvc_data") - upload_task = DVCUpload( - name="upload_data", - repository=repository, - data_path_in_dvc_repository="iris", - data_path_in_worker="~/source/iris", - version="v1", - message="upload iris data v1", - ) - - download_task = DVCDownload( - name="download_data", - repository=repository, - data_path_in_dvc_repository="iris", - data_path_in_worker="~/target/iris", - version="v1", - ) - - init_task >> upload_task >> download_task - - pd.run() - -# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_flink_example.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_flink_example.py deleted file mode 100644 index 1e8a040c65..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_flink_example.py +++ /dev/null 
@@ -1,33 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# [start workflow_declare] -"""A example workflow for task flink.""" - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.flink import DeployMode, Flink, ProgramType - -with ProcessDefinition(name="task_flink_example", tenant="tenant_exists") as pd: - task = Flink( - name="task_flink", - main_class="org.apache.flink.streaming.examples.wordcount.WordCount", - main_package="WordCount.jar", - program_type=ProgramType.JAVA, - deploy_mode=DeployMode.LOCAL, - ) - pd.run() -# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_map_reduce_example.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_map_reduce_example.py deleted file mode 100644 index 39b204f82a..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_map_reduce_example.py +++ /dev/null @@ -1,34 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# [start workflow_declare] -"""A example workflow for task mr.""" - -from pydolphinscheduler.core.engine import ProgramType -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.map_reduce import MR - -with ProcessDefinition(name="task_map_reduce_example", tenant="tenant_exists") as pd: - task = MR( - name="task_mr", - main_class="wordcount", - main_package="hadoop-mapreduce-examples-3.3.1.jar", - program_type=ProgramType.JAVA, - main_args="/dolphinscheduler/tenant_exists/resources/file.txt /output/ds", - ) - pd.run() -# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_mlflow_example.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_mlflow_example.py deleted file mode 100644 index c2734bcf81..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_mlflow_example.py +++ /dev/null @@ -1,93 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# [start workflow_declare] -"""A example workflow for task mlflow.""" - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.mlflow import ( - MLflowDeployType, - MLflowModels, - MLFlowProjectsAutoML, - MLFlowProjectsBasicAlgorithm, - MLFlowProjectsCustom, -) - -mlflow_tracking_uri = "http://127.0.0.1:5000" - -with ProcessDefinition( - name="task_mlflow_example", - tenant="tenant_exists", -) as pd: - - # run custom mlflow project to train model - train_custom = MLFlowProjectsCustom( - name="train_xgboost_native", - repository="https://github.com/mlflow/mlflow#examples/xgboost/xgboost_native", - mlflow_tracking_uri=mlflow_tracking_uri, - parameters="-P learning_rate=0.2 -P colsample_bytree=0.8 -P subsample=0.9", - experiment_name="xgboost", - ) - - # run automl to train model - train_automl = MLFlowProjectsAutoML( - name="train_automl", - mlflow_tracking_uri=mlflow_tracking_uri, - parameters="time_budget=30;estimator_list=['lgbm']", - experiment_name="automl_iris", - model_name="iris_A", - automl_tool="flaml", - data_path="/data/examples/iris", - ) - - # Using DOCKER to deploy model from train_automl - deploy_docker = MLflowModels( - name="deploy_docker", - model_uri="models:/iris_A/Production", - mlflow_tracking_uri=mlflow_tracking_uri, - deploy_mode=MLflowDeployType.DOCKER, - port=7002, - ) - - train_automl >> deploy_docker - - # run lightgbm to train model - train_basic_algorithm = MLFlowProjectsBasicAlgorithm( - name="train_basic_algorithm", - mlflow_tracking_uri=mlflow_tracking_uri, - 
parameters="n_estimators=200;learning_rate=0.2", - experiment_name="basic_algorithm_iris", - model_name="iris_B", - algorithm="lightgbm", - data_path="/data/examples/iris", - search_params="max_depth=[5, 10];n_estimators=[100, 200]", - ) - - # Using MLFLOW to deploy model from training lightgbm project - deploy_mlflow = MLflowModels( - name="deploy_mlflow", - model_uri="models:/iris_B/Production", - mlflow_tracking_uri=mlflow_tracking_uri, - deploy_mode=MLflowDeployType.MLFLOW, - port=7001, - ) - - train_basic_algorithm >> deploy_mlflow - - pd.submit() - -# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_openmldb_example.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_openmldb_example.py deleted file mode 100644 index 5b90091ecf..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_openmldb_example.py +++ /dev/null @@ -1,43 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# [start workflow_declare] -"""A example workflow for task openmldb.""" - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.openmldb import OpenMLDB - -sql = """USE demo_db; -set @@job_timeout=200000; -LOAD DATA INFILE 'file:///tmp/train_sample.csv' -INTO TABLE talkingdata OPTIONS(mode='overwrite'); -""" - -with ProcessDefinition( - name="task_openmldb_example", - tenant="tenant_exists", -) as pd: - task_openmldb = OpenMLDB( - name="task_openmldb", - zookeeper="127.0.0.1:2181", - zookeeper_path="/openmldb", - execute_mode="offline", - sql=sql, - ) - - pd.run() -# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_pytorch_example.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_pytorch_example.py deleted file mode 100644 index 6559c9ac65..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_pytorch_example.py +++ /dev/null @@ -1,62 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# [start workflow_declare] -"""A example workflow for task pytorch.""" - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.pytorch import Pytorch - -with ProcessDefinition( - name="task_pytorch_example", - tenant="tenant_exists", -) as pd: - - # run project with existing environment - task_existing_env = Pytorch( - name="task_existing_env", - script="main.py", - script_params="--dry-run --no-cuda", - project_path="https://github.com/pytorch/examples#mnist", - python_command="/home/anaconda3/envs/pytorch/bin/python3", - ) - - # run project with creating conda environment - task_conda_env = Pytorch( - name="task_conda_env", - script="main.py", - script_params="--dry-run --no-cuda", - project_path="https://github.com/pytorch/examples#mnist", - is_create_environment=True, - python_env_tool="conda", - requirements="requirements.txt", - conda_python_version="3.7", - ) - - # run project with creating virtualenv environment - task_virtualenv_env = Pytorch( - name="task_virtualenv_env", - script="main.py", - script_params="--dry-run --no-cuda", - project_path="https://github.com/pytorch/examples#mnist", - is_create_environment=True, - python_env_tool="virtualenv", - requirements="requirements.txt", - ) - - pd.submit() -# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_sagemaker_example.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_sagemaker_example.py deleted file mode 100644 index b056f61a63..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_sagemaker_example.py +++ /dev/null @@ -1,46 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# [start workflow_declare] -"""A example workflow for task sagemaker.""" -import json - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.sagemaker import SageMaker - -sagemaker_request_data = { - "ParallelismConfiguration": {"MaxParallelExecutionSteps": 1}, - "PipelineExecutionDescription": "test Pipeline", - "PipelineExecutionDisplayName": "AbalonePipeline", - "PipelineName": "AbalonePipeline", - "PipelineParameters": [ - {"Name": "ProcessingInstanceType", "Value": "ml.m4.xlarge"}, - {"Name": "ProcessingInstanceCount", "Value": "2"}, - ], -} - -with ProcessDefinition( - name="task_sagemaker_example", - tenant="tenant_exists", -) as pd: - task_sagemaker = SageMaker( - name="task_sagemaker", - sagemaker_request_json=json.dumps(sagemaker_request_data, indent=2), - ) - - pd.run() -# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_spark_example.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_spark_example.py deleted file mode 100644 index 594d95f55a..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_spark_example.py +++ /dev/null @@ -1,33 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# [start workflow_declare] -"""A example workflow for task spark.""" - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.spark import DeployMode, ProgramType, Spark - -with ProcessDefinition(name="task_spark_example", tenant="tenant_exists") as pd: - task = Spark( - name="task_spark", - main_class="org.apache.spark.examples.SparkPi", - main_package="spark-examples_2.12-3.2.0.jar", - program_type=ProgramType.JAVA, - deploy_mode=DeployMode.LOCAL, - ) - pd.run() -# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_switch_example.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_switch_example.py deleted file mode 100644 index 7966af320e..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/task_switch_example.py +++ /dev/null @@ -1,51 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# [start workflow_declare] -r""" -A example workflow for task switch. - -This example will create four task in single workflow, with three shell task and one switch task. Task switch -have one upstream which we declare explicit with syntax `parent >> switch`, and two downstream automatically -set dependence by switch task by passing parameter `condition`. The graph of this workflow like: - --> switch_child_1 - / -parent -> switch -> - \ - --> switch_child_2 -. 
-""" - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.shell import Shell -from pydolphinscheduler.tasks.switch import Branch, Default, Switch, SwitchCondition - -with ProcessDefinition( - name="task_switch_example", tenant="tenant_exists", param={"var": "1"} -) as pd: - parent = Shell(name="parent", command="echo parent") - switch_child_1 = Shell(name="switch_child_1", command="echo switch_child_1") - switch_child_2 = Shell(name="switch_child_2", command="echo switch_child_2") - switch_condition = SwitchCondition( - Branch(condition="${var} > 1", task=switch_child_1), - Default(task=switch_child_2), - ) - - switch = Switch(name="switch", condition=switch_condition) - parent >> switch - pd.submit() -# [end workflow_declare] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial.py deleted file mode 100644 index 0478e68519..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial.py +++ /dev/null @@ -1,68 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -r""" -A tutorial example take you to experience pydolphinscheduler. - -After tutorial.py file submit to Apache DolphinScheduler server a DAG would be create, -and workflow DAG graph as below: - - --> task_child_one - / \ -task_parent --> --> task_union - \ / - --> task_child_two - -it will instantiate and run all the task it have. -""" - -# [start tutorial] -# [start package_import] -# Import ProcessDefinition object to define your workflow attributes -from pydolphinscheduler.core.process_definition import ProcessDefinition - -# Import task Shell object cause we would create some shell tasks later -from pydolphinscheduler.tasks.shell import Shell - -# [end package_import] - -# [start workflow_declare] -with ProcessDefinition( - name="tutorial", - schedule="0 0 0 * * ? *", - start_time="2021-01-01", - tenant="tenant_exists", -) as pd: - # [end workflow_declare] - # [start task_declare] - task_parent = Shell(name="task_parent", command="echo hello pydolphinscheduler") - task_child_one = Shell(name="task_child_one", command="echo 'child one'") - task_child_two = Shell(name="task_child_two", command="echo 'child two'") - task_union = Shell(name="task_union", command="echo union") - # [end task_declare] - - # [start task_relation_declare] - task_group = [task_child_one, task_child_two] - task_parent.set_downstream(task_group) - - task_union << task_group - # [end task_relation_declare] - - # [start submit_or_run] - pd.run() - # [end submit_or_run] -# [end tutorial] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial_decorator.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial_decorator.py deleted file mode 100644 index 986c1bbb6e..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial_decorator.py +++ /dev/null @@ -1,91 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -r""" -A tutorial example take you to experience pydolphinscheduler. - -After tutorial.py file submit to Apache DolphinScheduler server a DAG would be create, -and workflow DAG graph as below: - - --> task_child_one - / \ -task_parent --> --> task_union - \ / - --> task_child_two - -it will instantiate and run all the task it have. 
-""" - -# [start tutorial] -# [start package_import] -# Import ProcessDefinition object to define your workflow attributes -from pydolphinscheduler.core.process_definition import ProcessDefinition - -# Import task Shell object cause we would create some shell tasks later -from pydolphinscheduler.tasks.func_wrap import task - -# [end package_import] - - -# [start task_declare] -@task -def task_parent(): - """First task in this workflow.""" - print("echo hello pydolphinscheduler") - - -@task -def task_child_one(): - """Child task will be run parallel after task ``task_parent`` finished.""" - print("echo 'child one'") - - -@task -def task_child_two(): - """Child task will be run parallel after task ``task_parent`` finished.""" - print("echo 'child two'") - - -@task -def task_union(): - """Last task in this workflow.""" - print("echo union") - - -# [end task_declare] - - -# [start workflow_declare] -with ProcessDefinition( - name="tutorial_decorator", - schedule="0 0 0 * * ? *", - start_time="2021-01-01", - tenant="tenant_exists", -) as pd: - # [end workflow_declare] - - # [start task_relation_declare] - task_group = [task_child_one(), task_child_two()] - task_parent().set_downstream(task_group) - - task_union() << task_group - # [end task_relation_declare] - - # [start submit_or_run] - pd.run() - # [end submit_or_run] -# [end tutorial] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial_resource_plugin.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial_resource_plugin.py deleted file mode 100644 index 5b02022ee9..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/examples/tutorial_resource_plugin.py +++ /dev/null @@ -1,64 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -r""" -A tutorial example take you to experience pydolphinscheduler resource plugin. - -Resource plug-ins can be defined in workflows and tasks - -it will instantiate and run all the task it have. -""" -import os -from pathlib import Path - -# [start tutorial_resource_plugin] -# [start package_import] -# Import ProcessDefinition object to define your workflow attributes -from pydolphinscheduler.core.process_definition import ProcessDefinition - -# Import task Shell object cause we would create some shell tasks later -from pydolphinscheduler.resources_plugin.local import Local -from pydolphinscheduler.tasks.shell import Shell - -# [end package_import] - -# [start workflow_declare] -with ProcessDefinition( - name="tutorial_resource_plugin", - schedule="0 0 0 * * ? 
*", - start_time="2021-01-01", - tenant="tenant_exists", - resource_plugin=Local("/tmp"), -) as process_definition: - # [end workflow_declare] - # [start task_declare] - file = "resource.sh" - path = Path("/tmp").joinpath(file) - with open(str(path), "w") as f: - f.write("echo tutorial resource plugin") - task_parent = Shell( - name="local-resource-example", - command=file, - ) - print(task_parent.task_params) - os.remove(path) - # [end task_declare] - - # [start submit_or_run] - process_definition.run() - # [end submit_or_run] -# [end tutorial_resource_plugin] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/exceptions.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/exceptions.py deleted file mode 100644 index 5b0d1bb61f..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/exceptions.py +++ /dev/null @@ -1,46 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Exceptions for pydolphinscheduler.""" - - -class PyDSBaseException(Exception): - """Base exception for pydolphinscheduler.""" - - -class PyDSParamException(PyDSBaseException): - """Exception for pydolphinscheduler parameter verify error.""" - - -class PyDSTaskNoFoundException(PyDSBaseException): - """Exception for pydolphinscheduler workflow task no found error.""" - - -class PyDSJavaGatewayException(PyDSBaseException): - """Exception for pydolphinscheduler Java gateway error.""" - - -class PyDSProcessDefinitionNotAssignException(PyDSBaseException): - """Exception for pydolphinscheduler process definition not assign error.""" - - -class PyDSConfException(PyDSBaseException): - """Exception for pydolphinscheduler configuration error.""" - - -class PyResPluginException(PyDSBaseException): - """Exception for pydolphinscheduler resource plugin error.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/java_gateway.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/java_gateway.py deleted file mode 100644 index 54bb0a38b2..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/java_gateway.py +++ /dev/null @@ -1,308 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Module java gateway, contain gateway behavior.""" - -import contextlib -from logging import getLogger -from typing import Any, Optional - -from py4j.java_collections import JavaMap -from py4j.java_gateway import GatewayParameters, JavaGateway -from py4j.protocol import Py4JError - -from pydolphinscheduler import __version__, configuration -from pydolphinscheduler.constants import JavaGatewayDefault -from pydolphinscheduler.exceptions import PyDSJavaGatewayException - -logger = getLogger(__name__) - - -def launch_gateway( - address: Optional[str] = None, - port: Optional[int] = None, - auto_convert: Optional[bool] = True, -) -> JavaGateway: - """Launch java gateway to pydolphinscheduler. - - TODO Note that automatic conversion makes calling Java methods slightly less efficient because - in the worst case, Py4J needs to go through all registered converters for all parameters. - This is why automatic conversion is disabled by default. 
- """ - gateway_parameters = GatewayParameters( - address=address or configuration.JAVA_GATEWAY_ADDRESS, - port=port or configuration.JAVA_GATEWAY_PORT, - auto_convert=auto_convert or configuration.JAVA_GATEWAY_AUTO_CONVERT, - ) - gateway = JavaGateway(gateway_parameters=gateway_parameters) - return gateway - - -def gateway_result_checker( - result: JavaMap, - msg_check: Optional[str] = JavaGatewayDefault.RESULT_MESSAGE_SUCCESS, -) -> Any: - """Check weather java gateway result success or not.""" - if ( - result[JavaGatewayDefault.RESULT_STATUS_KEYWORD].toString() - != JavaGatewayDefault.RESULT_STATUS_SUCCESS - ): - raise PyDSJavaGatewayException("Failed when try to got result for java gateway") - if ( - msg_check is not None - and result[JavaGatewayDefault.RESULT_MESSAGE_KEYWORD] != msg_check - ): - raise PyDSJavaGatewayException("Get result state not success.") - return result - - -class JavaGate: - """Launch java gateway to pydolphin scheduler.""" - - def __init__( - self, - address: Optional[str] = None, - port: Optional[int] = None, - auto_convert: Optional[bool] = True, - ): - self.java_gateway = launch_gateway(address, port, auto_convert) - gateway_version = "unknown" - with contextlib.suppress(Py4JError): - # 1. Java gateway version is too old: doesn't have method 'getGatewayVersion()' - # 2. Error connecting to Java gateway - gateway_version = self.get_gateway_version() - if gateway_version != __version__: - logger.warning( - f"Using unmatched version of pydolphinscheduler (version {__version__}) " - f"and Java gateway (version {gateway_version}) may cause errors. 
" - "We strongly recommend you to find the matched version " - "(check: https://pypi.org/project/apache-dolphinscheduler)" - ) - - def get_gateway_version(self): - """Get the java gateway version, expected to be equal with pydolphinscheduler.""" - return self.java_gateway.entry_point.getGatewayVersion() - - def get_datasource_info(self, name: str): - """Get datasource info through java gateway.""" - return self.java_gateway.entry_point.getDatasourceInfo(name) - - def get_resources_file_info(self, program_type: str, main_package: str): - """Get resources file info through java gateway.""" - return self.java_gateway.entry_point.getResourcesFileInfo( - program_type, main_package - ) - - def create_or_update_resource( - self, user_name: str, name: str, content: str, description: Optional[str] = None - ): - """Create or update resource through java gateway.""" - return self.java_gateway.entry_point.createOrUpdateResource( - user_name, name, description, content - ) - - def query_resources_file_info(self, user_name: str, name: str): - """Get resources file info through java gateway.""" - return self.java_gateway.entry_point.queryResourcesFileInfo(user_name, name) - - def query_environment_info(self, name: str): - """Get environment info through java gateway.""" - return self.java_gateway.entry_point.getEnvironmentInfo(name) - - def get_code_and_version( - self, project_name: str, process_definition_name: str, task_name: str - ): - """Get code and version through java gateway.""" - return self.java_gateway.entry_point.getCodeAndVersion( - project_name, process_definition_name, task_name - ) - - def create_or_grant_project( - self, user: str, name: str, description: Optional[str] = None - ): - """Create or grant project through java gateway.""" - return self.java_gateway.entry_point.createOrGrantProject( - user, name, description - ) - - def query_project_by_name(self, user: str, name: str): - """Query project through java gateway.""" - return 
self.java_gateway.entry_point.queryProjectByName(user, name) - - def update_project( - self, user: str, project_code: int, project_name: str, description: str - ): - """Update project through java gateway.""" - return self.java_gateway.entry_point.updateProject( - user, project_code, project_name, description - ) - - def delete_project(self, user: str, code: int): - """Delete project through java gateway.""" - return self.java_gateway.entry_point.deleteProject(user, code) - - def create_tenant( - self, tenant_name: str, queue_name: str, description: Optional[str] = None - ): - """Create tenant through java gateway.""" - return self.java_gateway.entry_point.createTenant( - tenant_name, description, queue_name - ) - - def query_tenant(self, tenant_code: str): - """Query tenant through java gateway.""" - return self.java_gateway.entry_point.queryTenantByCode(tenant_code) - - def grant_tenant_to_user(self, user_name: str, tenant_code: str): - """Grant tenant to user through java gateway.""" - return self.java_gateway.entry_point.grantTenantToUser(user_name, tenant_code) - - def update_tenant( - self, - user: str, - tenant_id: int, - code: str, - queue_id: int, - description: Optional[str] = None, - ): - """Update tenant through java gateway.""" - return self.java_gateway.entry_point.updateTenant( - user, tenant_id, code, queue_id, description - ) - - def delete_tenant(self, user: str, tenant_id: int): - """Delete tenant through java gateway.""" - return self.java_gateway.entry_point.deleteTenantById(user, tenant_id) - - def create_user( - self, - name: str, - password: str, - email: str, - phone: str, - tenant: str, - queue: str, - status: int, - ): - """Create user through java gateway.""" - return self.java_gateway.entry_point.createUser( - name, password, email, phone, tenant, queue, status - ) - - def query_user(self, user_id: int): - """Query user through java gateway.""" - return self.java_gateway.queryUser(user_id) - - def update_user( - self, - name: str, - 
password: str, - email: str, - phone: str, - tenant: str, - queue: str, - status: int, - ): - """Update user through java gateway.""" - return self.java_gateway.entry_point.updateUser( - name, password, email, phone, tenant, queue, status - ) - - def delete_user(self, name: str, user_id: int): - """Delete user through java gateway.""" - return self.java_gateway.entry_point.deleteUser(name, user_id) - - def get_dependent_info( - self, - project_name: str, - process_definition_name: str, - task_name: Optional[str] = None, - ): - """Get dependent info through java gateway.""" - return self.java_gateway.entry_point.getDependentInfo( - project_name, process_definition_name, task_name - ) - - def get_process_definition_info( - self, user_name: str, project_name: str, process_definition_name: str - ): - """Get process definition info through java gateway.""" - return self.java_gateway.entry_point.getProcessDefinitionInfo( - user_name, project_name, process_definition_name - ) - - def create_or_update_process_definition( - self, - user_name: str, - project_name: str, - name: str, - description: str, - global_params: str, - warning_type: str, - warning_group_id: int, - timeout: int, - worker_group: str, - tenant_code: str, - release_state: int, - task_relation_json: str, - task_definition_json: str, - schedule: Optional[str] = None, - other_params_json: Optional[str] = None, - execution_type: Optional[str] = None, - ): - """Create or update process definition through java gateway.""" - return self.java_gateway.entry_point.createOrUpdateProcessDefinition( - user_name, - project_name, - name, - description, - global_params, - schedule, - warning_type, - warning_group_id, - timeout, - worker_group, - tenant_code, - release_state, - task_relation_json, - task_definition_json, - other_params_json, - execution_type, - ) - - def exec_process_instance( - self, - user_name: str, - project_name: str, - process_definition_name: str, - cron_time: str, - worker_group: str, - 
warning_type: str, - warning_group_id: int, - timeout: int, - ): - """Exec process instance through java gateway.""" - return self.java_gateway.entry_point.execProcessInstance( - user_name, - project_name, - process_definition_name, - cron_time, - worker_group, - warning_type, - warning_group_id, - timeout, - ) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/__init__.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/__init__.py deleted file mode 100644 index b289954caa..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/__init__.py +++ /dev/null @@ -1,36 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Init Models package, keeping object related to DolphinScheduler covert from Java Gateway Service.""" - -from pydolphinscheduler.models.base import Base -from pydolphinscheduler.models.base_side import BaseSide -from pydolphinscheduler.models.project import Project -from pydolphinscheduler.models.queue import Queue -from pydolphinscheduler.models.tenant import Tenant -from pydolphinscheduler.models.user import User -from pydolphinscheduler.models.worker_group import WorkerGroup - -__all__ = [ - "Base", - "BaseSide", - "Project", - "Tenant", - "User", - "Queue", - "WorkerGroup", -] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/base.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/base.py deleted file mode 100644 index 2647714af0..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/base.py +++ /dev/null @@ -1,74 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""DolphinScheduler Base object.""" - -from typing import Dict, Optional - -# from pydolphinscheduler.models.user import User -from pydolphinscheduler.utils.string import attr2camel - - -class Base: - """DolphinScheduler Base object.""" - - # Object key attribute, to test whether object equals and so on. - _KEY_ATTR: set = {"name", "description"} - - # Object defines attribute, use when needs to communicate with Java gateway server. - _DEFINE_ATTR: set = set() - - # Object default attribute, will add those attribute to `_DEFINE_ATTR` when init assign missing. - _DEFAULT_ATTR: Dict = {} - - def __init__(self, name: str, description: Optional[str] = None): - self.name = name - self.description = description - - def __repr__(self) -> str: - return f'<{type(self).__name__}: name="{self.name}">' - - def __eq__(self, other): - return type(self) == type(other) and all( - getattr(self, a, None) == getattr(other, a, None) for a in self._KEY_ATTR - ) - - def get_define_custom( - self, camel_attr: bool = True, custom_attr: set = None - ) -> Dict: - """Get object definition attribute by given attr set.""" - content = {} - for attr in custom_attr: - val = getattr(self, attr, None) - if camel_attr: - content[attr2camel(attr)] = val - else: - content[attr] = val - return content - - def get_define(self, camel_attr: bool = True) -> Dict: - """Get object definition attribute communicate to Java gateway server. - - use attribute `self._DEFINE_ATTR` to determine which attributes should including when - object tries to communicate with Java gateway server. 
- """ - content = self.get_define_custom(camel_attr, self._DEFINE_ATTR) - update_default = { - k: self._DEFAULT_ATTR.get(k) for k in self._DEFAULT_ATTR if k not in content - } - content.update(update_default) - return content diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/base_side.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/base_side.py deleted file mode 100644 index 99b4007a85..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/base_side.py +++ /dev/null @@ -1,48 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Module for models object.""" - -from typing import Optional - -from pydolphinscheduler import configuration -from pydolphinscheduler.models import Base - - -class BaseSide(Base): - """Base class for models object, it declare base behavior for them.""" - - def __init__(self, name: str, description: Optional[str] = None): - super().__init__(name, description) - - @classmethod - def create_if_not_exists( - cls, - # TODO comment for avoiding cycle import - # user: Optional[User] = ProcessDefinitionDefault.USER - user=configuration.WORKFLOW_USER, - ): - """Create Base if not exists.""" - raise NotImplementedError - - def delete_all(self): - """Delete all method.""" - if not self: - return - list_pro = [key for key in self.__dict__.keys()] - for key in list_pro: - self.__delattr__(key) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/project.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/project.py deleted file mode 100644 index 678332ba3b..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/project.py +++ /dev/null @@ -1,72 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""DolphinScheduler Project object.""" - -from typing import Optional - -from pydolphinscheduler import configuration -from pydolphinscheduler.java_gateway import JavaGate -from pydolphinscheduler.models import BaseSide - - -class Project(BaseSide): - """DolphinScheduler Project object.""" - - def __init__( - self, - name: str = configuration.WORKFLOW_PROJECT, - description: Optional[str] = None, - code: Optional[int] = None, - ): - super().__init__(name, description) - self.code = code - - def create_if_not_exists(self, user=configuration.USER_NAME) -> None: - """Create Project if not exists.""" - JavaGate().create_or_grant_project(user, self.name, self.description) - # TODO recover result checker - # gateway_result_checker(result, None) - - @classmethod - def get_project_by_name(cls, user=configuration.USER_NAME, name=None) -> "Project": - """Get Project by name.""" - project = JavaGate().query_project_by_name(user, name) - if project is None: - return cls() - return cls( - name=project.getName(), - description=project.getDescription(), - code=project.getCode(), - ) - - def update( - self, - user=configuration.USER_NAME, - project_code=None, - project_name=None, - description=None, - ) -> None: - """Update Project.""" - JavaGate().update_project(user, project_code, project_name, description) - self.name = project_name - self.description = description - - def delete(self, user=configuration.USER_NAME) -> None: - """Delete Project.""" - JavaGate().delete_project(user, self.code) - self.delete_all() diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/queue.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/queue.py deleted file mode 100644 index e6da2594c8..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/queue.py +++ /dev/null @@ -1,34 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""DolphinScheduler User object.""" - -from typing import Optional - -from pydolphinscheduler import configuration -from pydolphinscheduler.models import BaseSide - - -class Queue(BaseSide): - """DolphinScheduler Queue object.""" - - def __init__( - self, - name: str = configuration.WORKFLOW_QUEUE, - description: Optional[str] = "", - ): - super().__init__(name, description) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/tenant.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/tenant.py deleted file mode 100644 index 09b00ccf3a..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/tenant.py +++ /dev/null @@ -1,80 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""DolphinScheduler Tenant object.""" - -from typing import Optional - -from pydolphinscheduler import configuration -from pydolphinscheduler.java_gateway import JavaGate -from pydolphinscheduler.models import BaseSide - - -class Tenant(BaseSide): - """DolphinScheduler Tenant object.""" - - def __init__( - self, - name: str = configuration.WORKFLOW_TENANT, - queue: str = configuration.WORKFLOW_QUEUE, - description: Optional[str] = None, - tenant_id: Optional[int] = None, - code: Optional[str] = None, - user_name: Optional[str] = None, - ): - super().__init__(name, description) - self.tenant_id = tenant_id - self.queue = queue - self.code = code - self.user_name = user_name - - def create_if_not_exists( - self, queue_name: str, user=configuration.USER_NAME - ) -> None: - """Create Tenant if not exists.""" - tenant = JavaGate().create_tenant(self.name, self.description, queue_name) - self.tenant_id = tenant.getId() - self.code = tenant.getTenantCode() - # gateway_result_checker(result, None) - - @classmethod - def get_tenant(cls, code: str) -> "Tenant": - """Get Tenant list.""" - tenant = JavaGate().query_tenant(code) - if tenant is None: - return cls() - return cls( - description=tenant.getDescription(), - tenant_id=tenant.getId(), - code=tenant.getTenantCode(), - queue=tenant.getQueueId(), - ) - - def update( - self, user=configuration.USER_NAME, code=None, queue_id=None, description=None - ) -> None: - """Update Tenant.""" - JavaGate().update_tenant(user, self.tenant_id, code, queue_id, description) - # TODO: check queue_id and queue_name - self.queue = 
str(queue_id) - self.code = code - self.description = description - - def delete(self) -> None: - """Delete Tenant.""" - JavaGate().delete_tenant(self.user_name, self.tenant_id) - self.delete_all() diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/user.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/user.py deleted file mode 100644 index 57c6af647f..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/user.py +++ /dev/null @@ -1,130 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""DolphinScheduler User object.""" - -from typing import Optional - -from pydolphinscheduler import configuration -from pydolphinscheduler.java_gateway import JavaGate -from pydolphinscheduler.models import BaseSide, Tenant - - -class User(BaseSide): - """DolphinScheduler User object.""" - - _KEY_ATTR = { - "name", - "password", - "email", - "phone", - "tenant", - "queue", - "status", - } - - def __init__( - self, - name: str, - password: Optional[str] = configuration.USER_PASSWORD, - email: Optional[str] = configuration.USER_EMAIL, - phone: Optional[str] = configuration.USER_PHONE, - tenant: Optional[str] = configuration.WORKFLOW_TENANT, - queue: Optional[str] = configuration.WORKFLOW_QUEUE, - status: Optional[int] = configuration.USER_STATE, - ): - super().__init__(name) - self.user_id: Optional[int] = None - self.password = password - self.email = email - self.phone = phone - self.tenant = tenant - self.queue = queue - self.status = status - - def create_tenant_if_not_exists(self) -> None: - """Create tenant object.""" - tenant = Tenant(name=self.tenant, queue=self.queue) - tenant.create_if_not_exists(self.queue) - - def create_if_not_exists(self, **kwargs): - """Create User if not exists.""" - # Should make sure queue already exists. 
- self.create_tenant_if_not_exists() - user = JavaGate().create_user( - self.name, - self.password, - self.email, - self.phone, - self.tenant, - self.queue, - self.status, - ) - self.user_id = user.getId() - # TODO recover result checker - # gateway_result_checker(result, None) - - @classmethod - def get_user(cls, user_id) -> "User": - """Get User.""" - user = JavaGate().query_user(user_id) - if user is None: - return cls("") - user_id = user.getId() - user = cls( - name=user.getUserName(), - password=user.getUserPassword(), - email=user.getEmail(), - phone=user.getPhone(), - tenant=user.getTenantCode(), - queue=user.getQueueName(), - status=user.getState(), - ) - user.user_id = user_id - return user - - def update( - self, - password=None, - email=None, - phone=None, - tenant=None, - queue=None, - status=None, - ) -> None: - """Update User.""" - user = JavaGate().update_user( - self.name, - password, - email, - phone, - tenant, - queue, - status, - ) - self.user_id = user.getId() - self.name = user.getUserName() - self.password = user.getUserPassword() - self.email = user.getEmail() - self.phone = user.getPhone() - self.queue = user.getQueueName() - self.status = user.getState() - - def delete(self) -> None: - """Delete User.""" - JavaGate().delete_user(self.name, self.user_id) - self.delete_all() diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/worker_group.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/worker_group.py deleted file mode 100644 index bc55eafc34..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/models/worker_group.py +++ /dev/null @@ -1,30 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""DolphinScheduler Worker Group object.""" - -from typing import Optional - -from pydolphinscheduler.models import BaseSide - - -class WorkerGroup(BaseSide): - """DolphinScheduler Worker Group object.""" - - def __init__(self, name: str, address: str, description: Optional[str] = None): - super().__init__(name, description) - self.address = address diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/__init__.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/__init__.py deleted file mode 100644 index 1e24e1eb87..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Init resources_plugin package.""" -from pydolphinscheduler.resources_plugin.github import GitHub -from pydolphinscheduler.resources_plugin.gitlab import GitLab -from pydolphinscheduler.resources_plugin.local import Local -from pydolphinscheduler.resources_plugin.oss import OSS -from pydolphinscheduler.resources_plugin.s3 import S3 - -__all__ = ["Local", "GitHub", "GitLab", "OSS", "S3"] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/base/__init__.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/base/__init__.py deleted file mode 100644 index 4253cda64d..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/base/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
class BucketFileInfo:
    """A class that defines the details of BUCKET files.

    :param bucket: A string representing the bucket to which the bucket file belongs.
    :param file_path: A string representing the bucket file path.
    """

    def __init__(
        self,
        bucket: Optional[str] = None,
        file_path: Optional[str] = None,
        *args,
        **kwargs
    ):
        self.bucket = bucket
        self.file_path = file_path


class OSSFileInfo(BucketFileInfo):
    """A class that defines the details of OSS files.

    :param endpoint: A string representing the OSS file endpoint.
    :param bucket: A string representing the bucket to which the OSS file belongs.
    :param file_path: A string representing the OSS file path.
    """

    def __init__(
        self,
        endpoint: Optional[str] = None,
        bucket: Optional[str] = None,
        file_path: Optional[str] = None,
        *args,
        **kwargs
    ):
        super().__init__(bucket=bucket, file_path=file_path, *args, **kwargs)
        self.endpoint = endpoint


class S3FileInfo(BucketFileInfo):
    """A class that defines the details of S3 files.

    :param bucket: A string representing the bucket to which the S3 file belongs.
    :param file_path: A string representing the S3 file path.
    """

    def __init__(
        self,
        bucket: Optional[str] = None,
        file_path: Optional[str] = None,
        *args,
        **kwargs
    ):
        super().__init__(bucket=bucket, file_path=file_path, *args, **kwargs)


class Bucket(object, metaclass=ABCMeta):
    """An abstract base class for object-storage (bucket) resource plugins.

    Fixed from the original docstring, which was copy-pasted from the Git base
    class and wrongly described this as a git repository abstraction.
    """

    # Parsed details of the most recently resolved bucket file; populated by
    # subclasses in :func:`get_bucket_file_info`. The original annotation was a
    # bare ``Optional`` with no type parameter, which documents nothing.
    _bucket_file_info: Optional[BucketFileInfo] = None

    @abstractmethod
    def get_bucket_file_info(self, path: str):
        """Get the detailed information of BUCKET file according to the file URL."""
        raise NotImplementedError
class GitFileInfo:
    """A class that defines the details of GIT files.

    :param user: A string representing the user the git file belongs to.
    :param repo_name: A string representing the repository to which the git file belongs.
    :param branch: A string representing the branch to which the git file belongs.
    :param file_path: A string representing the git file path.
    """

    def __init__(
        self,
        user: Optional[str] = None,
        repo_name: Optional[str] = None,
        branch: Optional[str] = None,
        file_path: Optional[str] = None,
        *args,
        **kwargs
    ):
        self.user = user
        self.repo_name = repo_name
        self.branch = branch
        self.file_path = file_path


class GitHubFileInfo(GitFileInfo):
    """A class that defines the details of GitHub files.

    :param user: A string representing the user the GitHub file belongs to.
    :param repo_name: A string representing the repository to which the GitHub file belongs.
    :param branch: A string representing the branch to which the GitHub file belongs.
    :param file_path: A string representing the GitHub file path.
    """

    def __init__(
        self,
        user: Optional[str] = None,
        repo_name: Optional[str] = None,
        branch: Optional[str] = None,
        file_path: Optional[str] = None,
        *args,
        **kwargs
    ):
        super().__init__(
            user=user,
            repo_name=repo_name,
            branch=branch,
            file_path=file_path,
            *args,
            **kwargs
        )


class GitLabFileInfo(GitFileInfo):
    """A class that defines the details of GitLab files.

    Param docs fixed: the original said "GitHub" for ``branch`` and
    ``file_path`` (copy-paste error).

    :param host: A string representing the domain name the GitLab file belongs to.
    :param user: A string representing the user the GitLab file belongs to.
    :param repo_name: A string representing the repository to which the GitLab file belongs.
    :param branch: A string representing the branch to which the GitLab file belongs.
    :param file_path: A string representing the GitLab file path.
    """

    def __init__(
        self,
        host: Optional[str] = None,
        user: Optional[str] = None,
        repo_name: Optional[str] = None,
        branch: Optional[str] = None,
        file_path: Optional[str] = None,
        *args,
        **kwargs
    ):
        super().__init__(
            user=user,
            repo_name=repo_name,
            branch=branch,
            file_path=file_path,
            *args,
            **kwargs
        )
        self.host = host


class Git(object, metaclass=ABCMeta):
    """An abstract class of online code repository based on git implementation."""

    # Parsed details of the most recently resolved git file; populated by
    # subclasses in :func:`get_git_file_info`. The original annotation was a
    # bare ``Optional`` with no type parameter, which documents nothing.
    _git_file_info: Optional[GitFileInfo] = None

    @abstractmethod
    def get_git_file_info(self, path: str):
        """Get the detailed information of GIT file according to the file URL."""
        raise NotImplementedError
class GitHub(ResourcePlugin, Git):
    """GitHub resource plugin, a plugin for task and workflow to dolphinscheduler to read resource.

    :param prefix: A string representing the prefix of GitHub.
    :param access_token: A string used for identity authentication of GitHub private repository.
    """

    # Parsed details of the most recently resolved GitHub file.
    _git_file_info: Optional[GitHubFileInfo] = None

    def __init__(
        self, prefix: str, access_token: Optional[str] = None, *args, **kwargs
    ):
        super().__init__(prefix, *args, **kwargs)
        self.access_token = access_token

    def build_req_api(
        self,
        user: str,
        repo_name: str,
        file_path: str,
        api: str,
    ):
        """Fill the user/repo/path placeholders of *api* and return the URL."""
        replacements = (
            ("{user}", user),
            ("{repo_name}", repo_name),
            ("{file_path}", file_path),
        )
        for placeholder, value in replacements:
            api = api.replace(placeholder, value)
        return api

    def get_git_file_info(self, path: str):
        """Get file information from the file url, like repository name, user, branch, and file path."""
        segments = path.split(Symbol.SLASH)
        # Everything after the seventh slash is the in-repository file path.
        file_path_start = self.get_index(path, Symbol.SLASH, 7) + 1
        self._git_file_info = GitHubFileInfo(
            user=segments[3],
            repo_name=segments[4],
            branch=segments[6],
            file_path=path[file_path_start:],
        )

    def get_req_url(self):
        """Build request URL according to file information."""
        return self.build_req_api(
            user=self._git_file_info.user,
            repo_name=self._git_file_info.repo_name,
            file_path=self._git_file_info.file_path,
            api="https://api.github.com/repos/{user}/{repo_name}/contents/{file_path}",
        )

    def read_file(self, suf: str):
        """Get the content of the file.

        The address of the file is the prefix of the resource plugin plus the parameter suf.
        """
        return self.req(urljoin(self.prefix, suf))

    def req(self, path: str):
        """Send HTTP request, parse response data, and get file content."""
        headers = {
            "Content-Type": "application/json; charset=utf-8",
        }
        if self.access_token is not None:
            # Token auth is only needed for private repositories.
            headers["Authorization"] = "Bearer %s" % self.access_token
        self.get_git_file_info(path)
        response = requests.get(
            headers=headers,
            url=self.get_req_url(),
            params={"ref": self._git_file_info.branch},
        )
        if response.status_code != requests.codes.ok:
            raise Exception(response.json())
        # The GitHub contents API returns the file body base64-encoded.
        payload = response.json()
        return base64.b64decode(payload["content"]).decode("utf-8")
- -"""DolphinScheduler gitlab resource plugin.""" -from typing import Optional -from urllib.parse import urljoin, urlparse - -import gitlab -import requests - -from pydolphinscheduler.constants import Symbol -from pydolphinscheduler.core.resource_plugin import ResourcePlugin -from pydolphinscheduler.resources_plugin.base.git import Git, GitLabFileInfo - - -class GitLab(ResourcePlugin, Git): - """GitLab object, declare GitLab resource plugin for task and workflow to dolphinscheduler. - - :param prefix: A string representing the prefix of GitLab. - :param private_token: A string used for identity authentication of GitLab private or Internal repository. - :param oauth_token: A string used for identity authentication of GitLab private or Internal repository. - :param username: A string representing the user of the repository. - :param password: A string representing the user password. - """ - - def __init__( - self, - prefix: str, - private_token: Optional[str] = None, - oauth_token: Optional[str] = None, - username: Optional[str] = None, - password: Optional[str] = None, - *args, - **kwargs, - ): - super().__init__(prefix, *args, **kwargs) - self.private_token = private_token - self.oauth_token = oauth_token - self.username = username - self.password = password - - def get_git_file_info(self, path: str): - """Get file information from the file url, like repository name, user, branch, and file path.""" - self.get_index(path, Symbol.SLASH, 8) - result = urlparse(path) - elements = result.path.split(Symbol.SLASH) - self._git_file_info = GitLabFileInfo( - host=f"{result.scheme}://{result.hostname}", - repo_name=elements[2], - branch=elements[5], - file_path=Symbol.SLASH.join( - str(elements[i]) for i in range(6, len(elements)) - ), - user=elements[1], - ) - - def authentication(self): - """Gitlab authentication.""" - host = self._git_file_info.host - if self.private_token is not None: - return gitlab.Gitlab(host, private_token=self.private_token) - if self.oauth_token is 
not None: - return gitlab.Gitlab(host, oauth_token=self.oauth_token) - if self.username is not None and self.password is not None: - oauth_token = self.OAuth_token() - return gitlab.Gitlab(host, oauth_token=oauth_token) - return gitlab.Gitlab(host) - - def OAuth_token(self): - """Obtain OAuth Token.""" - data = { - "grant_type": "password", - "username": self.username, - "password": self.password, - } - host = self._git_file_info.host - resp = requests.post("%s/oauth/token" % host, data=data) - oauth_token = resp.json()["access_token"] - return oauth_token - - def read_file(self, suf: str): - """Get the content of the file. - - The address of the file is the prefix of the resource plugin plus the parameter suf. - """ - path = urljoin(self.prefix, suf) - self.get_git_file_info(path) - gl = self.authentication() - project = gl.projects.get( - "%s/%s" % (self._git_file_info.user, self._git_file_info.repo_name) - ) - return ( - project.files.get( - file_path=self._git_file_info.file_path, ref=self._git_file_info.branch - ) - .decode() - .decode() - ) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/local.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/local.py deleted file mode 100644 index c1fc56d3d3..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/local.py +++ /dev/null @@ -1,56 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
class Local(ResourcePlugin):
    """Local object, declare local resource plugin for task and workflow to dolphinscheduler.

    :param prefix: A string representing the prefix of Local.
    """

    # [start init_method]
    def __init__(self, prefix: str, *args, **kwargs):
        super().__init__(prefix, *args, **kwargs)

    # [end init_method]

    # [start read_file_method]
    def read_file(self, suf: str):
        """Get the content of the file.

        The address of the file is the prefix of the resource plugin plus the parameter suf.
        """
        path = Path(self.prefix) / suf
        if not path.exists():
            raise PyResPluginException("{} is not found".format(str(path)))
        if not os.access(str(path), os.R_OK):
            raise PyResPluginException(
                "You don't have permission to access {}".format(self.prefix + suf)
            )
        with path.open() as reader:
            return reader.read()

    # [end read_file_method]
- -"""DolphinScheduler oss resource plugin.""" -from typing import Optional -from urllib.parse import urljoin, urlparse - -import oss2 - -from pydolphinscheduler.constants import Symbol -from pydolphinscheduler.core.resource_plugin import ResourcePlugin -from pydolphinscheduler.resources_plugin.base.bucket import Bucket, OSSFileInfo - - -class OSS(ResourcePlugin, Bucket): - """OSS object, declare OSS resource plugin for task and workflow to dolphinscheduler. - - :param prefix: A string representing the prefix of OSS. - :param access_key_id: A string representing the ID of AccessKey for AliCloud OSS. - :param access_key_secret: A string representing the secret of AccessKey for AliCloud OSS. - """ - - def __init__( - self, - prefix: str, - access_key_id: Optional[str] = None, - access_key_secret: Optional[str] = None, - *args, - **kwargs, - ): - super().__init__(prefix, *args, **kwargs) - self.access_key_id = access_key_id - self.access_key_secret = access_key_secret - - _bucket_file_info: Optional[OSSFileInfo] = None - - def get_bucket_file_info(self, path: str): - """Get file information from the file url, like repository name, user, branch, and file path.""" - self.get_index(path, Symbol.SLASH, 3) - result = urlparse(path) - hostname = result.hostname - elements = hostname.split(Symbol.POINT) - self._bucket_file_info = OSSFileInfo( - endpoint=f"{result.scheme}://" - f"{Symbol.POINT.join(str(elements[i]) for i in range(1, len(elements)))}", - bucket=hostname.split(Symbol.POINT)[0], - file_path=result.path[1:], - ) - - def read_file(self, suf: str): - """Get the content of the file. - - The address of the file is the prefix of the resource plugin plus the parameter suf. 
- """ - path = urljoin(self.prefix, suf) - self.get_bucket_file_info(path) - auth = oss2.Auth(self.access_key_id, self.access_key_secret) - bucket = oss2.Bucket( - auth, self._bucket_file_info.endpoint, self._bucket_file_info.bucket - ) - result = bucket.get_object(self._bucket_file_info.file_path).read().decode() - return result.read().decode() diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/s3.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/s3.py deleted file mode 100644 index da1fe83fd1..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/s3.py +++ /dev/null @@ -1,74 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""DolphinScheduler S3 resource plugin.""" - -from typing import Optional -from urllib.parse import urljoin - -import boto3 - -from pydolphinscheduler.constants import Symbol -from pydolphinscheduler.core.resource_plugin import ResourcePlugin -from pydolphinscheduler.resources_plugin.base.bucket import Bucket, S3FileInfo - - -class S3(ResourcePlugin, Bucket): - """S3 object, declare S3 resource plugin for task and workflow to dolphinscheduler. 
- - :param prefix: A string representing the prefix of S3. - :param access_key_id: A string representing the ID of AccessKey for Amazon S3. - :param access_key_secret: A string representing the secret of AccessKey for Amazon S3. - """ - - def __init__( - self, - prefix: str, - access_key_id: Optional[str] = None, - access_key_secret: Optional[str] = None, - *args, - **kwargs - ): - super().__init__(prefix, *args, **kwargs) - self.access_key_id = access_key_id - self.access_key_secret = access_key_secret - - _bucket_file_info: Optional[S3FileInfo] = None - - def get_bucket_file_info(self, path: str): - """Get file information from the file url, like repository name, user, branch, and file path.""" - elements = path.split(Symbol.SLASH) - self.get_index(path, Symbol.SLASH, 3) - self._bucket_file_info = S3FileInfo( - bucket=elements[2].split(Symbol.POINT)[0], - file_path=Symbol.SLASH.join( - str(elements[i]) for i in range(3, len(elements)) - ), - ) - - def read_file(self, suf: str): - """Get the content of the file. - - The address of the file is the prefix of the resource plugin plus the parameter suf. - """ - path = urljoin(self.prefix, suf) - self.get_bucket_file_info(path) - bucket = self._bucket_file_info.bucket - key = self._bucket_file_info.file_path - s3_resource = boto3.resource("s3") - s3_object = s3_resource.Object(bucket, key) - return s3_object.get()["Body"].read().decode("utf-8") diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/__init__.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/__init__.py deleted file mode 100644 index 972b1b76dd..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/__init__.py +++ /dev/null @@ -1,69 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Init pydolphinscheduler.tasks package.""" - -from pydolphinscheduler.tasks.condition import FAILURE, SUCCESS, And, Condition, Or -from pydolphinscheduler.tasks.datax import CustomDataX, DataX -from pydolphinscheduler.tasks.dependent import Dependent -from pydolphinscheduler.tasks.dvc import DVCDownload, DVCInit, DVCUpload -from pydolphinscheduler.tasks.flink import Flink -from pydolphinscheduler.tasks.http import Http -from pydolphinscheduler.tasks.map_reduce import MR -from pydolphinscheduler.tasks.mlflow import ( - MLflowModels, - MLFlowProjectsAutoML, - MLFlowProjectsBasicAlgorithm, - MLFlowProjectsCustom, -) -from pydolphinscheduler.tasks.openmldb import OpenMLDB -from pydolphinscheduler.tasks.procedure import Procedure -from pydolphinscheduler.tasks.python import Python -from pydolphinscheduler.tasks.pytorch import Pytorch -from pydolphinscheduler.tasks.sagemaker import SageMaker -from pydolphinscheduler.tasks.shell import Shell -from pydolphinscheduler.tasks.spark import Spark -from pydolphinscheduler.tasks.sql import Sql -from pydolphinscheduler.tasks.sub_process import SubProcess -from pydolphinscheduler.tasks.switch import Branch, Default, Switch, SwitchCondition - -__all__ = [ - "Condition", - "DataX", - "CustomDataX", - "Dependent", - "DVCInit", - "DVCUpload", - "DVCDownload", - "Flink", - "Http", - "MR", - "OpenMLDB", - "MLFlowProjectsBasicAlgorithm", - "MLFlowProjectsCustom", - 
"MLFlowProjectsAutoML", - "MLflowModels", - "Procedure", - "Python", - "Pytorch", - "Shell", - "Spark", - "Sql", - "SubProcess", - "Switch", - "SageMaker", -] diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/condition.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/condition.py deleted file mode 100644 index cb139f1587..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/condition.py +++ /dev/null @@ -1,204 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task Conditions.""" - -from typing import Dict, List - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.task import Task -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.models.base import Base - - -class Status(Base): - """Base class of Condition task status. - - It a parent class for :class:`SUCCESS` and :class:`FAILURE`. Provider status name - and :func:`get_define` to sub class. 
- """ - - def __init__(self, *tasks): - super().__init__(f"Condition.{self.status_name()}") - self.tasks = tasks - - def __repr__(self) -> str: - return "depend_item_list" - - @classmethod - def status_name(cls) -> str: - """Get name for Status or its sub class.""" - return cls.__name__.upper() - - def get_define(self, camel_attr: bool = True) -> List: - """Get status definition attribute communicate to Java gateway server.""" - content = [] - for task in self.tasks: - if not isinstance(task, Task): - raise PyDSParamException( - "%s only accept class Task or sub class Task, but get %s", - (self.status_name(), type(task)), - ) - content.append({"depTaskCode": task.code, "status": self.status_name()}) - return content - - -class SUCCESS(Status): - """Class SUCCESS to task condition, sub class of :class:`Status`.""" - - def __init__(self, *tasks): - super().__init__(*tasks) - - -class FAILURE(Status): - """Class FAILURE to task condition, sub class of :class:`Status`.""" - - def __init__(self, *tasks): - super().__init__(*tasks) - - -class ConditionOperator(Base): - """Set ConditionTask or ConditionOperator with specific operator.""" - - _DEFINE_ATTR = { - "relation", - } - - def __init__(self, *args): - super().__init__(self.__class__.__name__) - self.args = args - - def __repr__(self) -> str: - return "depend_task_list" - - @classmethod - def operator_name(cls) -> str: - """Get operator name in different class.""" - return cls.__name__.upper() - - @property - def relation(self) -> str: - """Get operator name in different class, for function :func:`get_define`.""" - return self.operator_name() - - def set_define_attr(self) -> str: - """Set attribute to function :func:`get_define`. - - It is a wrapper for both `And` and `Or` operator. 
- """ - result = [] - attr = None - for condition in self.args: - if isinstance(condition, (Status, ConditionOperator)): - if attr is None: - attr = repr(condition) - elif repr(condition) != attr: - raise PyDSParamException( - "Condition %s operator parameter only support same type.", - self.relation, - ) - else: - raise PyDSParamException( - "Condition %s operator parameter support ConditionTask and ConditionOperator but got %s.", - (self.relation, type(condition)), - ) - if attr == "depend_item_list": - result.extend(condition.get_define()) - else: - result.append(condition.get_define()) - setattr(self, attr, result) - return attr - - def get_define(self, camel_attr=True) -> Dict: - """Overwrite Base.get_define to get task Condition specific get define.""" - attr = self.set_define_attr() - dependent_define_attr = self._DEFINE_ATTR.union({attr}) - return super().get_define_custom( - camel_attr=True, custom_attr=dependent_define_attr - ) - - -class And(ConditionOperator): - """Operator And for task condition. - - It could accept both :class:`Task` and children of :class:`ConditionOperator`, - and set AND condition to those args. - """ - - def __init__(self, *args): - super().__init__(*args) - - -class Or(ConditionOperator): - """Operator Or for task condition. - - It could accept both :class:`Task` and children of :class:`ConditionOperator`, - and set OR condition to those args. 
- """ - - def __init__(self, *args): - super().__init__(*args) - - -class Condition(Task): - """Task condition object, declare behavior for condition task to dolphinscheduler.""" - - def __init__( - self, - name: str, - condition: ConditionOperator, - success_task: Task, - failed_task: Task, - *args, - **kwargs, - ): - super().__init__(name, TaskType.CONDITIONS, *args, **kwargs) - self.condition = condition - self.success_task = success_task - self.failed_task = failed_task - # Set condition tasks as current task downstream - self._set_dep() - - def _set_dep(self) -> None: - """Set upstream according to parameter `condition`.""" - upstream = [] - for cond in self.condition.args: - if isinstance(cond, ConditionOperator): - for status in cond.args: - upstream.extend(list(status.tasks)) - self.set_upstream(upstream) - self.set_downstream([self.success_task, self.failed_task]) - - @property - def condition_result(self) -> Dict: - """Get condition result define for java gateway.""" - return { - "successNode": [self.success_task.code], - "failedNode": [self.failed_task.code], - } - - @property - def task_params(self, camel_attr: bool = True, custom_attr: set = None) -> Dict: - """Override Task.task_params for Condition task. - - Condition task have some specials attribute `dependence`, and in most of the task - this attribute is None and use empty dict `{}` as default value. We do not use class - attribute `_task_custom_attr` due to avoid attribute cover. 
- """ - params = super().task_params - params["dependence"] = self.condition.get_define() - return params diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/datax.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/datax.py deleted file mode 100644 index 945f7824e4..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/datax.py +++ /dev/null @@ -1,127 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task datax.""" - -from typing import Dict, List, Optional - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.database import Database -from pydolphinscheduler.core.task import Task - - -class CustomDataX(Task): - """Task CustomDatax object, declare behavior for custom DataX task to dolphinscheduler. - - You provider json template for DataX, it can synchronize data according to the template you provided. 
- """ - - CUSTOM_CONFIG = 1 - - _task_custom_attr = {"custom_config", "json", "xms", "xmx"} - - ext: set = {".json"} - ext_attr: str = "_json" - - def __init__( - self, - name: str, - json: str, - xms: Optional[int] = 1, - xmx: Optional[int] = 1, - *args, - **kwargs - ): - self._json = json - super().__init__(name, TaskType.DATAX, *args, **kwargs) - self.custom_config = self.CUSTOM_CONFIG - self.xms = xms - self.xmx = xmx - - -class DataX(Task): - """Task DataX object, declare behavior for DataX task to dolphinscheduler. - - It should run database datax job in multiply sql link engine, such as: - - MySQL - - Oracle - - Postgresql - - SQLServer - You provider datasource_name and datatarget_name contain connection information, it decisions which - database type and database instance would synchronous data. - """ - - CUSTOM_CONFIG = 0 - - _task_custom_attr = { - "custom_config", - "sql", - "target_table", - "job_speed_byte", - "job_speed_record", - "pre_statements", - "post_statements", - "xms", - "xmx", - } - - ext: set = {".sql"} - ext_attr: str = "_sql" - - def __init__( - self, - name: str, - datasource_name: str, - datatarget_name: str, - sql: str, - target_table: str, - job_speed_byte: Optional[int] = 0, - job_speed_record: Optional[int] = 1000, - pre_statements: Optional[List[str]] = None, - post_statements: Optional[List[str]] = None, - xms: Optional[int] = 1, - xmx: Optional[int] = 1, - *args, - **kwargs - ): - self._sql = sql - super().__init__(name, TaskType.DATAX, *args, **kwargs) - self.custom_config = self.CUSTOM_CONFIG - self.datasource_name = datasource_name - self.datatarget_name = datatarget_name - self.target_table = target_table - self.job_speed_byte = job_speed_byte - self.job_speed_record = job_speed_record - self.pre_statements = pre_statements or [] - self.post_statements = post_statements or [] - self.xms = xms - self.xmx = xmx - - @property - def task_params(self, camel_attr: bool = True, custom_attr: set = None) -> Dict: - """Override 
Task.task_params for datax task. - - datax task have some specials attribute for task_params, and is odd if we - directly set as python property, so we Override Task.task_params here. - """ - params = super().task_params - datasource = Database(self.datasource_name, "dsType", "dataSource") - params.update(datasource) - - datatarget = Database(self.datatarget_name, "dtType", "dataTarget") - params.update(datatarget) - return params diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/dependent.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/dependent.py deleted file mode 100644 index 12ec6ba91d..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/dependent.py +++ /dev/null @@ -1,273 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Task dependent.""" - -from typing import Dict, Optional, Tuple - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.task import Task -from pydolphinscheduler.exceptions import PyDSJavaGatewayException, PyDSParamException -from pydolphinscheduler.java_gateway import JavaGate -from pydolphinscheduler.models.base import Base - -DEPENDENT_ALL_TASK_IN_WORKFLOW = "0" - - -class DependentDate(str): - """Constant of Dependent date value. - - These values set according to Java server models, if you want to add and change it, - please change Java server models first. - """ - - # TODO Maybe we should add parent level to DependentDate for easy to use, such as - # DependentDate.MONTH.THIS_MONTH - - # Hour - CURRENT_HOUR = "currentHour" - LAST_ONE_HOUR = "last1Hour" - LAST_TWO_HOURS = "last2Hours" - LAST_THREE_HOURS = "last3Hours" - LAST_TWENTY_FOUR_HOURS = "last24Hours" - - # Day - TODAY = "today" - LAST_ONE_DAYS = "last1Days" - LAST_TWO_DAYS = "last2Days" - LAST_THREE_DAYS = "last3Days" - LAST_SEVEN_DAYS = "last7Days" - - # Week - THIS_WEEK = "thisWeek" - LAST_WEEK = "lastWeek" - LAST_MONDAY = "lastMonday" - LAST_TUESDAY = "lastTuesday" - LAST_WEDNESDAY = "lastWednesday" - LAST_THURSDAY = "lastThursday" - LAST_FRIDAY = "lastFriday" - LAST_SATURDAY = "lastSaturday" - LAST_SUNDAY = "lastSunday" - - # Month - THIS_MONTH = "thisMonth" - LAST_MONTH = "lastMonth" - LAST_MONTH_BEGIN = "lastMonthBegin" - LAST_MONTH_END = "lastMonthEnd" - - -class DependentItem(Base): - """Dependent item object, minimal unit for task dependent. - - It declare which project, process_definition, task are dependent to this task. 
- """ - - _DEFINE_ATTR = { - "project_code", - "definition_code", - "dep_task_code", - "cycle", - "date_value", - } - - # TODO maybe we should conside overwrite operator `and` and `or` for DependentItem to - # support more easy way to set relation - def __init__( - self, - project_name: str, - process_definition_name: str, - dependent_task_name: Optional[str] = DEPENDENT_ALL_TASK_IN_WORKFLOW, - dependent_date: Optional[DependentDate] = DependentDate.TODAY, - ): - obj_name = f"{project_name}.{process_definition_name}.{dependent_task_name}.{dependent_date}" - super().__init__(obj_name) - self.project_name = project_name - self.process_definition_name = process_definition_name - self.dependent_task_name = dependent_task_name - if dependent_date is None: - raise PyDSParamException( - "Parameter dependent_date must provider by got None." - ) - else: - self.dependent_date = dependent_date - self._code = {} - - def __repr__(self) -> str: - return "depend_item_list" - - @property - def project_code(self) -> str: - """Get dependent project code.""" - return self.get_code_from_gateway().get("projectCode") - - @property - def definition_code(self) -> str: - """Get dependent definition code.""" - return self.get_code_from_gateway().get("processDefinitionCode") - - @property - def dep_task_code(self) -> str: - """Get dependent tasks code list.""" - if self.is_all_task: - return DEPENDENT_ALL_TASK_IN_WORKFLOW - else: - return self.get_code_from_gateway().get("taskDefinitionCode") - - # TODO Maybe we should get cycle from dependent date class. 
- @property - def cycle(self) -> str: - """Get dependent cycle.""" - if "Hour" in self.dependent_date: - return "hour" - elif self.dependent_date == "today" or "Days" in self.dependent_date: - return "day" - elif "Month" in self.dependent_date: - return "month" - else: - return "week" - - @property - def date_value(self) -> str: - """Get dependent date.""" - return self.dependent_date - - @property - def is_all_task(self) -> bool: - """Check whether dependent all tasks or not.""" - return self.dependent_task_name == DEPENDENT_ALL_TASK_IN_WORKFLOW - - @property - def code_parameter(self) -> Tuple: - """Get name info parameter to query code.""" - param = ( - self.project_name, - self.process_definition_name, - self.dependent_task_name if not self.is_all_task else None, - ) - return param - - def get_code_from_gateway(self) -> Dict: - """Get project, definition, task code from given parameter.""" - if self._code: - return self._code - else: - try: - self._code = JavaGate().get_dependent_info(*self.code_parameter) - return self._code - except Exception: - raise PyDSJavaGatewayException("Function get_code_from_gateway error.") - - -class DependentOperator(Base): - """Set DependentItem or dependItemList with specific operator.""" - - _DEFINE_ATTR = { - "relation", - } - - def __init__(self, *args): - super().__init__(self.__class__.__name__) - self.args = args - - def __repr__(self) -> str: - return "depend_task_list" - - @classmethod - def operator_name(cls) -> str: - """Get operator name in different class.""" - return cls.__name__.upper() - - @property - def relation(self) -> str: - """Get operator name in different class, for function :func:`get_define`.""" - return self.operator_name() - - def set_define_attr(self) -> str: - """Set attribute to function :func:`get_define`. - - It is a wrapper for both `And` and `Or` operator. 
- """ - result = [] - attr = None - for dependent in self.args: - if isinstance(dependent, (DependentItem, DependentOperator)): - if attr is None: - attr = repr(dependent) - elif repr(dependent) != attr: - raise PyDSParamException( - "Dependent %s operator parameter only support same type.", - self.relation, - ) - else: - raise PyDSParamException( - "Dependent %s operator parameter support DependentItem and " - "DependentOperator but got %s.", - (self.relation, type(dependent)), - ) - result.append(dependent.get_define()) - setattr(self, attr, result) - return attr - - def get_define(self, camel_attr=True) -> Dict: - """Overwrite Base.get_define to get task dependent specific get define.""" - attr = self.set_define_attr() - dependent_define_attr = self._DEFINE_ATTR.union({attr}) - return super().get_define_custom( - camel_attr=True, custom_attr=dependent_define_attr - ) - - -class And(DependentOperator): - """Operator And for task dependent. - - It could accept both :class:`DependentItem` and children of :class:`DependentOperator`, - and set AND condition to those args. - """ - - def __init__(self, *args): - super().__init__(*args) - - -class Or(DependentOperator): - """Operator Or for task dependent. - - It could accept both :class:`DependentItem` and children of :class:`DependentOperator`, - and set OR condition to those args. - """ - - def __init__(self, *args): - super().__init__(*args) - - -class Dependent(Task): - """Task dependent object, declare behavior for dependent task to dolphinscheduler.""" - - def __init__(self, name: str, dependence: DependentOperator, *args, **kwargs): - super().__init__(name, TaskType.DEPENDENT, *args, **kwargs) - self.dependence = dependence - - @property - def task_params(self, camel_attr: bool = True, custom_attr: set = None) -> Dict: - """Override Task.task_params for dependent task. 
- - Dependent task have some specials attribute `dependence`, and in most of the task - this attribute is None and use empty dict `{}` as default value. We do not use class - attribute `_task_custom_attr` due to avoid attribute cover. - """ - params = super().task_params - params["dependence"] = self.dependence.get_define() - return params diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/dvc.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/dvc.py deleted file mode 100644 index c5b5cd5c91..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/dvc.py +++ /dev/null @@ -1,124 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Task dvc.""" -from copy import deepcopy -from typing import Dict - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.task import Task - - -class DvcTaskType(str): - """Constants for dvc task type.""" - - INIT = "Init DVC" - DOWNLOAD = "Download" - UPLOAD = "Upload" - - -class BaseDVC(Task): - """Base class for dvc task.""" - - dvc_task_type = None - - _task_custom_attr = { - "dvc_task_type", - "dvc_repository", - } - - _child_task_dvc_attr = set() - - def __init__(self, name: str, repository: str, *args, **kwargs): - super().__init__(name, TaskType.DVC, *args, **kwargs) - self.dvc_repository = repository - - @property - def task_params(self) -> Dict: - """Return task params.""" - self._task_custom_attr = deepcopy(self._task_custom_attr) - self._task_custom_attr.update(self._child_task_dvc_attr) - return super().task_params - - -class DVCInit(BaseDVC): - """Task DVC Init object, declare behavior for DVC Init task to dolphinscheduler.""" - - dvc_task_type = DvcTaskType.INIT - - _child_task_dvc_attr = {"dvc_store_url"} - - def __init__(self, name: str, repository: str, store_url: str, *args, **kwargs): - super().__init__(name, repository, *args, **kwargs) - self.dvc_store_url = store_url - - -class DVCDownload(BaseDVC): - """Task DVC Download object, declare behavior for DVC Download task to dolphinscheduler.""" - - dvc_task_type = DvcTaskType.DOWNLOAD - - _child_task_dvc_attr = { - "dvc_load_save_data_path", - "dvc_data_location", - "dvc_version", - } - - def __init__( - self, - name: str, - repository: str, - data_path_in_dvc_repository: str, - data_path_in_worker: str, - version: str, - *args, - **kwargs - ): - super().__init__(name, repository, *args, **kwargs) - self.dvc_data_location = data_path_in_dvc_repository - self.dvc_load_save_data_path = data_path_in_worker - self.dvc_version = version - - -class DVCUpload(BaseDVC): - """Task DVC Upload object, declare behavior for DVC Upload task to dolphinscheduler.""" - - 
dvc_task_type = DvcTaskType.UPLOAD - - _child_task_dvc_attr = { - "dvc_load_save_data_path", - "dvc_data_location", - "dvc_version", - "dvc_message", - } - - def __init__( - self, - name: str, - repository: str, - data_path_in_worker: str, - data_path_in_dvc_repository: str, - version: str, - message: str, - *args, - **kwargs - ): - super().__init__(name, repository, *args, **kwargs) - self.dvc_data_location = data_path_in_dvc_repository - self.dvc_load_save_data_path = data_path_in_worker - self.dvc_version = version - self.dvc_message = message diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/flink.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/flink.py deleted file mode 100644 index 83cae956a5..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/flink.py +++ /dev/null @@ -1,93 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Task Flink.""" - -from typing import Optional - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.engine import Engine, ProgramType - - -class FlinkVersion(str): - """Flink version, for now it just contain `HIGHT` and `LOW`.""" - - LOW_VERSION = "<1.10" - HIGHT_VERSION = ">=1.10" - - -class DeployMode(str): - """Flink deploy mode, for now it just contain `LOCAL` and `CLUSTER`.""" - - LOCAL = "local" - CLUSTER = "cluster" - - -class Flink(Engine): - """Task flink object, declare behavior for flink task to dolphinscheduler.""" - - _task_custom_attr = { - "deploy_mode", - "flink_version", - "slot", - "task_manager", - "job_manager_memory", - "task_manager_memory", - "app_name", - "parallelism", - "main_args", - "others", - } - - def __init__( - self, - name: str, - main_class: str, - main_package: str, - program_type: Optional[ProgramType] = ProgramType.SCALA, - deploy_mode: Optional[DeployMode] = DeployMode.CLUSTER, - flink_version: Optional[FlinkVersion] = FlinkVersion.LOW_VERSION, - app_name: Optional[str] = None, - job_manager_memory: Optional[str] = "1G", - task_manager_memory: Optional[str] = "2G", - slot: Optional[int] = 1, - task_manager: Optional[int] = 2, - parallelism: Optional[int] = 1, - main_args: Optional[str] = None, - others: Optional[str] = None, - *args, - **kwargs - ): - super().__init__( - name, - TaskType.FLINK, - main_class, - main_package, - program_type, - *args, - **kwargs - ) - self.deploy_mode = deploy_mode - self.flink_version = flink_version - self.app_name = app_name - self.job_manager_memory = job_manager_memory - self.task_manager_memory = task_manager_memory - self.slot = slot - self.task_manager = task_manager - self.parallelism = parallelism - self.main_args = main_args - self.others = others diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/func_wrap.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/func_wrap.py deleted file mode 
100644 index c0b73a1fc2..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/func_wrap.py +++ /dev/null @@ -1,61 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task function wrapper allows using decorator to create a task.""" - -import functools -import inspect -import itertools -import types - -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.tasks.python import Python - - -def _get_func_str(func: types.FunctionType) -> str: - """Get Python function string without indent from decorator. - - :param func: The function which wraps by decorator ``@task``. - """ - lines = inspect.getsourcelines(func)[0] - - src_strip = "" - lead_space_num = None - for line in lines: - if lead_space_num is None: - lead_space_num = sum(1 for _ in itertools.takewhile(str.isspace, line)) - if line.strip() == "@task": - continue - elif line.strip().startswith("@"): - raise PyDSParamException( - "Do no support other decorators for function ``task`` decorator." 
- ) - src_strip += line[lead_space_num:] - return src_strip - - -def task(func: types.FunctionType): - """Decorate which covert Python function into pydolphinscheduler task.""" - - @functools.wraps(func) - def wrapper(*args, **kwargs): - func_str = _get_func_str(func) - return Python( - name=kwargs.get("name", func.__name__), definition=func_str, *args, **kwargs - ) - - return wrapper diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/http.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/http.py deleted file mode 100644 index 781333d481..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/http.py +++ /dev/null @@ -1,101 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task shell.""" - -from typing import Optional - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.task import Task -from pydolphinscheduler.exceptions import PyDSParamException - - -class HttpMethod: - """Constant of HTTP method.""" - - GET = "GET" - POST = "POST" - HEAD = "HEAD" - PUT = "PUT" - DELETE = "DELETE" - - -class HttpCheckCondition: - """Constant of HTTP check condition. 
- - For now it contain four value: - - STATUS_CODE_DEFAULT: when http response code equal to 200, mark as success. - - STATUS_CODE_CUSTOM: when http response code equal to the code user define, mark as success. - - BODY_CONTAINS: when http response body contain text user define, mark as success. - - BODY_NOT_CONTAINS: when http response body do not contain text user define, mark as success. - """ - - STATUS_CODE_DEFAULT = "STATUS_CODE_DEFAULT" - STATUS_CODE_CUSTOM = "STATUS_CODE_CUSTOM" - BODY_CONTAINS = "BODY_CONTAINS" - BODY_NOT_CONTAINS = "BODY_NOT_CONTAINS" - - -class Http(Task): - """Task HTTP object, declare behavior for HTTP task to dolphinscheduler.""" - - _task_custom_attr = { - "url", - "http_method", - "http_params", - "http_check_condition", - "condition", - "connect_timeout", - "socket_timeout", - } - - def __init__( - self, - name: str, - url: str, - http_method: Optional[str] = HttpMethod.GET, - http_params: Optional[str] = None, - http_check_condition: Optional[str] = HttpCheckCondition.STATUS_CODE_DEFAULT, - condition: Optional[str] = None, - connect_timeout: Optional[int] = 60000, - socket_timeout: Optional[int] = 60000, - *args, - **kwargs - ): - super().__init__(name, TaskType.HTTP, *args, **kwargs) - self.url = url - if not hasattr(HttpMethod, http_method): - raise PyDSParamException( - "Parameter http_method %s not support.", http_method - ) - self.http_method = http_method - self.http_params = http_params or [] - if not hasattr(HttpCheckCondition, http_check_condition): - raise PyDSParamException( - "Parameter http_check_condition %s not support.", http_check_condition - ) - self.http_check_condition = http_check_condition - if ( - http_check_condition != HttpCheckCondition.STATUS_CODE_DEFAULT - and condition is None - ): - raise PyDSParamException( - "Parameter condition must provider if http_check_condition not equal to STATUS_CODE_DEFAULT" - ) - self.condition = condition - self.connect_timeout = connect_timeout - self.socket_timeout = 
socket_timeout diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/map_reduce.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/map_reduce.py deleted file mode 100644 index 5050bd3cf1..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/map_reduce.py +++ /dev/null @@ -1,52 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Task MR.""" - -from typing import Optional - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.engine import Engine, ProgramType - - -class MR(Engine): - """Task mr object, declare behavior for mr task to dolphinscheduler.""" - - _task_custom_attr = { - "app_name", - "main_args", - "others", - } - - def __init__( - self, - name: str, - main_class: str, - main_package: str, - program_type: Optional[ProgramType] = ProgramType.SCALA, - app_name: Optional[str] = None, - main_args: Optional[str] = None, - others: Optional[str] = None, - *args, - **kwargs - ): - super().__init__( - name, TaskType.MR, main_class, main_package, program_type, *args, **kwargs - ) - self.app_name = app_name - self.main_args = main_args - self.others = others diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/mlflow.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/mlflow.py deleted file mode 100644 index e86797aadf..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/mlflow.py +++ /dev/null @@ -1,256 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Task mlflow.""" -from copy import deepcopy -from typing import Dict, Optional - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.task import Task - - -class MLflowTaskType(str): - """MLflow task type.""" - - MLFLOW_PROJECTS = "MLflow Projects" - MLFLOW_MODELS = "MLflow Models" - - -class MLflowJobType(str): - """MLflow job type.""" - - AUTOML = "AutoML" - BASIC_ALGORITHM = "BasicAlgorithm" - CUSTOM_PROJECT = "CustomProject" - - -class MLflowDeployType(str): - """MLflow deploy type.""" - - MLFLOW = "MLFLOW" - DOCKER = "DOCKER" - - -DEFAULT_MLFLOW_TRACKING_URI = "http://127.0.0.1:5000" -DEFAULT_VERSION = "master" - - -class BaseMLflow(Task): - """Base MLflow task.""" - - mlflow_task_type = None - - _task_custom_attr = { - "mlflow_tracking_uri", - "mlflow_task_type", - } - - _child_task_mlflow_attr = set() - - def __init__(self, name: str, mlflow_tracking_uri: str, *args, **kwargs): - super().__init__(name, TaskType.MLFLOW, *args, **kwargs) - self.mlflow_tracking_uri = mlflow_tracking_uri - - @property - def task_params(self) -> Dict: - """Return task params.""" - self._task_custom_attr = deepcopy(self._task_custom_attr) - self._task_custom_attr.update(self._child_task_mlflow_attr) - return super().task_params - - -class MLflowModels(BaseMLflow): - """Task MLflow models object, declare behavior for MLflow models task to dolphinscheduler. - - Deploy machine learning models in diverse serving environments. - - :param name: task name - :param model_uri: Model-URI of MLflow , support models://suffix format and runs:/ format. 
- See https://mlflow.org/docs/latest/tracking.html#artifact-stores - :param mlflow_tracking_uri: MLflow tracking server uri, default is http://127.0.0.1:5000 - :param deploy_mode: MLflow deploy mode, support MLFLOW, DOCKER, default is DOCKER - :param port: deploy port, default is 7000 - """ - - mlflow_task_type = MLflowTaskType.MLFLOW_MODELS - - _child_task_mlflow_attr = { - "deploy_type", - "deploy_model_key", - "deploy_port", - } - - def __init__( - self, - name: str, - model_uri: str, - mlflow_tracking_uri: Optional[str] = DEFAULT_MLFLOW_TRACKING_URI, - deploy_mode: Optional[str] = MLflowDeployType.DOCKER, - port: Optional[int] = 7000, - *args, - **kwargs - ): - """Init mlflow models task.""" - super().__init__(name, mlflow_tracking_uri, *args, **kwargs) - self.deploy_type = deploy_mode.upper() - self.deploy_model_key = model_uri - self.deploy_port = port - - -class MLFlowProjectsCustom(BaseMLflow): - """Task MLflow projects object, declare behavior for MLflow Custom projects task to dolphinscheduler. - - :param name: task name - :param repository: Repository url of MLflow Project, Support git address and directory on worker. - If it's in a subdirectory, We add # to support this (same as mlflow run) , - for example https://github.com/mlflow/mlflow#examples/xgboost/xgboost_native. 
- :param mlflow_tracking_uri: MLflow tracking server uri, default is http://127.0.0.1:5000 - :param experiment_name: MLflow experiment name, default is empty - :param parameters: MLflow project parameters, default is empty - :param version: MLflow project version, default is master - - """ - - mlflow_task_type = MLflowTaskType.MLFLOW_PROJECTS - mlflow_job_type = MLflowJobType.CUSTOM_PROJECT - - _child_task_mlflow_attr = { - "mlflow_job_type", - "experiment_name", - "params", - "mlflow_project_repository", - "mlflow_project_version", - } - - def __init__( - self, - name: str, - repository: str, - mlflow_tracking_uri: Optional[str] = DEFAULT_MLFLOW_TRACKING_URI, - experiment_name: Optional[str] = "", - parameters: Optional[str] = "", - version: Optional[str] = "master", - *args, - **kwargs - ): - """Init mlflow projects task.""" - super().__init__(name, mlflow_tracking_uri, *args, **kwargs) - self.mlflow_project_repository = repository - self.experiment_name = experiment_name - self.params = parameters - self.mlflow_project_version = version - - -class MLFlowProjectsAutoML(BaseMLflow): - """Task MLflow projects object, declare behavior for AutoML task to dolphinscheduler. - - :param name: task name - :param data_path: data path of MLflow Project, Support git address and directory on worker. - :param automl_tool: The AutoML tool used, currently supports autosklearn and flaml. 
- :param mlflow_tracking_uri: MLflow tracking server uri, default is http://127.0.0.1:5000 - :param experiment_name: MLflow experiment name, default is empty - :param model_name: MLflow model name, default is empty - :param parameters: MLflow project parameters, default is empty - - """ - - mlflow_task_type = MLflowTaskType.MLFLOW_PROJECTS - mlflow_job_type = MLflowJobType.AUTOML - - _child_task_mlflow_attr = { - "mlflow_job_type", - "experiment_name", - "model_name", - "register_model", - "data_path", - "params", - "automl_tool", - } - - def __init__( - self, - name: str, - data_path: str, - automl_tool: Optional[str] = "flaml", - mlflow_tracking_uri: Optional[str] = DEFAULT_MLFLOW_TRACKING_URI, - experiment_name: Optional[str] = "", - model_name: Optional[str] = "", - parameters: Optional[str] = "", - *args, - **kwargs - ): - """Init mlflow projects task.""" - super().__init__(name, mlflow_tracking_uri, *args, **kwargs) - self.data_path = data_path - self.experiment_name = experiment_name - self.model_name = model_name - self.params = parameters - self.automl_tool = automl_tool.lower() - self.register_model = bool(model_name) - - -class MLFlowProjectsBasicAlgorithm(BaseMLflow): - """Task MLflow projects object, declare behavior for BasicAlgorithm task to dolphinscheduler. - - :param name: task name - :param data_path: data path of MLflow Project, Support git address and directory on worker. - :param algorithm: The selected algorithm currently supports LR, SVM, LightGBM and XGboost - based on scikit-learn form. 
- :param mlflow_tracking_uri: MLflow tracking server uri, default is http://127.0.0.1:5000 - :param experiment_name: MLflow experiment name, default is empty - :param model_name: MLflow model name, default is empty - :param parameters: MLflow project parameters, default is empty - :param search_params: Whether to search the parameters, default is empty - - """ - - mlflow_job_type = MLflowJobType.BASIC_ALGORITHM - mlflow_task_type = MLflowTaskType.MLFLOW_PROJECTS - - _child_task_mlflow_attr = { - "mlflow_job_type", - "experiment_name", - "model_name", - "register_model", - "data_path", - "params", - "algorithm", - "search_params", - } - - def __init__( - self, - name: str, - data_path: str, - algorithm: Optional[str] = "lightgbm", - mlflow_tracking_uri: Optional[str] = DEFAULT_MLFLOW_TRACKING_URI, - experiment_name: Optional[str] = "", - model_name: Optional[str] = "", - parameters: Optional[str] = "", - search_params: Optional[str] = "", - *args, - **kwargs - ): - """Init mlflow projects task.""" - super().__init__(name, mlflow_tracking_uri, *args, **kwargs) - self.data_path = data_path - self.experiment_name = experiment_name - self.model_name = model_name - self.params = parameters - self.algorithm = algorithm.lower() - self.search_params = search_params - self.register_model = bool(model_name) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/openmldb.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/openmldb.py deleted file mode 100644 index 5dad36ec11..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/openmldb.py +++ /dev/null @@ -1,48 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task OpenMLDB.""" - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.task import Task - - -class OpenMLDB(Task): - """Task OpenMLDB object, declare behavior for OpenMLDB task to dolphinscheduler. - - :param name: task name - :param zookeeper: OpenMLDB cluster zookeeper address, e.g. 127.0.0.1:2181. - :param zookeeper_path: OpenMLDB cluster zookeeper path, e.g. /openmldb. - :param execute_mode: Determine the init mode, offline or online. You can switch it in sql statementself. - :param sql: SQL statement. - """ - - _task_custom_attr = { - "zk", - "zk_path", - "execute_mode", - "sql", - } - - def __init__( - self, name, zookeeper, zookeeper_path, execute_mode, sql, *args, **kwargs - ): - super().__init__(name, TaskType.OPENMLDB, *args, **kwargs) - self.zk = zookeeper - self.zk_path = zookeeper_path - self.execute_mode = execute_mode - self.sql = sql diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/procedure.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/procedure.py deleted file mode 100644 index 6383e075ab..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/procedure.py +++ /dev/null @@ -1,60 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task procedure.""" - -from typing import Dict - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.database import Database -from pydolphinscheduler.core.task import Task - - -class Procedure(Task): - """Task Procedure object, declare behavior for Procedure task to dolphinscheduler. - - It should run database procedure job in multiply sql lik engine, such as: - - ClickHouse - - DB2 - - HIVE - - MySQL - - Oracle - - Postgresql - - Presto - - SQLServer - You provider datasource_name contain connection information, it decisions which - database type and database instance would run this sql. - """ - - _task_custom_attr = {"method"} - - def __init__(self, name: str, datasource_name: str, method: str, *args, **kwargs): - super().__init__(name, TaskType.PROCEDURE, *args, **kwargs) - self.datasource_name = datasource_name - self.method = method - - @property - def task_params(self, camel_attr: bool = True, custom_attr: set = None) -> Dict: - """Override Task.task_params for produce task. - - produce task have some specials attribute for task_params, and is odd if we - directly set as python property, so we Override Task.task_params here. 
- """ - params = super().task_params - datasource = Database(self.datasource_name, "type", "datasource") - params.update(datasource) - return params diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/python.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/python.py deleted file mode 100644 index 593cc52cc2..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/python.py +++ /dev/null @@ -1,105 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task Python.""" - -import inspect -import logging -import re -import types -from typing import Union - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.task import Task -from pydolphinscheduler.exceptions import PyDSParamException - -log = logging.getLogger(__file__) - - -class Python(Task): - """Task Python object, declare behavior for Python task to dolphinscheduler. - - Python task support two types of parameters for :param:``definition``, and here is an example: - - Using str type of :param:``definition`` - - .. 
code-block:: python - - python_task = Python(name="str_type", definition="print('Hello Python task.')") - - Or using Python callable type of :param:``definition`` - - .. code-block:: python - - def foo(): - print("Hello Python task.") - - python_task = Python(name="str_type", definition=foo) - - :param name: The name for Python task. It define the task name. - :param definition: String format of Python script you want to execute or Python callable you - want to execute. - """ - - _task_custom_attr = {"raw_script", "definition"} - - ext: set = {".py"} - ext_attr: Union[str, types.FunctionType] = "_definition" - - def __init__( - self, name: str, definition: Union[str, types.FunctionType], *args, **kwargs - ): - self._definition = definition - super().__init__(name, TaskType.PYTHON, *args, **kwargs) - - def _build_exe_str(self) -> str: - """Build executable string from given definition. - - Attribute ``self.definition`` almost is a function, we need to call this function after parsing it - to string. The easier way to call a function is using syntax ``func()`` and we use it to call it too. 
- """ - definition = getattr(self, "definition") - if isinstance(definition, types.FunctionType): - py_function = inspect.getsource(definition) - func_str = f"{py_function}{definition.__name__}()" - else: - pattern = re.compile("^def (\\w+)\\(") - find = pattern.findall(definition) - if not find: - log.warning( - "Python definition is simple script instead of function, with value %s", - definition, - ) - return definition - # Keep function str and function callable always have one blank line - func_str = ( - f"{definition}{find[0]}()" - if definition.endswith("\n") - else f"{definition}\n{find[0]}()" - ) - return func_str - - @property - def raw_script(self) -> str: - """Get python task define attribute `raw_script`.""" - if isinstance(getattr(self, "definition"), (str, types.FunctionType)): - return self._build_exe_str() - else: - raise PyDSParamException( - "Parameter definition do not support % for now.", - type(getattr(self, "definition")), - ) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/pytorch.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/pytorch.py deleted file mode 100644 index 4767f7ecee..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/pytorch.py +++ /dev/null @@ -1,95 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task Pytorch.""" -from typing import Optional - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.task import Task - - -class DEFAULT: - """Default values for Pytorch.""" - - is_create_environment = False - project_path = "." - python_command = "${PYTHON_HOME}" - - -class Pytorch(Task): - """Task Pytorch object, declare behavior for Pytorch task to dolphinscheduler. - - See also: `DolphinScheduler Pytorch Task Plugin - `_ - - :param name: task name - :param script: Entry to the Python script file that you want to run. - :param script_params: Input parameters at run time. - :param project_path: The path to the project. Default "." . - :param is_create_environment: is create environment. Default False. - :param python_command: The path to the python command. Default "${PYTHON_HOME}". - :param python_env_tool: The python environment tool. Default "conda". - :param requirements: The path to the requirements.txt file. Default "requirements.txt". - :param conda_python_version: The python version of conda environment. Default "3.7". 
- """ - - _task_custom_attr = { - "script", - "script_params", - "other_params", - "python_path", - "is_create_environment", - "python_command", - "python_env_tool", - "requirements", - "conda_python_version", - } - - def __init__( - self, - name: str, - script: str, - script_params: str = "", - project_path: Optional[str] = DEFAULT.project_path, - is_create_environment: Optional[bool] = DEFAULT.is_create_environment, - python_command: Optional[str] = DEFAULT.python_command, - python_env_tool: Optional[str] = "conda", - requirements: Optional[str] = "requirements.txt", - conda_python_version: Optional[str] = "3.7", - *args, - **kwargs, - ): - """Init Pytorch task.""" - super().__init__(name, TaskType.PYTORCH, *args, **kwargs) - self.script = script - self.script_params = script_params - self.is_create_environment = is_create_environment - self.python_path = project_path - self.python_command = python_command - self.python_env_tool = python_env_tool - self.requirements = requirements - self.conda_python_version = conda_python_version - - @property - def other_params(self): - """Return other params.""" - conds = [ - self.is_create_environment != DEFAULT.is_create_environment, - self.python_path != DEFAULT.project_path, - self.python_command != DEFAULT.python_command, - ] - return any(conds) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sagemaker.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sagemaker.py deleted file mode 100644 index 30b128d172..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sagemaker.py +++ /dev/null @@ -1,40 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task SageMaker.""" - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.task import Task - - -class SageMaker(Task): - """Task SageMaker object, declare behavior for SageMaker task to dolphinscheduler. - - :param name: A unique, meaningful string for the SageMaker task. - :param sagemaker_request_json: Request parameters of StartPipelineExecution, - see also `AWS API - `_ - - """ - - _task_custom_attr = { - "sagemaker_request_json", - } - - def __init__(self, name: str, sagemaker_request_json: str, *args, **kwargs): - super().__init__(name, TaskType.SAGEMAKER, *args, **kwargs) - self.sagemaker_request_json = sagemaker_request_json diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/shell.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/shell.py deleted file mode 100644 index 36ec4e87d0..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/shell.py +++ /dev/null @@ -1,58 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task shell.""" - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.task import Task - - -class Shell(Task): - """Task shell object, declare behavior for shell task to dolphinscheduler. - - :param name: A unique, meaningful string for the shell task. - :param command: One or more command want to run in this task. - - It could be simply command:: - - Shell(name=..., command="echo task shell") - - or maybe same commands trying to do complex task:: - - command = '''echo task shell step 1; - echo task shell step 2; - echo task shell step 3 - ''' - - Shell(name=..., command=command) - - """ - - # TODO maybe we could use instance name to replace attribute `name` - # which is simplify as `task_shell = Shell(command = "echo 1")` and - # task.name assign to `task_shell` - - _task_custom_attr = { - "raw_script", - } - - ext: set = {".sh", ".zsh"} - ext_attr: str = "_raw_script" - - def __init__(self, name: str, command: str, *args, **kwargs): - self._raw_script = command - super().__init__(name, TaskType.SHELL, *args, **kwargs) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/spark.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/spark.py deleted file mode 100644 index eb9c621043..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/spark.py +++ /dev/null @@ -1,84 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task Spark.""" - -from typing import Optional - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.engine import Engine, ProgramType - - -class DeployMode(str): - """SPARK deploy mode, for now it just contain `LOCAL`, `CLIENT` and `CLUSTER`.""" - - LOCAL = "local" - CLIENT = "client" - CLUSTER = "cluster" - - -class Spark(Engine): - """Task spark object, declare behavior for spark task to dolphinscheduler.""" - - _task_custom_attr = { - "deploy_mode", - "driver_cores", - "driver_memory", - "num_executors", - "executor_memory", - "executor_cores", - "app_name", - "main_args", - "others", - } - - def __init__( - self, - name: str, - main_class: str, - main_package: str, - program_type: Optional[ProgramType] = ProgramType.SCALA, - deploy_mode: Optional[DeployMode] = DeployMode.CLUSTER, - app_name: Optional[str] = None, - driver_cores: Optional[int] = 1, - driver_memory: Optional[str] = "512M", - num_executors: Optional[int] = 2, - executor_memory: Optional[str] = "2G", - executor_cores: Optional[int] = 2, - main_args: Optional[str] = None, - others: Optional[str] = None, - *args, - **kwargs - ): - super().__init__( - name, - TaskType.SPARK, - main_class, - main_package, - program_type, - *args, - **kwargs - ) - self.deploy_mode = deploy_mode - 
self.app_name = app_name - self.driver_cores = driver_cores - self.driver_memory = driver_memory - self.num_executors = num_executors - self.executor_memory = executor_memory - self.executor_cores = executor_cores - self.main_args = main_args - self.others = others diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sql.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sql.py deleted file mode 100644 index 4bebf8379d..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sql.py +++ /dev/null @@ -1,122 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Task sql.""" - -import logging -import re -from typing import Dict, Optional - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.database import Database -from pydolphinscheduler.core.task import Task - -log = logging.getLogger(__file__) - - -class SqlType: - """SQL type, for now it just contain `SELECT` and `NO_SELECT`.""" - - SELECT = "0" - NOT_SELECT = "1" - - -class Sql(Task): - """Task SQL object, declare behavior for SQL task to dolphinscheduler. 
- - It should run sql job in multiply sql lik engine, such as: - - ClickHouse - - DB2 - - HIVE - - MySQL - - Oracle - - Postgresql - - Presto - - SQLServer - You provider datasource_name contain connection information, it decisions which - database type and database instance would run this sql. - """ - - _task_custom_attr = { - "sql", - "sql_type", - "pre_statements", - "post_statements", - "display_rows", - } - - ext: set = {".sql"} - ext_attr: str = "_sql" - - def __init__( - self, - name: str, - datasource_name: str, - sql: str, - sql_type: Optional[str] = None, - pre_statements: Optional[str] = None, - post_statements: Optional[str] = None, - display_rows: Optional[int] = 10, - *args, - **kwargs - ): - self._sql = sql - super().__init__(name, TaskType.SQL, *args, **kwargs) - self.param_sql_type = sql_type - self.datasource_name = datasource_name - self.pre_statements = pre_statements or [] - self.post_statements = post_statements or [] - self.display_rows = display_rows - - @property - def sql_type(self) -> str: - """Judgement sql type, it will return the SQL type for type `SELECT` or `NOT_SELECT`. - - If `param_sql_type` dot not specific, will use regexp to check - which type of the SQL is. But if `param_sql_type` is specific - will use the parameter overwrites the regexp way - """ - if ( - self.param_sql_type == SqlType.SELECT - or self.param_sql_type == SqlType.NOT_SELECT - ): - log.info( - "The sql type is specified by a parameter, with value %s", - self.param_sql_type, - ) - return self.param_sql_type - pattern_select_str = ( - "^(?!(.* |)insert |(.* |)delete |(.* |)drop " - "|(.* |)update |(.* |)truncate |(.* |)alter |(.* |)create ).*" - ) - pattern_select = re.compile(pattern_select_str, re.IGNORECASE) - if pattern_select.match(self._sql) is None: - return SqlType.NOT_SELECT - else: - return SqlType.SELECT - - @property - def task_params(self, camel_attr: bool = True, custom_attr: set = None) -> Dict: - """Override Task.task_params for sql task. 
- - sql task have some specials attribute for task_params, and is odd if we - directly set as python property, so we Override Task.task_params here. - """ - params = super().task_params - datasource = Database(self.datasource_name, "type", "datasource") - params.update(datasource) - return params diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sub_process.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sub_process.py deleted file mode 100644 index c7a9f8bd11..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sub_process.py +++ /dev/null @@ -1,54 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Task sub_process.""" - -from typing import Dict - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.core.task import Task -from pydolphinscheduler.exceptions import PyDSProcessDefinitionNotAssignException -from pydolphinscheduler.java_gateway import JavaGate - - -class SubProcess(Task): - """Task SubProcess object, declare behavior for SubProcess task to dolphinscheduler.""" - - _task_custom_attr = {"process_definition_code"} - - def __init__(self, name: str, process_definition_name: str, *args, **kwargs): - super().__init__(name, TaskType.SUB_PROCESS, *args, **kwargs) - self.process_definition_name = process_definition_name - - @property - def process_definition_code(self) -> str: - """Get process definition code, a wrapper for :func:`get_process_definition_info`.""" - return self.get_process_definition_info(self.process_definition_name).get( - "code" - ) - - def get_process_definition_info(self, process_definition_name: str) -> Dict: - """Get process definition info from java gateway, contains process definition id, name, code.""" - if not self.process_definition: - raise PyDSProcessDefinitionNotAssignException( - "ProcessDefinition must be provider for task SubProcess." - ) - return JavaGate().get_process_definition_info( - self.process_definition.user.name, - self.process_definition.project.name, - process_definition_name, - ) diff --git a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/switch.py b/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/switch.py deleted file mode 100644 index 45edaa9aac..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/switch.py +++ /dev/null @@ -1,166 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
"""Task Switch."""

from typing import Dict, Optional

from pydolphinscheduler.constants import TaskType
from pydolphinscheduler.core.task import Task
from pydolphinscheduler.exceptions import PyDSParamException
from pydolphinscheduler.models.base import Base


class SwitchBranch(Base):
    """Base class of the condition branches of task switch.

    It is a parent class for :class:`Branch` and :class:`Default`.

    :param task: The task to run when this branch is taken.
    :param exp: Optional condition expression; ``None`` for the default branch.
    """

    _DEFINE_ATTR = {
        "next_node",
    }

    def __init__(self, task: Task, exp: Optional[str] = None):
        super().__init__(f"Switch.{self.__class__.__name__.upper()}")
        self.task = task
        self.exp = exp

    @property
    def next_node(self) -> str:
        """Get task switch property next_node, the code of this branch's task."""
        return self.task.code

    @property
    def condition(self) -> Optional[str]:
        """Get task switch property condition."""
        return self.exp

    def get_define(self, camel_attr: bool = True) -> Dict:
        """Get branch definition attributes communicated to the Java gateway server.

        ``condition`` is included only when an expression is set (i.e. for
        :class:`Branch`, not :class:`Default`).
        """
        if self.condition:
            # Shadow the class-level set with an instance-level copy: the
            # original mutated the shared class attribute, so one branch with
            # a condition leaked "condition" into every SwitchBranch instance.
            self._DEFINE_ATTR = self._DEFINE_ATTR | {"condition"}
        return super().get_define()


class Branch(SwitchBranch):
    """Common condition branch for switch task.

    If any condition in :class:`Branch` matches, this :class:`Branch`'s task is set
    as downstream of the task switch. If no condition branch matches,
    :class:`Default`'s task is set as the task switch downstream.
    """

    def __init__(self, condition: str, task: Task):
        super().__init__(task, condition)


class Default(SwitchBranch):
    """Class default branch for switch task.

    If all conditions of :class:`Branch` fail to match, the task switch runs the task
    in :class:`Default` and sets it as the switch downstream. Please notice that each
    switch condition may only have one single :class:`Default`.
    """

    def __init__(self, task: Task):
        super().__init__(task)


class SwitchCondition(Base):
    """Set switch condition of given parameters.

    Accepts any number of :class:`Branch` objects and at most one :class:`Default`.
    """

    _DEFINE_ATTR = {
        "depend_task_list",
    }

    def __init__(self, *args):
        super().__init__(self.__class__.__name__)
        self.args = args

    def set_define_attr(self) -> None:
        """Set attributes consumed by :func:`get_define`.

        Validates that every positional argument is a :class:`SwitchBranch` and
        that at most one :class:`Default` branch was given.

        :raises PyDSParamException: If an argument is not a ``SwitchBranch`` or
            more than one ``Default`` branch is provided.
        """
        branches = []
        num_branch_default = 0
        for condition in self.args:
            if not isinstance(condition, SwitchBranch):
                # Interpolate explicitly: the original passed the type as a
                # second positional argument, which never formatted into the
                # exception message.
                raise PyDSParamException(
                    "Task Switch's parameter only support SwitchBranch but got %s."
                    % type(condition)
                )
            if isinstance(condition, Default):
                # Default number branch checker
                if num_branch_default >= 1:
                    raise PyDSParamException(
                        "Task Switch's parameter only support exactly one default branch."
                    )
                # Instance-level shadow copy; do not mutate the shared class set.
                self._DEFINE_ATTR = self._DEFINE_ATTR | {"next_node"}
                setattr(self, "next_node", condition.next_node)
                num_branch_default += 1
            elif isinstance(condition, Branch):
                branches.append(condition.get_define())
        # Handle switch default branch, default value is `""` if not provided.
        if num_branch_default == 0:
            self._DEFINE_ATTR = self._DEFINE_ATTR | {"next_node"}
            setattr(self, "next_node", "")
        setattr(self, "depend_task_list", branches)


class Switch(Task):
    """Task switch object, declare behavior for switch task to dolphinscheduler.

    Param of process definition or at least one local param of task must be set
    if task `switch` is in this workflow.
    """

    _task_ignore_attr = {
        "condition_result",
        "dependence",
    }

    def __init__(self, name: str, condition: SwitchCondition, *args, **kwargs):
        super().__init__(name, TaskType.SWITCH, *args, **kwargs)
        self.condition = condition
        # Set condition tasks as current task downstream
        self._set_dep()

    def _set_dep(self) -> None:
        """Set downstream tasks according to parameter `condition`."""
        downstream = [
            cond.task for cond in self.condition.args if isinstance(cond, SwitchBranch)
        ]
        self.set_downstream(downstream)

    @property
    def task_params(self) -> Dict:
        """Override Task.task_params for switch task.

        Switch tasks carry a special ``switchResult`` attribute which in most
        other tasks is absent. The original declaration listed ``camel_attr``
        and ``custom_attr`` parameters, but a property getter can never receive
        arguments, so they were dead and have been removed (property access by
        callers is unchanged).
        """
        params = super().task_params
        params["switchResult"] = self.condition.get_define()
        return params
"""Date util function collections."""

from datetime import datetime

from pydolphinscheduler.constants import Delimiter, Time

# Lengths of the supported full datetime strings:
# 15 == "%Y%m%d %H%M%S", 19 == "%Y-%m-%d %H:%M:%S" / "%Y/%m/%d %H:%M:%S".
LEN_SUPPORT_DATETIME = (
    15,
    19,
)

FMT_SHORT = f"{Time.FMT_SHORT_DATE} {Time.FMT_NO_COLON_TIME}"
FMT_DASH = f"{Time.FMT_DASH_DATE} {Time.FMT_STD_TIME}"
FMT_STD = f"{Time.FMT_STD_DATE} {Time.FMT_STD_TIME}"

MAX_DATETIME = datetime(9999, 12, 31, 23, 59, 59)


def conv_to_schedule(src: datetime) -> str:
    """Convert given datetime to schedule date string."""
    return datetime.strftime(src, FMT_STD)


def conv_from_str(src: str) -> datetime:
    """Convert given string to datetime.

    This function gives an ability to convert string to datetime, and for now it
    can handle formats like:

    - %Y-%m-%d
    - %Y/%m/%d
    - %Y%m%d
    - %Y-%m-%d %H:%M:%S
    - %Y/%m/%d %H:%M:%S
    - %Y%m%d %H%M%S

    :raises NotImplementedError: If a pattern other than the above is given.
    """

    def _unsupported(value: str) -> NotImplementedError:
        # Interpolate the value into the message: the original passed it as a
        # second positional argument, which never formatted into the text.
        return NotImplementedError(
            f"{value} could not be convert to datetime for now."
        )

    len_ = len(src)
    if len_ == Time.LEN_SHORT_DATE:
        return datetime.strptime(src, Time.FMT_SHORT_DATE)
    elif len_ == Time.LEN_STD_DATE:
        if Delimiter.BAR in src:
            return datetime.strptime(src, Time.FMT_STD_DATE)
        elif Delimiter.DASH in src:
            return datetime.strptime(src, Time.FMT_DASH_DATE)
        else:
            raise _unsupported(src)
    elif len_ in LEN_SUPPORT_DATETIME:
        if Delimiter.BAR in src and Delimiter.COLON in src:
            return datetime.strptime(src, FMT_STD)
        elif Delimiter.DASH in src and Delimiter.COLON in src:
            return datetime.strptime(src, FMT_DASH)
        elif (
            Delimiter.DASH not in src
            and Delimiter.BAR not in src
            and Delimiter.COLON not in src
        ):
            return datetime.strptime(src, FMT_SHORT)
        else:
            raise _unsupported(src)
    else:
        raise _unsupported(src)
"""File util for pydolphinscheduler."""

from pathlib import Path
from typing import Optional


def write(
    content: str,
    to_path: str,
    create: Optional[bool] = True,
    overwrite: Optional[bool] = False,
) -> None:
    """Write content to the configuration file at the given path.

    :param content: The source string to write to :param:`to_path`.
    :param to_path: The path of the file to write.
    :param create: Whether to create the file's parent directory if it does not
        exist. If ``True``, missing parents are created; if ``False``, a missing
        parent raises. Default ``True``.
    :param overwrite: Whether to overwrite the file if it already exists. If
        ``True`` the existing content is replaced. Default ``False``.
    :raises ValueError: If the parent directory does not exist and ``create``
        is ``False``.
    :raises FileExistsError: If the file exists and ``overwrite`` is ``False``.
    """
    path = Path(to_path)
    if not path.parent.exists():
        if create:
            path.parent.mkdir(parents=True)
        else:
            raise ValueError(
                "Parent directory do not exists and set param `create` to `False`."
            )
    if path.exists() and not overwrite:
        # Interpolate the path into the message; the original passed it as a
        # second positional argument, which never formatted into the text.
        raise FileExistsError(
            f"File {to_path} already exists and you choose not overwrite mode."
        )
    # Single write path replaces the original's two duplicated branches.
    path.write_text(content)
"""String util function collections."""

from pydolphinscheduler.constants import Delimiter


def attr2camel(attr: str, include_private=True):
    """Convert a class attribute name to camel case.

    When ``include_private`` is true, leading underscores are stripped first.
    """
    name = attr.lstrip(Delimiter.UNDERSCORE) if include_private else attr
    return snake2camel(name)


def snake2camel(snake: str):
    """Convert a snake_case string to camelCase."""
    head, *tail = snake.split(Delimiter.UNDERSCORE)
    return head + "".join(part.title() for part in tail)


def class_name2camel(class_name: str):
    """Convert a class name string to camelCase (first letter lowered)."""
    stripped = class_name.lstrip(Delimiter.UNDERSCORE)
    return stripped[0].lower() + snake2camel(stripped[1:])
"""YAML parser utils, parse yaml string to ``ruamel.yaml`` object and nested key dict."""

import copy
import io
from typing import Any, Dict, Optional

from ruamel.yaml import YAML
from ruamel.yaml.comments import CommentedMap


class YamlParser:
    """A parser to parse Yaml file and provide an easier way to access or change values.

    This parser provides delimiter-joined string keys to get or set values of the
    underlying :class:`ruamel.yaml.YAML` object.

    For example, yaml config named ``test.yaml`` with content:

    .. code-block:: yaml

        one:
          two1:
            three: value1
          two2: value2

    you could get ``value1`` and ``value2`` by nested path

    .. code-block:: python

        yaml_parser = YamlParser(content)
        value1 = yaml_parser.get("one.two1.three")
        value2 = yaml_parser["one.two2"]

    and change them by nested path assignment

    .. code-block:: python

        yaml_parser["one.two1.three"] = "value3"
        yaml_parser["one.two2"] = "value4"

    :param content: The raw yaml document as a string.
    :param delimiter: The string joining nested keys, default ``"."``.
    """

    def __init__(self, content: str, delimiter: Optional[str] = "."):
        self._content = content
        self.src_parser = content
        self._delimiter = delimiter

    @property
    def src_parser(self) -> CommentedMap:
        """Get src_parser property."""
        return self._src_parser

    @src_parser.setter
    def src_parser(self, content: str) -> None:
        """Set src_parser property by loading the yaml string."""
        self._yaml = YAML()
        self._src_parser = self._yaml.load(content)

    def parse_nested_dict(
        self, result: Dict, commented_map: CommentedMap, key: str
    ) -> None:
        """Parse :class:`ruamel.yaml.comments.CommentedMap` to nested dict using the delimiter."""
        if not isinstance(commented_map, CommentedMap):
            return
        for sub_key in set(commented_map.keys()):
            next_key = f"{key}{self._delimiter}{sub_key}"
            result[next_key] = commented_map[sub_key]
            self.parse_nested_dict(result, commented_map[sub_key], next_key)

    @property
    def dict_parser(self) -> Dict:
        """Get :class:`CommentedMap` as a nested dict using the delimiter as key joiner.

        Uses depth-first search over all nested keys; every path from the root is
        a key joined by the delimiter, making access and comparison easier.
        """
        res = dict()
        src_parser_copy = copy.deepcopy(self.src_parser)

        base_keys = set(src_parser_copy.keys())
        if not base_keys:
            return res
        for key in base_keys:
            res[key] = src_parser_copy[key]
            self.parse_nested_dict(res, src_parser_copy[key], key)
        return res

    def __contains__(self, key) -> bool:
        return key in self.dict_parser

    def __getitem__(self, key: str) -> Any:
        return self.dict_parser[key]

    def __setitem__(self, key: str, val: Any) -> None:
        """Set value by nested key.

        :raises KeyError: If the nested key does not already exist.
        """
        if key not in self.dict_parser:
            # Interpolate the key; the original passed it as a second
            # positional argument which never formatted into the message.
            raise KeyError("Key %s do not exists." % key)

        # Walk to the parent mapping, then assign on it. The original
        # implementation started with ``mid = None`` and crashed with
        # TypeError when ``key`` was a top-level key (single path segment).
        *parents, last = key.split(self._delimiter)
        node = self.src_parser
        for part in parents:
            node = node[part]
        node[last] = val

    def get(self, key: str) -> Any:
        """Get value by key, alias of ``__getitem__``."""
        return self[key]

    def __str__(self) -> str:
        """Transfer :class:`YamlParser` to string object.

        Useful when users want to output the :class:`YamlParser` object they just changed.
        """
        buf = io.StringIO()
        self._yaml.dump(self.src_parser, buf)
        return buf.getvalue()

    def __repr__(self) -> str:
        return f"YamlParser({str(self)})"
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Init tests package.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/cli/__init__.py b/dolphinscheduler-python/pydolphinscheduler/tests/cli/__init__.py deleted file mode 100644 index f1a4396af6..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/cli/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Init command line interface tests.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/cli/test_config.py b/dolphinscheduler-python/pydolphinscheduler/tests/cli/test_config.py deleted file mode 100644 index 516ad754a2..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/cli/test_config.py +++ /dev/null @@ -1,198 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
"""Test command line interface subcommand `config`."""

import os
from pathlib import Path

import pytest

from pydolphinscheduler.cli.commands import cli
from pydolphinscheduler.configuration import BUILD_IN_CONFIG_PATH, config_path
from tests.testing.cli import CliTestWrapper
from tests.testing.constants import DEV_MODE, ENV_PYDS_HOME
from tests.testing.file import get_file_content

# File name of the user configuration created by `config --init`.
config_file = "config.yaml"


@pytest.fixture
def teardown_file_env():
    """Util for deleting temp configuration file and pop env var after test finish."""
    yield
    config_file_path = config_path()
    if config_file_path.exists():
        config_file_path.unlink()
    # pop environment variable to keep test cases dependent
    os.environ.pop(ENV_PYDS_HOME, None)
    assert ENV_PYDS_HOME not in os.environ


@pytest.mark.parametrize(
    "home",
    [
        None,
        "/tmp/pydolphinscheduler",
        "/tmp/test_abc",
    ],
)
def test_config_init(teardown_file_env, home):
    """Test command line interface `config --init`."""
    if home:
        os.environ[ENV_PYDS_HOME] = home
    elif DEV_MODE:
        # home=None would target ~/pydolphinscheduler/config.yaml, which a
        # developer may actually be using locally — skip to protect it.
        pytest.skip(
            "Avoid delete ~/pydolphinscheduler/config.yaml by accident when test locally."
        )

    config_file_path = config_path()
    assert not config_file_path.exists()

    cli_test = CliTestWrapper(cli, ["config", "--init"])
    cli_test.assert_success()

    # The initialized file must be a byte-for-byte copy of the built-in default.
    assert config_file_path.exists()
    assert get_file_content(config_file_path) == get_file_content(BUILD_IN_CONFIG_PATH)


@pytest.mark.parametrize(
    "key, expect",
    [
        # We test each key in one single section
        ("java_gateway.address", "127.0.0.1"),
        ("default.user.name", "userPythonGateway"),
        ("default.workflow.project", "project-pydolphin"),
    ],
)
def test_config_get(teardown_file_env, key: str, expect: str):
    """Test command line interface `config --get XXX`."""
    os.environ[ENV_PYDS_HOME] = "/tmp/pydolphinscheduler"
    cli_test = CliTestWrapper(cli, ["config", "--init"])
    cli_test.assert_success()

    cli_test = CliTestWrapper(cli, ["config", "--get", key])
    cli_test.assert_success(output=f"{key} = {expect}", fuzzy=True)


@pytest.mark.parametrize(
    "keys, expects",
    [
        # We test mix section keys
        (("java_gateway.address", "java_gateway.port"), ("127.0.0.1", "25333")),
        (
            ("java_gateway.auto_convert", "default.user.tenant"),
            ("True", "tenant_pydolphin"),
        ),
        (
            (
                "java_gateway.port",
                "default.user.state",
                "default.workflow.worker_group",
            ),
            ("25333", "1", "default"),
        ),
    ],
)
def test_config_get_multiple(teardown_file_env, keys: str, expects: str):
    """Test command line interface `config --get KEY1 --get KEY2 ...`."""
    os.environ[ENV_PYDS_HOME] = "/tmp/pydolphinscheduler"
    cli_test = CliTestWrapper(cli, ["config", "--init"])
    cli_test.assert_success()

    # Build up `config --get K1 --get K2 ...` argument list.
    get_args = ["config"]
    for key in keys:
        get_args.append("--get")
        get_args.append(key)
    cli_test = CliTestWrapper(cli, get_args)

    for idx, expect in enumerate(expects):
        cli_test.assert_success(output=f"{keys[idx]} = {expect}", fuzzy=True)


@pytest.mark.parametrize(
    "key, value",
    [
        # We test each key in one single section
        ("java_gateway.address", "127.1.1.1"),
        ("default.user.name", "editUserPythonGateway"),
        ("default.workflow.project", "edit-project-pydolphin"),
    ],
)
def test_config_set(teardown_file_env, key: str, value: str):
    """Test command line interface `config --set KEY VALUE`."""
    path = "/tmp/pydolphinscheduler"
    assert not Path(path).joinpath(config_file).exists()
    os.environ[ENV_PYDS_HOME] = path
    cli_test = CliTestWrapper(cli, ["config", "--init"])
    cli_test.assert_success()

    # Make sure value do not exists first
    cli_test = CliTestWrapper(cli, ["config", "--get", key])
    assert f"{key} = {value}" not in cli_test.result.output

    cli_test = CliTestWrapper(cli, ["config", "--set", key, value])
    cli_test.assert_success()

    cli_test = CliTestWrapper(cli, ["config", "--get", key])
    assert f"{key} = {value}" in cli_test.result.output


@pytest.mark.parametrize(
    "keys, values",
    [
        # We test each key in mixture section
        (("java_gateway.address", "java_gateway.port"), ("127.1.1.1", "25444")),
        (
            ("java_gateway.auto_convert", "default.user.tenant"),
            ("False", "edit_tenant_pydolphin"),
        ),
        (
            (
                "java_gateway.port",
                "default.user.state",
                "default.workflow.worker_group",
            ),
            ("25555", "0", "not-default"),
        ),
    ],
)
def test_config_set_multiple(teardown_file_env, keys: str, values: str):
    """Test command line interface `config --set KEY1 VAL1 --set KEY2 VAL2`."""
    path = "/tmp/pydolphinscheduler"
    assert not Path(path).joinpath(config_file).exists()
    os.environ[ENV_PYDS_HOME] = path
    cli_test = CliTestWrapper(cli, ["config", "--init"])
    cli_test.assert_success()

    set_args = ["config"]
    for idx, key in enumerate(keys):
        # Make sure values do not exists first
        cli_test = CliTestWrapper(cli, ["config", "--get", key])
        assert f"{key} = {values[idx]}" not in cli_test.result.output

        set_args.append("--set")
        set_args.append(key)
        set_args.append(values[idx])

    cli_test = CliTestWrapper(cli, set_args)
    cli_test.assert_success()

    for idx, key in enumerate(keys):
        # Make sure values exists after `config --set` run
        cli_test = CliTestWrapper(cli, ["config", "--get", key])
        assert f"{key} = {values[idx]}" in cli_test.result.output
"""Test command line interface subcommand `version`."""

from unittest.mock import patch

import pytest

import pydolphinscheduler
from pydolphinscheduler.cli.commands import cli
from tests.testing.cli import CliTestWrapper


def test_version():
    """Test whether subcommand `version` correct."""
    cli_test = CliTestWrapper(cli, ["version"])
    cli_test.assert_success(output=f"{pydolphinscheduler.__version__}")


@pytest.mark.parametrize(
    "version, part, idx",
    [
        ("1.2.3", "major", 0),
        ("0.1.3", "minor", 1),
        ("3.1.0", "micro", 2),
        ("1.2.3-beta-1", "micro", 2),
        ("1.2.3-alpha", "micro", 2),
        ("1.2.3a2", "micro", 2),
        ("1.2.3b1", "micro", 2),
    ],
)
@patch("pydolphinscheduler.__version__")
def test_version_part(mock_version, version: str, part: str, idx: int):
    """Test subcommand `version` option `--part`."""
    # NOTE(review): patching the string attribute `__version__` replaces it with
    # a MagicMock; setting `return_value` on it does not make the attribute
    # equal `version`. The assertion below compares the CLI output against the
    # same mock's `.split('.')`, so it may pass without exercising the
    # parametrized `version` values at all — confirm intent.
    mock_version.return_value = version
    cli_test = CliTestWrapper(cli, ["version", "--part", part])
    cli_test.assert_success(output=f"{pydolphinscheduler.__version__.split('.')[idx]}")


@pytest.mark.parametrize(
    "option, output",
    [
        # not support option
        (["version", "--not-support"], "No such option"),
        # not support option value
        (["version", "--part", "abc"], "Invalid value for '--part'"),
    ],
)
def test_version_not_support_option(option, output):
    """Test subcommand `version` not support option or option value."""
    # ret_code=2 is click's exit status for usage errors.
    cli_test = CliTestWrapper(cli, option)
    cli_test.assert_fail(ret_code=2, output=output, fuzzy=True)
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Init core package tests.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_configuration.py b/dolphinscheduler-python/pydolphinscheduler/tests/core/test_configuration.py deleted file mode 100644 index b9dc8cb656..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_configuration.py +++ /dev/null @@ -1,272 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
"""Test class :mod:`pydolphinscheduler.core.configuration`' method."""

import importlib
import os
from pathlib import Path
from typing import Any

import pytest

from pydolphinscheduler import configuration
from pydolphinscheduler.configuration import (
    BUILD_IN_CONFIG_PATH,
    config_path,
    get_single_config,
    set_single_config,
)
from pydolphinscheduler.exceptions import PyDSConfException
from pydolphinscheduler.utils.yaml_parser import YamlParser
from tests.testing.constants import DEV_MODE, ENV_PYDS_HOME
from tests.testing.file import get_file_content


@pytest.fixture
def teardown_file_env():
    """Util for deleting temp configuration file and pop env var after test finish."""
    yield
    config_file_path = config_path()
    if config_file_path.exists():
        config_file_path.unlink()
    os.environ.pop(ENV_PYDS_HOME, None)


@pytest.mark.parametrize(
    "val, expect",
    [
        ("1", 1),
        ("123", 123),
        ("4567", 4567),
        (b"1234", 1234),
    ],
)
def test_get_int(val: Any, expect: int):
    """Test function :func:`configuration.get_int`."""
    assert configuration.get_int(val) == expect


@pytest.mark.parametrize(
    "val",
    [
        "a",
        "1a",
        "1d2",
        "1723-",
    ],
)
def test_get_int_error(val: Any):
    """Test function :func:`configuration.get_int` raises on non-numeric input."""
    with pytest.raises(ValueError):
        configuration.get_int(val)


@pytest.mark.parametrize(
    "val, expect",
    [
        ("t", True),
        ("true", True),
        (1, True),
        (True, True),
        ("f", False),
        ("false", False),
        (0, False),
        (123, False),
        ("abc", False),
        ("abc1", False),
        (False, False),
    ],
)
def test_get_bool(val: Any, expect: bool):
    """Test function :func:`configuration.get_bool`."""
    assert configuration.get_bool(val) == expect


@pytest.mark.parametrize(
    "home, expect",
    [
        (None, "~/pydolphinscheduler/config.yaml"),
        ("/tmp/pydolphinscheduler", "/tmp/pydolphinscheduler/config.yaml"),
        ("/tmp/test_abc", "/tmp/test_abc/config.yaml"),
    ],
)
def test_config_path(home: Any, expect: str):
    """Test function :func:`config_path`."""
    if home:
        os.environ[ENV_PYDS_HOME] = home
    assert Path(expect).expanduser() == configuration.config_path()


@pytest.mark.parametrize(
    "home",
    [
        None,
        "/tmp/pydolphinscheduler",
        "/tmp/test_abc",
    ],
)
def test_init_config_file(teardown_file_env, home: Any):
    """Test init config file."""
    if home:
        os.environ[ENV_PYDS_HOME] = home
    elif DEV_MODE:
        # home=None would target the developer's real config — skip locally.
        pytest.skip(
            "Avoid delete ~/pydolphinscheduler/config.yaml by accident when test locally."
        )
    assert not config_path().exists()
    configuration.init_config_file()
    assert config_path().exists()

    assert get_file_content(config_path()) == get_file_content(BUILD_IN_CONFIG_PATH)


@pytest.mark.parametrize(
    "home",
    [
        None,
        "/tmp/pydolphinscheduler",
        "/tmp/test_abc",
    ],
)
def test_init_config_file_duplicate(teardown_file_env, home: Any):
    """Test raise error with init config file which already exists."""
    if home:
        os.environ[ENV_PYDS_HOME] = home
    elif DEV_MODE:
        pytest.skip(
            "Avoid delete ~/pydolphinscheduler/config.yaml by accident when test locally."
        )
    assert not config_path().exists()
    configuration.init_config_file()
    assert config_path().exists()

    # A second init on the same path must refuse to clobber the file.
    with pytest.raises(PyDSConfException, match=".*file already exists.*"):
        configuration.init_config_file()


def test_get_configs_build_in():
    """Test function :func:`get_configs` with build-in config file."""
    content = get_file_content(BUILD_IN_CONFIG_PATH)
    assert YamlParser(content).src_parser == configuration.get_configs().src_parser
    assert YamlParser(content).dict_parser == configuration.get_configs().dict_parser


@pytest.mark.parametrize(
    "key, val, new_val",
    [
        ("java_gateway.address", "127.0.0.1", "127.1.1.1"),
        ("java_gateway.port", 25333, 25555),
        ("java_gateway.auto_convert", True, False),
        ("default.user.name", "userPythonGateway", "editUserPythonGateway"),
        ("default.user.password", "userPythonGateway", "editUserPythonGateway"),
        (
            "default.user.email",
            "userPythonGateway@dolphinscheduler.com",
            "userPythonGateway@edit.com",
        ),
        ("default.user.phone", 11111111111, 22222222222),
        ("default.user.state", 1, 0),
        ("default.workflow.project", "project-pydolphin", "eidt-project-pydolphin"),
        ("default.workflow.tenant", "tenant_pydolphin", "edit_tenant_pydolphin"),
        ("default.workflow.user", "userPythonGateway", "editUserPythonGateway"),
        ("default.workflow.queue", "queuePythonGateway", "editQueuePythonGateway"),
        ("default.workflow.worker_group", "default", "specific"),
        ("default.workflow.time_zone", "Asia/Shanghai", "Asia/Beijing"),
        ("default.workflow.warning_type", "NONE", "ALL"),
    ],
)
def test_single_config_get_set(teardown_file_env, key: str, val: Any, new_val: Any):
    """Test function :func:`get_single_config` and :func:`set_single_config`."""
    assert val == get_single_config(key)
    set_single_config(key, new_val)
    assert new_val == get_single_config(key)


def test_single_config_get_set_not_exists_key():
    """Test function :func:`get_single_config` and :func:`set_single_config` error while key not exists."""
    not_exists_key = "i_am_not_exists_key"
    with pytest.raises(PyDSConfException, match=".*do not exists.*"):
        get_single_config(not_exists_key)
    with pytest.raises(PyDSConfException, match=".*do not exists.*"):
        set_single_config(not_exists_key, not_exists_key)


@pytest.mark.parametrize(
    "config_name, expect",
    [
        ("JAVA_GATEWAY_ADDRESS", "127.0.0.1"),
        ("JAVA_GATEWAY_PORT", 25333),
        ("JAVA_GATEWAY_AUTO_CONVERT", True),
        ("USER_NAME", "userPythonGateway"),
        ("USER_PASSWORD", "userPythonGateway"),
        ("USER_EMAIL", "userPythonGateway@dolphinscheduler.com"),
        ("USER_PHONE", "11111111111"),
        ("USER_STATE", 1),
        ("WORKFLOW_PROJECT", "project-pydolphin"),
        ("WORKFLOW_TENANT", "tenant_pydolphin"),
        ("WORKFLOW_USER", "userPythonGateway"),
        ("WORKFLOW_QUEUE", "queuePythonGateway"),
        ("WORKFLOW_WORKER_GROUP", "default"),
        ("WORKFLOW_TIME_ZONE", "Asia/Shanghai"),
        ("WORKFLOW_WARNING_TYPE", "NONE"),
    ],
)
def test_get_configuration(config_name: str, expect: Any):
    """Test get exists attribute in :mod:`configuration`."""
    assert expect == getattr(configuration, config_name)


@pytest.mark.parametrize(
    "config_name, src, dest",
    [
        ("JAVA_GATEWAY_ADDRESS", "127.0.0.1", "192.168.1.1"),
        ("JAVA_GATEWAY_PORT", 25333, 25334),
        ("JAVA_GATEWAY_AUTO_CONVERT", True, False),
        ("USER_NAME", "userPythonGateway", "envUserPythonGateway"),
        ("USER_PASSWORD", "userPythonGateway", "envUserPythonGateway"),
        (
            "USER_EMAIL",
            "userPythonGateway@dolphinscheduler.com",
            "userPythonGateway@dolphinscheduler.com",
        ),
        ("USER_PHONE", "11111111111", "22222222222"),
        ("USER_STATE", 1, 0),
        ("WORKFLOW_PROJECT", "project-pydolphin", "env-project-pydolphin"),
        ("WORKFLOW_TENANT", "tenant_pydolphin", "env-tenant_pydolphin"),
        ("WORKFLOW_USER", "userPythonGateway", "envUserPythonGateway"),
        ("WORKFLOW_QUEUE", "queuePythonGateway", "envQueuePythonGateway"),
        ("WORKFLOW_WORKER_GROUP", "default", "custom"),
        ("WORKFLOW_TIME_ZONE", "Asia/Shanghai", "America/Los_Angeles"),
        ("WORKFLOW_WARNING_TYPE", "NONE", "ALL"),
    ],
)
def test_get_configuration_env(config_name: str, src: Any, dest: Any):
    """Test get exists attribute from environment variable in :mod:`configuration`."""
    assert getattr(configuration, config_name) == src

    env_name = f"PYDS_{config_name}"
    os.environ[env_name] = str(dest)
    # reload module configuration to re-get config from environment.
    importlib.reload(configuration)
    assert getattr(configuration, config_name) == dest

    # pop and reload configuration to test whether this config equal to `src` value
    os.environ.pop(env_name, None)
    importlib.reload(configuration)
    assert getattr(configuration, config_name) == src
    assert env_name not in os.environ
- -"""Test Database.""" - - -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.core.database import Database - -TEST_DATABASE_DATASOURCE_NAME = "test_datasource" -TEST_DATABASE_TYPE_KEY = "type" -TEST_DATABASE_KEY = "datasource" - - -@pytest.mark.parametrize( - "expect", - [ - { - TEST_DATABASE_TYPE_KEY: "mock_type", - TEST_DATABASE_KEY: 1, - } - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch( - "pydolphinscheduler.core.database.Database.get_database_info", - return_value=({"id": 1, "type": "mock_type"}), -) -def test_get_datasource_detail(mock_datasource, mock_code_version, expect): - """Test :func:`get_database_type` and :func:`get_database_id` can return expect value.""" - database_info = Database( - TEST_DATABASE_DATASOURCE_NAME, TEST_DATABASE_TYPE_KEY, TEST_DATABASE_KEY - ) - assert expect == database_info diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_default_config_yaml.py b/dolphinscheduler-python/pydolphinscheduler/tests/core/test_default_config_yaml.py deleted file mode 100644 index b4d5e07c7a..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_default_config_yaml.py +++ /dev/null @@ -1,39 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test default config file.""" - -from ruamel.yaml import YAML -from ruamel.yaml.comments import CommentedMap - -from tests.testing.path import path_default_config_yaml - - -def nested_key_check(comment_map: CommentedMap) -> None: - """Test whether default configuration file exists specific character.""" - for key, val in comment_map.items(): - assert "." not in key, f"There is not allowed special character in key `{key}`." - if isinstance(val, CommentedMap): - nested_key_check(val) - - -def test_key_without_dot_delimiter(): - """Test wrapper of whether default configuration file exists specific character.""" - yaml = YAML() - with open(path_default_config_yaml, "r") as f: - comment_map = yaml.load(f.read()) - nested_key_check(comment_map) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_engine.py b/dolphinscheduler-python/pydolphinscheduler/tests/core/test_engine.py deleted file mode 100644 index ba44fad669..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_engine.py +++ /dev/null @@ -1,148 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task Engine.""" - - -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.core.engine import Engine, ProgramType - -TEST_ENGINE_TASK_TYPE = "ENGINE" -TEST_MAIN_CLASS = "org.apache.examples.mock.Mock" -TEST_MAIN_PACKAGE = "Mock.jar" -TEST_PROGRAM_TYPE = ProgramType.JAVA - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch( - "pydolphinscheduler.core.engine.Engine.get_resource_info", - return_value=({"id": 1, "name": "mock_name"}), -) -def test_get_jar_detail(mock_resource, mock_code_version): - """Test :func:`get_jar_id` can return expect value.""" - name = "test_get_jar_detail" - task = Engine( - name, - TEST_ENGINE_TASK_TYPE, - TEST_MAIN_CLASS, - TEST_MAIN_PACKAGE, - TEST_PROGRAM_TYPE, - ) - assert 1 == task.get_jar_id() - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - { - "name": "test-task-params", - "task_type": "test-engine", - "main_class": "org.apache.examples.mock.Mock", - "main_package": "TestMock.jar", - "program_type": ProgramType.JAVA, - }, - { - "mainClass": "org.apache.examples.mock.Mock", - "mainJar": { - "id": 1, - }, - "programType": ProgramType.JAVA, - "localParams": [], - "resourceList": [], - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - ) - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch( - "pydolphinscheduler.core.engine.Engine.get_resource_info", - return_value=({"id": 1, "name": "mock_name"}), -) -def test_property_task_params(mock_resource, mock_code_version, attr, expect): - """Test task engine task property.""" - task = Engine(**attr) - assert expect == task.task_params - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - { - "name": "test-task-test_engine_get_define", - "task_type": "test-engine", - "main_class": "org.apache.examples.mock.Mock", - "main_package": "TestMock.jar", - "program_type": 
ProgramType.JAVA, - }, - { - "code": 123, - "name": "test-task-test_engine_get_define", - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "test-engine", - "taskParams": { - "mainClass": "org.apache.examples.mock.Mock", - "mainJar": { - "id": 1, - }, - "programType": ProgramType.JAVA, - "localParams": [], - "resourceList": [], - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - }, - ) - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch( - "pydolphinscheduler.core.engine.Engine.get_resource_info", - return_value=({"id": 1, "name": "mock_name"}), -) -def test_engine_get_define(mock_resource, mock_code_version, attr, expect): - """Test task engine function get_define.""" - task = Engine(**attr) - assert task.get_define() == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_process_definition.py b/dolphinscheduler-python/pydolphinscheduler/tests/core/test_process_definition.py deleted file mode 100644 index 30445bfbf3..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_process_definition.py +++ /dev/null @@ -1,502 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test process definition.""" - -from datetime import datetime -from typing import Any, List -from unittest.mock import patch - -import pytest -from freezegun import freeze_time - -from pydolphinscheduler import configuration -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.core.resource import Resource -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.models import Project, Tenant, User -from pydolphinscheduler.tasks.switch import Branch, Default, Switch, SwitchCondition -from pydolphinscheduler.utils.date import conv_to_schedule -from tests.testing.task import Task - -TEST_PROCESS_DEFINITION_NAME = "simple-test-process-definition" -TEST_TASK_TYPE = "test-task-type" - - -@pytest.mark.parametrize("func", ["run", "submit", "start"]) -def test_process_definition_key_attr(func): - """Test process definition have specific functions or attributes.""" - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd: - assert hasattr( - pd, func - ), f"ProcessDefinition instance don't have attribute `{func}`" - - -@pytest.mark.parametrize( - "name,value", - [ - ("timezone", configuration.WORKFLOW_TIME_ZONE), - ("project", Project(configuration.WORKFLOW_PROJECT)), - ("tenant", Tenant(configuration.WORKFLOW_TENANT)), - ( - "user", - User( - configuration.USER_NAME, - configuration.USER_PASSWORD, - configuration.USER_EMAIL, - configuration.USER_PHONE, - configuration.WORKFLOW_TENANT, - configuration.WORKFLOW_QUEUE, - configuration.USER_STATE, - ), - ), - ("worker_group", 
configuration.WORKFLOW_WORKER_GROUP), - ("warning_type", configuration.WORKFLOW_WARNING_TYPE), - ("warning_group_id", 0), - ("release_state", 1), - ], -) -def test_process_definition_default_value(name, value): - """Test process definition default attributes.""" - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd: - assert getattr(pd, name) == value, ( - f"ProcessDefinition instance attribute `{name}` not with " - f"except default value `{getattr(pd, name)}`" - ) - - -@pytest.mark.parametrize( - "name,cls,expect", - [ - ("name", str, "name"), - ("description", str, "description"), - ("schedule", str, "schedule"), - ("timezone", str, "timezone"), - ("worker_group", str, "worker_group"), - ("warning_type", str, "FAILURE"), - ("warning_group_id", int, 1), - ("timeout", int, 1), - ("param", dict, {"key": "value"}), - ( - "resource_list", - List, - [Resource(name="/dev/test.py", content="hello world", description="desc")], - ), - ], -) -def test_set_attr(name, cls, expect): - """Test process definition set attributes which get with same type.""" - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd: - setattr(pd, name, expect) - assert ( - getattr(pd, name) == expect - ), f"ProcessDefinition set attribute `{name}` do not work expect" - - -@pytest.mark.parametrize( - "value,expect", - [ - ("online", 1), - ("offline", 0), - ], -) -def test_set_release_state(value, expect): - """Test process definition set release_state attributes.""" - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME, release_state=value) as pd: - assert ( - getattr(pd, "release_state") == expect - ), "ProcessDefinition set attribute release_state do not return expect value." 
- - -@pytest.mark.parametrize( - "value", - [ - "oneline", - "offeline", - 1, - 0, - None, - ], -) -def test_set_release_state_error(value): - """Test process definition set release_state attributes with error.""" - pd = ProcessDefinition(TEST_PROCESS_DEFINITION_NAME, release_state=value) - with pytest.raises( - PyDSParamException, - match="Parameter release_state only support `online` or `offline` but get.*", - ): - pd.release_state - - -@pytest.mark.parametrize( - "set_attr,set_val,get_attr,get_val", - [ - ("_project", "project", "project", Project("project")), - ("_tenant", "tenant", "tenant", Tenant("tenant")), - ("_start_time", "2021-01-01", "start_time", datetime(2021, 1, 1)), - ("_end_time", "2021-01-01", "end_time", datetime(2021, 1, 1)), - ], -) -def test_set_attr_return_special_object(set_attr, set_val, get_attr, get_val): - """Test process definition set attributes which get with different type.""" - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd: - setattr(pd, set_attr, set_val) - assert get_val == getattr( - pd, get_attr - ), f"Set attribute {set_attr} can not get back with {get_val}." - - -@pytest.mark.parametrize( - "val,expect", - [ - (datetime(2021, 1, 1), datetime(2021, 1, 1)), - (None, None), - ("2021-01-01", datetime(2021, 1, 1)), - ("2021-01-01 01:01:01", datetime(2021, 1, 1, 1, 1, 1)), - ], -) -def test__parse_datetime(val, expect): - """Test process definition function _parse_datetime. - - Only two datetime test cases here because we have more test cases in tests/utils/test_date.py file. - """ - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd: - assert expect == pd._parse_datetime( - val - ), f"Function _parse_datetime with unexpect value by {val}." 
- - -@pytest.mark.parametrize( - "val", - [ - 20210101, - (2021, 1, 1), - {"year": "2021", "month": "1", "day": 1}, - ], -) -def test__parse_datetime_not_support_type(val: Any): - """Test process definition function _parse_datetime not support type error.""" - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd: - with pytest.raises(PyDSParamException, match="Do not support value type.*?"): - pd._parse_datetime(val) - - -@pytest.mark.parametrize( - "val", - [ - "ALLL", - "nonee", - ], -) -def test_warn_type_not_support_type(val: str): - """Test process definition param warning_type not support type error.""" - with pytest.raises( - PyDSParamException, match="Parameter `warning_type` with unexpect value.*?" - ): - ProcessDefinition(TEST_PROCESS_DEFINITION_NAME, warning_type=val) - - -@pytest.mark.parametrize( - "param, expect", - [ - ( - None, - [], - ), - ( - {}, - [], - ), - ( - {"key1": "val1"}, - [ - { - "prop": "key1", - "direct": "IN", - "type": "VARCHAR", - "value": "val1", - } - ], - ), - ( - { - "key1": "val1", - "key2": "val2", - }, - [ - { - "prop": "key1", - "direct": "IN", - "type": "VARCHAR", - "value": "val1", - }, - { - "prop": "key2", - "direct": "IN", - "type": "VARCHAR", - "value": "val2", - }, - ], - ), - ], -) -def test_property_param_json(param, expect): - """Test ProcessDefinition's property param_json.""" - pd = ProcessDefinition(TEST_PROCESS_DEFINITION_NAME, param=param) - assert pd.param_json == expect - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test__pre_submit_check_switch_without_param(mock_code_version): - """Test :func:`_pre_submit_check` if process definition with switch but without attribute param.""" - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd: - parent = Task(name="parent", task_type=TEST_TASK_TYPE) - switch_child_1 = Task(name="switch_child_1", task_type=TEST_TASK_TYPE) - switch_child_2 = Task(name="switch_child_2", task_type=TEST_TASK_TYPE) - 
switch_condition = SwitchCondition( - Branch(condition="${var} > 1", task=switch_child_1), - Default(task=switch_child_2), - ) - - switch = Switch(name="switch", condition=switch_condition) - parent >> switch - with pytest.raises( - PyDSParamException, - match="Parameter param or at least one local_param of task must " - "be provider if task Switch in process definition.", - ): - pd._pre_submit_check() - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test__pre_submit_check_switch_with_local_params(mock_code_version): - """Test :func:`_pre_submit_check` if process definition with switch with local params of task.""" - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd: - parent = Task( - name="parent", - task_type=TEST_TASK_TYPE, - local_params=[ - {"prop": "var", "direct": "OUT", "type": "VARCHAR", "value": ""} - ], - ) - switch_child_1 = Task(name="switch_child_1", task_type=TEST_TASK_TYPE) - switch_child_2 = Task(name="switch_child_2", task_type=TEST_TASK_TYPE) - switch_condition = SwitchCondition( - Branch(condition="${var} > 1", task=switch_child_1), - Default(task=switch_child_2), - ) - - switch = Switch(name="switch", condition=switch_condition) - parent >> switch - pd._pre_submit_check() - - -def test_process_definition_get_define_without_task(): - """Test process definition function get_define without task.""" - expect = { - "name": TEST_PROCESS_DEFINITION_NAME, - "description": None, - "project": configuration.WORKFLOW_PROJECT, - "tenant": configuration.WORKFLOW_TENANT, - "workerGroup": configuration.WORKFLOW_WORKER_GROUP, - "warningType": configuration.WORKFLOW_WARNING_TYPE, - "warningGroupId": 0, - "timeout": 0, - "releaseState": 1, - "param": None, - "tasks": {}, - "taskDefinitionJson": [{}], - "taskRelationJson": [{}], - "resourceList": [], - } - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd: - assert pd.get_define() == expect - - -def 
test_process_definition_simple_context_manager(): - """Test simple create workflow in process definition context manager mode.""" - expect_tasks_num = 5 - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd: - for i in range(expect_tasks_num): - curr_task = Task(name=f"task-{i}", task_type=f"type-{i}") - # Set deps task i as i-1 parent - if i > 0: - pre_task = pd.get_one_task_by_name(f"task-{i - 1}") - curr_task.set_upstream(pre_task) - assert len(pd.tasks) == expect_tasks_num - - # Test if task process_definition same as origin one - task: Task = pd.get_one_task_by_name("task-0") - assert pd is task.process_definition - - # Test if all tasks with expect deps - for i in range(expect_tasks_num): - task: Task = pd.get_one_task_by_name(f"task-{i}") - if i == 0: - assert task._upstream_task_codes == set() - assert task._downstream_task_codes == { - pd.get_one_task_by_name("task-1").code - } - elif i == expect_tasks_num - 1: - assert task._upstream_task_codes == { - pd.get_one_task_by_name(f"task-{i - 1}").code - } - assert task._downstream_task_codes == set() - else: - assert task._upstream_task_codes == { - pd.get_one_task_by_name(f"task-{i - 1}").code - } - assert task._downstream_task_codes == { - pd.get_one_task_by_name(f"task-{i + 1}").code - } - - -def test_process_definition_simple_separate(): - """Test process definition simple create workflow in separate mode. - - This test just test basic information, cause most of test case is duplicate to - test_process_definition_simple_context_manager. 
- """ - expect_tasks_num = 5 - pd = ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) - for i in range(expect_tasks_num): - curr_task = Task( - name=f"task-{i}", - task_type=f"type-{i}", - process_definition=pd, - ) - # Set deps task i as i-1 parent - if i > 0: - pre_task = pd.get_one_task_by_name(f"task-{i - 1}") - curr_task.set_upstream(pre_task) - assert len(pd.tasks) == expect_tasks_num - assert all(["task-" in task.name for task in pd.task_list]) - - -@pytest.mark.parametrize( - "user_attrs", - [ - {"tenant": "tenant_specific"}, - ], -) -def test_set_process_definition_user_attr(user_attrs): - """Test user with correct attributes if we specific assigned to process definition object.""" - default_value = { - "tenant": configuration.WORKFLOW_TENANT, - } - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME, **user_attrs) as pd: - user = pd.user - for attr in default_value: - # Get assigned attribute if we specific, else get default value - except_attr = ( - user_attrs[attr] if attr in user_attrs else default_value[attr] - ) - # Get actually attribute of user object - actual_attr = getattr(user, attr) - assert ( - except_attr == actual_attr - ), f"Except attribute is {except_attr} but get {actual_attr}" - - -def test_schedule_json_none_schedule(): - """Test function schedule_json with None as schedule.""" - with ProcessDefinition( - TEST_PROCESS_DEFINITION_NAME, - schedule=None, - ) as pd: - assert pd.schedule_json is None - - -# We freeze time here, because we test start_time with None, and if will get datetime.datetime.now. If we do -# not freeze time, it will cause flaky test here. 
-@freeze_time("2021-01-01") -@pytest.mark.parametrize( - "start_time,end_time,expect_date", - [ - ( - "20210101", - "20210201", - {"start_time": "2021-01-01 00:00:00", "end_time": "2021-02-01 00:00:00"}, - ), - ( - "2021-01-01", - "2021-02-01", - {"start_time": "2021-01-01 00:00:00", "end_time": "2021-02-01 00:00:00"}, - ), - ( - "2021/01/01", - "2021/02/01", - {"start_time": "2021-01-01 00:00:00", "end_time": "2021-02-01 00:00:00"}, - ), - # Test mix pattern - ( - "2021/01/01 01:01:01", - "2021-02-02 02:02:02", - {"start_time": "2021-01-01 01:01:01", "end_time": "2021-02-02 02:02:02"}, - ), - ( - "2021/01/01 01:01:01", - "20210202 020202", - {"start_time": "2021-01-01 01:01:01", "end_time": "2021-02-02 02:02:02"}, - ), - ( - "20210101 010101", - "2021-02-02 02:02:02", - {"start_time": "2021-01-01 01:01:01", "end_time": "2021-02-02 02:02:02"}, - ), - # Test None value - ( - "2021/01/01 01:02:03", - None, - {"start_time": "2021-01-01 01:02:03", "end_time": "9999-12-31 23:59:59"}, - ), - ( - None, - None, - { - "start_time": conv_to_schedule(datetime(2021, 1, 1)), - "end_time": "9999-12-31 23:59:59", - }, - ), - ], -) -def test_schedule_json_start_and_end_time(start_time, end_time, expect_date): - """Test function schedule_json about handle start_time and end_time. - - Only two datetime test cases here because we have more test cases in tests/utils/test_date.py file. - """ - schedule = "0 0 0 * * ? 
*" - expect = { - "crontab": schedule, - "startTime": expect_date["start_time"], - "endTime": expect_date["end_time"], - "timezoneId": configuration.WORKFLOW_TIME_ZONE, - } - with ProcessDefinition( - TEST_PROCESS_DEFINITION_NAME, - schedule=schedule, - start_time=start_time, - end_time=end_time, - timezone=configuration.WORKFLOW_TIME_ZONE, - ) as pd: - assert pd.schedule_json == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_resource_definition.py b/dolphinscheduler-python/pydolphinscheduler/tests/core/test_resource_definition.py deleted file mode 100644 index 07fcac3547..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_resource_definition.py +++ /dev/null @@ -1,68 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test resource definition.""" -import pytest - -from pydolphinscheduler.core.resource import Resource -from pydolphinscheduler.exceptions import PyDSParamException - - -def test_resource(): - """Test resource set attributes which get with same type.""" - name = "/dev/test.py" - content = """print("hello world")""" - description = "hello world" - user_name = "test_user" - expect = { - "name": name, - "content": content, - "description": description, - "userName": user_name, - } - resourceDefinition = Resource( - name=name, content=content, description=description, user_name=user_name - ) - assert resourceDefinition.get_define() == expect - - -def test_empty_user_name(): - """Tests for the exception get info from database when the user name is null.""" - name = "/dev/test.py" - content = """print("hello world")""" - description = "hello world" - resourceDefinition = Resource(name=name, content=content, description=description) - with pytest.raises( - PyDSParamException, - match="`user_name` is required when querying resources from python gate.", - ): - resourceDefinition.get_info_from_database() - - -def test_empty_content(): - """Tests for the exception create or update resource when the user name or content is empty.""" - name = "/dev/test.py" - user_name = "test_user" - description = "hello world" - resourceDefinition = Resource( - name=name, description=description, user_name=user_name - ) - with pytest.raises( - PyDSParamException, - match="`user_name` and `content` are required when create or update resource from python gate.", - ): - resourceDefinition.create_or_update_resource() diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_task.py b/dolphinscheduler-python/pydolphinscheduler/tests/core/test_task.py deleted file mode 100644 index c6ef7773ae..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_task.py +++ /dev/null @@ -1,470 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more 
contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test Task class function.""" -import logging -import re -from typing import Set -from unittest.mock import PropertyMock, patch - -import pytest - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.core.task import Task, TaskRelation -from pydolphinscheduler.exceptions import PyResPluginException -from pydolphinscheduler.resources_plugin import Local -from tests.testing.task import Task as TestTask -from tests.testing.task import TaskWithCode - -TEST_TASK_RELATION_SET = set() -TEST_TASK_RELATION_SIZE = 0 - - -@pytest.mark.parametrize( - "addition, ignore, expect", - [ - ( - set(), - set(), - { - "local_params", - "resource_list", - "dependence", - "wait_start_timeout", - "condition_result", - }, - ), - ( - set(), - {"dependence", "condition_result", "not_exists"}, - { - "local_params", - "resource_list", - "wait_start_timeout", - }, - ), - ( - { - "not_exists_1", - "not_exists_2", - }, - set(), - { - "not_exists_1", - "not_exists_2", - "local_params", - "resource_list", - "dependence", - "wait_start_timeout", - "condition_result", - }, - ), - # test addition and ignore conflict to see behavior - ( - { - "not_exists", - }, - {"condition_result", "not_exists"}, - { - "not_exists", - "local_params", - 
"resource_list", - "dependence", - "wait_start_timeout", - }, - ), - ], -) -def test__get_attr(addition: Set, ignore: Set, expect: Set): - """Test task function `_get_attr`.""" - task = TestTask( - name="test-get-attr", - task_type="test", - ) - task._task_custom_attr = addition - task._task_ignore_attr = ignore - assert task._get_attr() == expect - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - dict(), - { - "localParams": [], - "resourceList": [], - "dependence": {}, - "waitStartTimeout": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - }, - ), - ( - { - "local_params": ["foo", "bar"], - "resource_list": ["foo", "bar"], - "dependence": {"foo", "bar"}, - "wait_start_timeout": {"foo", "bar"}, - "condition_result": {"foo": ["bar"]}, - }, - { - "localParams": ["foo", "bar"], - "resourceList": [{"id": 1}], - "dependence": {"foo", "bar"}, - "waitStartTimeout": {"foo", "bar"}, - "conditionResult": {"foo": ["bar"]}, - }, - ), - ], -) -@patch( - "pydolphinscheduler.core.resource.Resource.get_id_from_database", - return_value=1, -) -@patch( - "pydolphinscheduler.core.task.Task.user_name", - return_value="test_user", -) -def test_property_task_params(mock_resource, mock_user_name, attr, expect): - """Test class task property.""" - task = TestTask( - "test-property-task-params", - "test-task", - **attr, - ) - assert expect == task.task_params - - -@pytest.mark.parametrize( - "pre_code, post_code, expect", - [ - (123, 456, hash("123 -> 456")), - (12345678, 987654321, hash("12345678 -> 987654321")), - ], -) -def test_task_relation_hash_func(pre_code, post_code, expect): - """Test TaskRelation magic function :func:`__hash__`.""" - task_param = TaskRelation(pre_task_code=pre_code, post_task_code=post_code) - assert hash(task_param) == expect - - -@pytest.mark.parametrize( - "pre_code, post_code, size_add", - [ - (123, 456, 1), - (123, 456, 0), - (456, 456, 1), - (123, 123, 1), - (456, 123, 1), - (0, 456, 1), - (123, 0, 1), - ], -) -def 
test_task_relation_add_to_set(pre_code, post_code, size_add): - """Test TaskRelation with different pre_code and post_code add to set behavior. - - Here we use global variable to keep set of :class:`TaskRelation` instance and the number we expect - of the size when we add a new task relation to exists set. - """ - task_relation = TaskRelation(pre_task_code=pre_code, post_task_code=post_code) - TEST_TASK_RELATION_SET.add(task_relation) - # hint python interpreter use global variable instead of local's - global TEST_TASK_RELATION_SIZE - TEST_TASK_RELATION_SIZE += size_add - assert len(TEST_TASK_RELATION_SET) == TEST_TASK_RELATION_SIZE - - -def test_task_relation_to_dict(): - """Test TaskRelation object function to_dict.""" - pre_task_code = 123 - post_task_code = 456 - expect = { - "name": "", - "preTaskCode": pre_task_code, - "postTaskCode": post_task_code, - "preTaskVersion": 1, - "postTaskVersion": 1, - "conditionType": 0, - "conditionParams": {}, - } - task_relation = TaskRelation( - pre_task_code=pre_task_code, post_task_code=post_task_code - ) - assert task_relation.get_define() == expect - - -def test_task_get_define(): - """Test Task object function get_define.""" - code = 123 - version = 1 - name = "test_task_get_define" - task_type = "test_task_get_define_type" - expect = { - "code": code, - "name": name, - "version": version, - "description": None, - "delayTime": 0, - "taskType": task_type, - "taskParams": { - "resourceList": [], - "localParams": [], - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - task = Task(name=name, task_type=task_type) - assert 
task.get_define() == expect - - -@pytest.mark.parametrize("shift", ["<<", ">>"]) -def test_two_tasks_shift(shift: str): - """Test bit operator between tasks. - - Here we test both `>>` and `<<` bit operator. - """ - upstream = TestTask(name="upstream", task_type=shift) - downstream = TestTask(name="downstream", task_type=shift) - if shift == "<<": - downstream << upstream - elif shift == ">>": - upstream >> downstream - else: - assert False, f"Unexpect bit operator type {shift}." - assert ( - 1 == len(upstream._downstream_task_codes) - and downstream.code in upstream._downstream_task_codes - ), "Task downstream task attributes error, downstream codes size or specific code failed." - assert ( - 1 == len(downstream._upstream_task_codes) - and upstream.code in downstream._upstream_task_codes - ), "Task upstream task attributes error, upstream codes size or upstream code failed." - - -@pytest.mark.parametrize( - "dep_expr, flag", - [ - ("task << tasks", "upstream"), - ("tasks << task", "downstream"), - ("task >> tasks", "downstream"), - ("tasks >> task", "upstream"), - ], -) -def test_tasks_list_shift(dep_expr: str, flag: str): - """Test bit operator between task and sequence of tasks. - - Here we test both `>>` and `<<` bit operator. 
- """ - reverse_dict = { - "upstream": "downstream", - "downstream": "upstream", - } - task_type = "dep_task_and_tasks" - task = TestTask(name="upstream", task_type=task_type) - tasks = [ - TestTask(name="downstream1", task_type=task_type), - TestTask(name="downstream2", task_type=task_type), - ] - - # Use build-in function eval to simply test case and reduce duplicate code - eval(dep_expr) - direction_attr = f"_{flag}_task_codes" - reverse_direction_attr = f"_{reverse_dict[flag]}_task_codes" - assert 2 == len(getattr(task, direction_attr)) - assert [t.code in getattr(task, direction_attr) for t in tasks] - - assert all([1 == len(getattr(t, reverse_direction_attr)) for t in tasks]) - assert all([task.code in getattr(t, reverse_direction_attr) for t in tasks]) - - -def test_add_duplicate(caplog): - """Test add task which code already in process definition.""" - with ProcessDefinition("test_add_duplicate_workflow") as _: - TaskWithCode(name="test_task_1", task_type="test", code=123, version=1) - with caplog.at_level(logging.WARNING): - TaskWithCode( - name="test_task_duplicate_code", task_type="test", code=123, version=2 - ) - assert all( - [ - caplog.text.startswith("WARNING pydolphinscheduler"), - re.findall("already in process definition", caplog.text), - ] - ) - - -@pytest.mark.parametrize( - "val, expected", - [ - ("a.sh", "echo Test task attribute ext_attr"), - ("a.zsh", "echo Test task attribute ext_attr"), - ("echo Test task attribute ext_attr", "echo Test task attribute ext_attr"), - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch( - "pydolphinscheduler.core.task.Task.ext", - new_callable=PropertyMock, - return_value={".sh", ".zsh"}, -) -@patch( - "pydolphinscheduler.core.task.Task.ext_attr", - new_callable=PropertyMock, - return_value="_raw_script", -) -@patch( - "pydolphinscheduler.core.task.Task._raw_script", - create=True, - new_callable=PropertyMock, -) 
-@patch("pydolphinscheduler.core.task.Task.get_plugin") -def test_task_ext_attr( - m_plugin, m_raw_script, m_ext_attr, m_ext, m_code_version, val, expected -): - """Test task attribute ext_attr.""" - m_plugin.return_value.read_file.return_value = expected - m_raw_script.return_value = val - task = Task("test_task_ext_attr", "test_task_ext_attr") - assert expected == getattr(task, "raw_script") - - -@pytest.mark.parametrize( - "attr, expected", - [ - ( - { - "name": "test_task_abtain_res_plugin", - "task_type": "TaskType", - "resource_plugin": Local("prefix"), - "process_definition": ProcessDefinition( - name="process_definition", - resource_plugin=Local("prefix"), - ), - }, - "Local", - ), - ( - { - "name": "test_task_abtain_res_plugin", - "task_type": "TaskType", - "resource_plugin": Local("prefix"), - }, - "Local", - ), - ( - { - "name": "test_task_abtain_res_plugin", - "task_type": "TaskType", - "process_definition": ProcessDefinition( - name="process_definition", - resource_plugin=Local("prefix"), - ), - }, - "Local", - ), - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch("pydolphinscheduler.core.task.Task.get_content") -def test_task_obtain_res_plugin(m_get_content, m_code_version, attr, expected): - """Test task obtaining resource plug-in.""" - task = Task(**attr) - assert expected == task.get_plugin().__class__.__name__ - - -@pytest.mark.parametrize( - "attr", - [ - { - "name": "test_task_abtain_res_plugin", - "task_type": "TaskType", - "process_definition": ProcessDefinition( - name="process_definition", - ), - }, - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch("pydolphinscheduler.core.task.Task.get_content") -def test_task_obtain_res_plugin_exception(m_get_content, m_code_version, attr): - """Test task obtaining resource plug-in exception.""" - with pytest.raises( - PyResPluginException, - match="The execution command of this task 
is a file, but the resource plugin is empty", - ): - task = Task(**attr) - task.get_plugin() - - -@pytest.mark.parametrize( - "resources, expect", - [ - ( - ["/dev/test.py"], - [{"id": 1}], - ), - ( - ["/dev/test.py", {"id": 2}], - [{"id": 1}, {"id": 2}], - ), - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch( - "pydolphinscheduler.core.resource.Resource.get_id_from_database", - return_value=1, -) -@patch( - "pydolphinscheduler.core.task.Task.user_name", - return_value="test_user", -) -def test_python_resource_list( - mock_code_version, mock_resource, mock_user_name, resources, expect -): - """Test python task resource list.""" - task = Task( - name="python_resource_list.", - task_type="PYTHON", - resource_list=resources, - ) - assert task.resource_list == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_yaml_process_define.py b/dolphinscheduler-python/pydolphinscheduler/tests/core/test_yaml_process_define.py deleted file mode 100644 index 99ad179a5f..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/core/test_yaml_process_define.py +++ /dev/null @@ -1,191 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test YAML process.""" - -import os -from pathlib import Path -from unittest.mock import patch - -import pytest - -from pydolphinscheduler import configuration, tasks -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.core.yaml_process_define import ( - ParseTool, - create_process_definition, - get_task_cls, -) -from pydolphinscheduler.exceptions import PyDSTaskNoFoundException -from tests.testing.path import path_yaml_example -from tests.testing.task import Task - - -@pytest.mark.parametrize( - "string_param, expect", - [ - ("$ENV{PROJECT_NAME}", "~/pydolphinscheduler"), - ], -) -def test_parse_tool_env_exist(string_param, expect): - """Test parsing the environment variable.""" - os.environ["PROJECT_NAME"] = expect - assert expect == ParseTool.parse_string_param_if_env(string_param) - - -def test_parse_tool_env_not_exist(): - """Test parsing the not exist environment variable.""" - key = "THIS_ENV_NOT_EXIST_0000000" - string_param = "$ENV{%s}" % key - expect = "$" + key - assert expect == ParseTool.parse_string_param_if_env(string_param) - - -@pytest.mark.parametrize( - "string_param, expect_key", - [ - ("${CONFIG.java_gateway.address}", "java_gateway.address"), - ("${CONFIG.WORKFLOW_PROJECT}", "default.workflow.project"), - ], -) -def test_parse_tool_config(string_param, expect_key): - """Test parsing configuration.""" - expect = configuration.get_single_config(expect_key) - assert expect == ParseTool.parse_string_param_if_config(string_param) - - -def test_parse_possible_yaml_file(): - """Test parsing possible path.""" - folder = Path(path_yaml_example) - file_name = "Shell.yaml" - path = folder.joinpath(file_name) - - with open(path, "r") as f: - expect = "".join(f) - - string_param = '$FILE{"%s"}' % file_name - content_ = ParseTool.parse_string_param_if_file(string_param, base_folder=folder) - - assert expect == content_ - - -def test_parse_tool_parse_possible_path_file(): - """Test parsing possible path.""" 
- folder = Path(path_yaml_example) - file_name = "Shell.yaml" - path = folder.joinpath(file_name) - - possible_path = ParseTool.get_possible_path(path, base_folder=folder) - assert path == possible_path - - possible_path = ParseTool.get_possible_path(file_name, base_folder=folder) - assert path == possible_path - - possible_path = ParseTool.get_possible_path(file_name, base_folder=".") - assert path != possible_path - - -@pytest.mark.parametrize( - "task_type, expect", - [ - ("shell", tasks.Shell), - ("Shell", tasks.Shell), - ("ShEll", tasks.Shell), - ("Condition", tasks.Condition), - ("DataX", tasks.DataX), - ("CustomDataX", tasks.CustomDataX), - ("Dependent", tasks.Dependent), - ("Flink", tasks.Flink), - ("Http", tasks.Http), - ("MR", tasks.MR), - ("Procedure", tasks.Procedure), - ("Python", tasks.Python), - ("Shell", tasks.Shell), - ("Spark", tasks.Spark), - ("Sql", tasks.Sql), - ("SubProcess", tasks.SubProcess), - ("Switch", tasks.Switch), - ("SageMaker", tasks.SageMaker), - ], -) -def test_get_task(task_type, expect): - """Test get task function.""" - assert expect == get_task_cls(task_type) - - -@pytest.mark.parametrize( - "task_type", - [ - ("MYSQL"), - ], -) -def test_get_error(task_type): - """Test get task cls error.""" - with pytest.raises( - PyDSTaskNoFoundException, - match=f"not find task {task_type}", - ): - get_task_cls(task_type) - - -@pytest.mark.parametrize( - "yaml_file", - [ - ("Condition.yaml"), - ("DataX.yaml"), - ("Dependent.yaml"), - ("Flink.yaml"), - ("Procedure.yaml"), - ("Http.yaml"), - ("MapReduce.yaml"), - ("Python.yaml"), - ("Shell.yaml"), - ("Spark.yaml"), - ("Sql.yaml"), - ("SubProcess.yaml"), - # ("Switch.yaml"), - ("MoreConfiguration.yaml"), - ], -) -@patch( - "pydolphinscheduler.core.engine.Engine.get_resource_info", - return_value=({"id": 1, "name": "test"}), -) -@patch( - "pydolphinscheduler.core.database.Database.get_database_info", - return_value=({"id": 1, "type": "mock_type"}), -) -@patch( - 
"pydolphinscheduler.tasks.dependent.DependentItem.get_code_from_gateway", - return_value={ - "projectCode": 0, - "processDefinitionCode": 0, - "taskDefinitionCode": 0, - }, -) -@patch.object(ProcessDefinition, "run") -@patch.object(ProcessDefinition, "submit") -def test_get_create_process_definition( - prun, psubmit, dep_item, db_info, resource_info, yaml_file -): - """Test create_process_definition function to parse example YAML file.""" - yaml_file_path = Path(path_yaml_example).joinpath(yaml_file) - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - side_effect=Task("test_func_wrap", "func_wrap").gen_code_and_version, - ): - create_process_definition(yaml_file_path) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/example/__init__.py b/dolphinscheduler-python/pydolphinscheduler/tests/example/__init__.py deleted file mode 100644 index 49323e711d..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/example/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Init example package tests.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/example/test_example.py b/dolphinscheduler-python/pydolphinscheduler/tests/example/test_example.py deleted file mode 100644 index 319ad961f7..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/example/test_example.py +++ /dev/null @@ -1,176 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test example.""" - -import ast -import importlib -from unittest.mock import patch - -import pytest - -from tests.testing.constants import task_without_example -from tests.testing.path import get_all_examples, get_tasks -from tests.testing.task import Task - -process_definition_name = set() - - -def import_module(script_name, script_path): - """Import and run example module in examples directory.""" - spec = importlib.util.spec_from_file_location(script_name, script_path) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - return module - - -def test_task_without_example(): - """Test task which without example. - - Avoiding add new type of tasks but without adding example describe how to use it. 
- """ - # We use example/tutorial.py as shell task example - ignore_name = {"__init__.py", "shell.py", "func_wrap.py"} - all_tasks = {task.stem for task in get_tasks(ignore_name=ignore_name)} - - have_example_tasks = set() - start = "task_" - end = "_example" - for ex in get_all_examples(): - stem = ex.stem - if stem.startswith(start) and stem.endswith(end): - task_name = stem.replace(start, "").replace(end, "") - have_example_tasks.add(task_name) - - assert all_tasks.difference(have_example_tasks) == task_without_example - - -@pytest.fixture -def setup_and_teardown_for_stuff(): - """Fixture of py.test handle setup and teardown.""" - yield - global process_definition_name - process_definition_name = set() - - -def submit_check_without_same_name(self): - """Side effect for verifying process definition name and adding it to global variable.""" - if self.name in process_definition_name: - raise ValueError( - "Example process definition should not have same name, but get duplicate name: %s", - self.name, - ) - submit_add_process_definition(self) - - -def submit_add_process_definition(self): - """Side effect for adding process definition name to global variable.""" - process_definition_name.add(self.name) - - -def test_example_basic(): - """Test example basic information. - - Which including: - * File extension name is `.py` - * All example except `tutorial.py` is end with keyword "_example" - * All example must have not empty `__doc__`. - """ - for ex in get_all_examples(): - # All files in example is python script - assert ( - ex.suffix == ".py" - ), f"We expect all examples is python script, but get {ex.name}." - - # All except tutorial and __init__ is end with keyword "_example" - if ( - ex.stem - not in ("tutorial", "tutorial_decorator", "tutorial_resource_plugin") - and ex.stem != "__init__" - ): - assert ex.stem.endswith( - "_example" - ), f"We expect all examples script end with keyword '_example', but get {ex.stem}." 
- - # All files have __doc__ - tree = ast.parse(ex.read_text()) - example_doc = ast.get_docstring(tree, clean=False) - assert ( - example_doc is not None - ), f"We expect all examples have __doc__, but {ex.name} do not." - - -@patch("pydolphinscheduler.core.process_definition.ProcessDefinition.start") -@patch( - "pydolphinscheduler.core.process_definition.ProcessDefinition.submit", - side_effect=submit_check_without_same_name, - autospec=True, -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - # Example bulk_create_example.py would create workflow dynamic by :func:`get_one_task_by_name` - # and would raise error in :func:`get_one_task_by_name` if we return constant value - # using :arg:`return_value` - side_effect=Task("test_example", "test_example").gen_code_and_version, -) -def test_example_process_definition_without_same_name( - mock_code_version, mock_submit, mock_start -): - """Test all examples file without same process definition's name. - - Our process definition would compete with others if we have same process definition name. It will make - different between actually workflow and our workflow-as-code file which make users feel strange. 
- """ - for ex in get_all_examples(): - # We use side_effect `submit_check_without_same_name` overwrite :func:`submit` - # and check whether it have duplicate name or not - import_module(ex.name, str(ex)) - assert True - - -@patch("pydolphinscheduler.core.process_definition.ProcessDefinition.start") -@patch( - "pydolphinscheduler.core.process_definition.ProcessDefinition.submit", - side_effect=submit_add_process_definition, - autospec=True, -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - # Example bulk_create_example.py would create workflow dynamic by :func:`get_one_task_by_name` - # and would raise error in :func:`get_one_task_by_name` if we return constant value - # using :arg:`return_value` - side_effect=Task("test_example", "test_example").gen_code_and_version, -) -def test_file_name_in_process_definition(mock_code_version, mock_submit, mock_start): - """Test example file name in example definition name. - - We should not directly assert equal, because some of the examples contain - more than one process definition. - """ - global process_definition_name - for ex in get_all_examples(): - # Skip __init__ file - if ex.stem == "__init__": - continue - # Skip bulk_create_example check, cause it contain multiple workflow and - # without one named bulk_create_example - if ex.stem == "bulk_create_example": - continue - process_definition_name = set() - assert ex.stem not in process_definition_name - import_module(ex.name, str(ex)) - assert ex.stem in process_definition_name diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/integration/__init__.py b/dolphinscheduler-python/pydolphinscheduler/tests/integration/__init__.py deleted file mode 100644 index 65625a9f04..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/integration/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test integration between Python API and PythonGatewayService.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/integration/conftest.py b/dolphinscheduler-python/pydolphinscheduler/tests/integration/conftest.py deleted file mode 100644 index c15b89768d..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/integration/conftest.py +++ /dev/null @@ -1,51 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""py.test conftest.py file for package integration test.""" - -import os - -import pytest - -from tests.testing.docker_wrapper import DockerWrapper - - -@pytest.fixture(scope="package", autouse=True) -def docker_setup_teardown(): - """Fixture for whole package tests, Set up and teardown docker env. - - Fixture in file named ``conftest.py`` with ``scope=package`` could be auto import in the - whole package, and with attribute ``autouse=True`` will be auto-use for each test cases. - - .. seealso:: - For more information about conftest.py see: - https://docs.pytest.org/en/latest/example/simple.html#package-directory-level-fixtures-setups - """ - if os.environ.get("skip_launch_docker") == "true": - yield True - else: - docker_wrapper = DockerWrapper( - image="apache/dolphinscheduler-standalone-server:ci", - container_name="ci-dolphinscheduler-standalone-server", - ) - ports = {"25333/tcp": 25333, "12345/tcp": 12345} - container = docker_wrapper.run_until_log( - log="Started StandaloneServer in", tty=True, ports=ports - ) - assert container is not None - yield - docker_wrapper.remove_container() diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_java_gateway.py b/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_java_gateway.py deleted file mode 100644 index 8b7c5ff845..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_java_gateway.py +++ /dev/null @@ -1,53 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test pydolphinscheduler java gateway.""" - - -from py4j.java_gateway import JavaGateway, java_import - - -def test_gateway_connect(): - """Test weather client could connect java gate way or not.""" - gateway = JavaGateway() - app = gateway.entry_point - assert app.ping() == "PONG" - - -def test_jvm_simple(): - """Test use JVM build-in object and operator from java gateway.""" - gateway = JavaGateway() - smallest = gateway.jvm.java.lang.Integer.MIN_VALUE - biggest = gateway.jvm.java.lang.Integer.MAX_VALUE - assert smallest is not None and biggest is not None - assert biggest > smallest - - -def test_python_client_java_import_single(): - """Test import single class from java gateway.""" - gateway = JavaGateway() - java_import(gateway.jvm, "org.apache.dolphinscheduler.common.utils.FileUtils") - assert hasattr(gateway.jvm, "FileUtils") - - -def test_python_client_java_import_package(): - """Test import package contain multiple class from java gateway.""" - gateway = JavaGateway() - java_import(gateway.jvm, "org.apache.dolphinscheduler.common.utils.*") - # test if jvm view have some common utils - for util in ("FileUtils", "OSUtils", "DateUtils"): - assert hasattr(gateway.jvm, util) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_process_definition.py b/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_process_definition.py deleted file mode 100644 index 1672bde530..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_process_definition.py +++ /dev/null @@ -1,50 +0,0 @@ -# 
Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test process definition in integration.""" - -from typing import Dict - -import pytest - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.shell import Shell - -PROCESS_DEFINITION_NAME = "test_change_exists_attr_pd" -TASK_NAME = f"task_{PROCESS_DEFINITION_NAME}" - - -@pytest.mark.parametrize( - "pre, post", - [ - ( - { - "user": "pre_user", - }, - { - "user": "post_user", - }, - ) - ], -) -def test_change_process_definition_attr(pre: Dict, post: Dict): - """Test whether process definition success when specific attribute change.""" - assert pre.keys() == post.keys(), "Not equal keys for pre and post attribute." 
- for attrs in [pre, post]: - with ProcessDefinition(name=PROCESS_DEFINITION_NAME, **attrs) as pd: - Shell(name=TASK_NAME, command="echo 1") - pd.submit() diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_project.py b/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_project.py deleted file mode 100644 index 167ce2d8c9..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_project.py +++ /dev/null @@ -1,78 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test pydolphinscheduler project.""" -import pytest - -from pydolphinscheduler.models import Project, User - - -def get_user( - name="test-name", - password="test-password", - email="test-email@abc.com", - phone="17366637777", - tenant="test-tenant", - queue="test-queue", - status=1, -): - """Get a test user.""" - user = User(name, password, email, phone, tenant, queue, status) - user.create_if_not_exists() - return user - - -def get_project(name="test-name-1", description="test-description", code=1): - """Get a test project.""" - project = Project(name, description, code=code) - user = get_user() - project.create_if_not_exists(user=user.name) - return project - - -def test_create_and_get_project(): - """Test create and get project from java gateway.""" - project = get_project() - project_ = Project.get_project_by_name(user="test-name", name=project.name) - assert project_.name == project.name - assert project_.description == project.description - - -def test_update_project(): - """Test update project from java gateway.""" - project = get_project() - project = project.get_project_by_name(user="test-name", name=project.name) - project.update( - user="test-name", - project_code=project.code, - project_name="test-name-updated", - description="test-description-updated", - ) - project_ = Project.get_project_by_name(user="test-name", name="test-name-updated") - assert project_.description == "test-description-updated" - - -def test_delete_project(): - """Test delete project from java gateway.""" - project = get_project() - project.get_project_by_name(user="test-name", name=project.name) - project.delete(user="test-name") - - with pytest.raises(AttributeError) as excinfo: - _ = project.name - - assert excinfo.type == AttributeError diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_submit_examples.py b/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_submit_examples.py deleted file mode 100644 index 393b0cc99a..0000000000 
--- a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_submit_examples.py +++ /dev/null @@ -1,56 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test whether success submit examples DAG to PythonGatewayService.""" - -import subprocess -from pathlib import Path - -import pytest - -from tests.testing.constants import ignore_exec_examples -from tests.testing.path import path_example - - -@pytest.mark.parametrize( - "example_path", - [ - path - for path in path_example.iterdir() - if path.is_file() and path.stem not in ignore_exec_examples - ], -) -def test_exec_white_list_example(example_path: Path): - """Test execute examples and submit DAG to PythonGatewayService.""" - try: - # Because our task decorator used module ``inspect`` to get the source, and it will - # raise IOError when call it by built-in function ``exec``, so we change to ``subprocess.check_call`` - subprocess.check_call(["python", str(example_path)]) - except subprocess.CalledProcessError: - raise RuntimeError("Run example %s failed.", example_path.stem) - - -def test_exec_multiple_times(): - """Test whether process definition can be executed more than one times.""" - tutorial_path = path_example.joinpath("tutorial.py") - time = 0 - while 
time < 3: - try: - subprocess.check_call(["python", str(tutorial_path)]) - except subprocess.CalledProcessError: - raise RuntimeError("Run example %s failed.", tutorial_path.stem) - time += 1 diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_tenant.py b/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_tenant.py deleted file mode 100644 index c1ec33c335..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_tenant.py +++ /dev/null @@ -1,86 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test pydolphinscheduler tenant.""" -import pytest - -from pydolphinscheduler.models import Tenant, User - - -def get_user( - name="test-name", - password="test-password", - email="test-email@abc.com", - phone="17366637777", - tenant="test-tenant", - queue="test-queue", - status=1, -): - """Get a test user.""" - user = User(name, password, email, phone, tenant, queue, status) - user.create_if_not_exists() - return user - - -def get_tenant( - name="test-name-1", - queue="test-queue-1", - description="test-description", - tenant_code="test-tenant-code", - user_name=None, -): - """Get a test tenant.""" - tenant = Tenant(name, queue, description, code=tenant_code, user_name=user_name) - tenant.create_if_not_exists(name) - return tenant - - -def test_create_tenant(): - """Test create tenant from java gateway.""" - tenant = get_tenant() - assert tenant.tenant_id is not None - - -def test_get_tenant(): - """Test get tenant from java gateway.""" - tenant = get_tenant() - tenant_ = Tenant.get_tenant(tenant.code) - assert tenant_.tenant_id == tenant.tenant_id - - -def test_update_tenant(): - """Test update tenant from java gateway.""" - tenant = get_tenant(user_name="admin") - tenant.update( - user="admin", - code="test-code-updated", - queue_id=1, - description="test-description-updated", - ) - tenant_ = Tenant.get_tenant(code=tenant.code) - assert tenant_.code == "test-code-updated" - assert tenant_.queue == 1 - - -def test_delete_tenant(): - """Test delete tenant from java gateway.""" - tenant = get_tenant(user_name="admin") - tenant.delete() - with pytest.raises(AttributeError) as excinfo: - _ = tenant.tenant_id - - assert excinfo.type == AttributeError diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_user.py b/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_user.py deleted file mode 100644 index 74248fa8c3..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/integration/test_user.py +++ /dev/null @@ 
-1,107 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test pydolphinscheduler user.""" - -import hashlib - -import pytest - -from pydolphinscheduler.models import User - - -def md5(str): - """MD5 a string.""" - hl = hashlib.md5() - hl.update(str.encode(encoding="utf-8")) - return hl.hexdigest() - - -def get_user( - name="test-name", - password="test-password", - email="test-email@abc.com", - phone="17366637777", - tenant="test-tenant", - queue="test-queue", - status=1, -): - """Get a test user.""" - user = User( - name=name, - password=password, - email=email, - phone=phone, - tenant=tenant, - queue=queue, - status=status, - ) - user.create_if_not_exists() - return user - - -def test_create_user(): - """Test weather client could connect java gate way or not.""" - user = User( - name="test-name", - password="test-password", - email="test-email@abc.com", - phone="17366637777", - tenant="test-tenant", - queue="test-queue", - status=1, - ) - user.create_if_not_exists() - assert user.user_id is not None - - -def test_get_user(): - """Test get user from java gateway.""" - user = get_user() - user_ = User.get_user(user.user_id) - assert user_.password == md5(user.password) - assert user_.email == user.email - assert 
user_.phone == user.phone - assert user_.status == user.status - - -def test_update_user(): - """Test update user from java gateway.""" - user = get_user() - user.update( - password="test-password-", - email="test-email-updated@abc.com", - phone="17366637766", - tenant="test-tenant-updated", - queue="test-queue-updated", - status=2, - ) - user_ = User.get_user(user.user_id) - assert user_.password == md5("test-password-") - assert user_.email == "test-email-updated@abc.com" - assert user_.phone == "17366637766" - assert user_.status == 2 - - -def test_delete_user(): - """Test delete user from java gateway.""" - user = get_user() - user.delete() - with pytest.raises(AttributeError) as excinfo: - _ = user.user_id - - assert excinfo.type == AttributeError diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/__init__.py b/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/__init__.py deleted file mode 100644 index 0b6bdf360b..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Init resources_plugin package tests.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_github.py b/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_github.py deleted file mode 100644 index 1f1a631649..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_github.py +++ /dev/null @@ -1,195 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test github resource plugin.""" -from unittest.mock import PropertyMock, patch - -import pytest - -from pydolphinscheduler.resources_plugin import GitHub -from pydolphinscheduler.resources_plugin.base.git import GitFileInfo - - -@pytest.mark.parametrize( - "attr, expected", - [ - ( - { - "user": "apache", - "repo_name": "dolphinscheduler", - "file_path": "script/install.sh", - "api": "https://api.github.com/repos/{user}/{repo_name}/contents/{file_path}", - }, - "https://api.github.com/repos/apache/dolphinscheduler/contents/script/install.sh", - ), - ], -) -def test_github_build_req_api(attr, expected): - """Test the build_req_api function of the github resource plug-in.""" - github = GitHub(prefix="prefix") - assert expected == github.build_req_api(**attr) - - -@pytest.mark.parametrize( - "attr, expected", - [ - ( - "https://github.com/apache/dolphinscheduler/blob/dev/script/install.sh", - { - "user": "apache", - "repo_name": "dolphinscheduler", - "branch": "dev", - "file_path": "script/install.sh", - }, - ), - ( - "https://github.com/apache/dolphinscheduler/blob/master/pom.xml", - { - "user": "apache", - "repo_name": "dolphinscheduler", - "branch": "master", - "file_path": "pom.xml", - }, - ), - ( - "https://github.com/apache/dolphinscheduler/blob/1.3.9-release/docker/build/startup.sh", - { - "user": "apache", - "repo_name": "dolphinscheduler", - "branch": "1.3.9-release", - "file_path": "docker/build/startup.sh", - }, - ), - ], -) -def test_github_get_git_file_info(attr, expected): - """Test the get_git_file_info function of the github resource plug-in.""" - github = GitHub(prefix="prefix") - github.get_git_file_info(attr) - assert expected == github._git_file_info.__dict__ - - -@pytest.mark.parametrize( - "attr, expected", - [ - ( - ( - { - "user": "apache", - "repo_name": "dolphinscheduler", - "file_path": "docker/build/startup.sh", - } - ), - "https://api.github.com/repos/apache/dolphinscheduler/contents/docker/build/startup.sh", - ), - ( - ( - { - 
"user": "apache", - "repo_name": "dolphinscheduler", - "file_path": "pom.xml", - } - ), - "https://api.github.com/repos/apache/dolphinscheduler/contents/pom.xml", - ), - ( - ( - { - "user": "apache", - "repo_name": "dolphinscheduler", - "file_path": "script/create-dolphinscheduler.sh", - } - ), - "https://api.github.com/repos/apache/dolphinscheduler/contents/script/create-dolphinscheduler.sh", - ), - ], -) -@patch( - "pydolphinscheduler.resources_plugin.github.GitHub._git_file_info", - new_callable=PropertyMock, -) -def test_github_get_req_url(m_git_file_info, attr, expected): - """Test the get_req_url function of the github resource plug-in.""" - github = GitHub(prefix="prefix") - m_git_file_info.return_value = GitFileInfo(**attr) - assert expected == github.get_req_url() - - -@pytest.mark.parametrize( - "attr, expected", - [ - ( - { - "init": {"prefix": "prefix", "access_token": "access_token"}, - "file_path": "github_resource_plugin.sh", - "file_content": "github resource plugin", - }, - "github resource plugin", - ), - ( - { - "init": { - "prefix": "prefix", - }, - "file_path": "github_resource_plugin.sh", - "file_content": "github resource plugin", - }, - "github resource plugin", - ), - ], -) -@patch("pydolphinscheduler.resources_plugin.github.GitHub.req") -def test_github_read_file(m_req, attr, expected): - """Test the read_file function of the github resource plug-in.""" - github = GitHub(**attr.get("init")) - m_req.return_value = attr.get("file_content") - assert expected == github.read_file(attr.get("file_path")) - - -@pytest.mark.skip(reason="Lack of test environment, need stable repository") -@pytest.mark.parametrize( - "attr, expected", - [ - ( - "https://github.com/apache/dolphinscheduler/blob/dev/lombok.config", - "#\n" - "# Licensed to the Apache Software Foundation (ASF) under one or more\n" - "# contributor license agreements. 
See the NOTICE file distributed with\n" - "# this work for additional information regarding copyright ownership.\n" - "# The ASF licenses this file to You under the Apache License, Version 2.0\n" - '# (the "License"); you may not use this file except in compliance with\n' - "# the License. You may obtain a copy of the License at\n" - "#\n" - "# http://www.apache.org/licenses/LICENSE-2.0\n" - "#\n" - "# Unless required by applicable law or agreed to in writing, software\n" - '# distributed under the License is distributed on an "AS IS" BASIS,\n' - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n" - "# See the License for the specific language governing permissions and\n" - "# limitations under the License.\n" - "#\n" - "\n" - "lombok.addLombokGeneratedAnnotation = true\n", - ), - ], -) -def test_github_req(attr, expected): - """Test the req function of the github resource plug-in.""" - github = GitHub( - prefix="prefix", - ) - assert expected == github.req(attr) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_gitlab.py b/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_gitlab.py deleted file mode 100644 index 6bb90acc72..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_gitlab.py +++ /dev/null @@ -1,116 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test github resource plugin.""" -import pytest - -from pydolphinscheduler.resources_plugin.gitlab import GitLab - - -@pytest.mark.parametrize( - "attr, expected", - [ - ( - "https://gitlab.com/pydolphinscheduler/ds-gitlab/-/blob/main/union.sh", - { - "branch": "main", - "file_path": "union.sh", - "host": "https://gitlab.com", - "repo_name": "ds-gitlab", - "user": "pydolphinscheduler", - }, - ), - ( - "https://gitlab.com/pydolphinscheduler/ds/-/blob/dev/test/exc.sh", - { - "branch": "dev", - "file_path": "test/exc.sh", - "host": "https://gitlab.com", - "repo_name": "ds", - "user": "pydolphinscheduler", - }, - ), - ], -) -def test_gitlab_get_git_file_info(attr, expected): - """Test the get_file_info function of the gitlab resource plugin.""" - gitlab = GitLab(prefix="prefix") - gitlab.get_git_file_info(attr) - assert expected == gitlab._git_file_info.__dict__ - - -@pytest.mark.skip(reason="This test needs gitlab service") -@pytest.mark.parametrize( - "attr, expected", - [ - ( - { - "init": { - "prefix": "https://gitlab.com/pydolphinscheduler/ds-internal/-/blob/main", - "oauth_token": "24518bd4cf5bfe9xx", - }, - "file_path": "union.sh", - }, - "test gitlab resource plugin\n", - ), - ( - { - "init": { - "prefix": "https://gitlab.com/pydolphinscheduler/ds/-/blob/main", - "private_token": "9TyTe2xx", - }, - "file_path": "union.sh", - }, - "test gitlab resource plugin\n", - ), - ( - { - "init": { - "prefix": "https://gitlab.com/pydolphinscheduler/ds-gitlab/-/blob/main", - "username": "pydolphinscheduler", - "password": "4295xx", - }, - "file_path": 
"union.sh", - }, - "test gitlab resource plugin\n", - ), - ( - { - "init": { - "prefix": "https://gitlab.com/pydolphinscheduler/ds-public/-/blob/main", - }, - "file_path": "union.sh", - }, - "test gitlab resource plugin\n", - ), - ( - { - "init": { - "prefix": "https://gitlab.com/pydolphinscheduler/ds-internal/-/blob/main", - "username": "pydolphinscheduler", - "password": "429xxx", - }, - "file_path": "union.sh", - }, - "test gitlab resource plugin\n", - ), - ], -) -def test_gitlab_read_file(attr, expected): - """Test the read_file function of the gitlab resource plug-in.""" - gitlab = GitLab(**attr.get("init")) - assert expected == gitlab.read_file(attr.get("file_path")) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_local.py b/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_local.py deleted file mode 100644 index 82b196f75a..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_local.py +++ /dev/null @@ -1,108 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test local resource plugin.""" -from pathlib import Path -from unittest.mock import PropertyMock, patch - -import pytest - -from pydolphinscheduler.core import Task -from pydolphinscheduler.exceptions import PyResPluginException -from pydolphinscheduler.resources_plugin.local import Local -from pydolphinscheduler.utils import file -from tests.testing.file import delete_file - -file_name = "local_res.sh" -file_content = "echo Test local res plugin" -res_plugin_prefix = Path(__file__).parent -file_path = res_plugin_prefix.joinpath(file_name) - - -@pytest.fixture() -def setup_crt_first(): - """Set up and teardown about create file first and then delete it.""" - file.write(content=file_content, to_path=file_path) - yield - delete_file(file_path) - - -@pytest.mark.parametrize( - "val, expected", - [ - (file_name, file_content), - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch( - "pydolphinscheduler.core.task.Task.ext", - new_callable=PropertyMock, - return_value={ - ".sh", - }, -) -@patch( - "pydolphinscheduler.core.task.Task.ext_attr", - new_callable=PropertyMock, - return_value="_raw_script", -) -@patch( - "pydolphinscheduler.core.task.Task._raw_script", - create=True, - new_callable=PropertyMock, -) -def test_task_obtain_res_plugin( - m_raw_script, m_ext_attr, m_ext, m_code_version, val, expected, setup_crt_first -): - """Test task obtaining resource plug-in.""" - m_raw_script.return_value = val - task = Task( - name="test_task_ext_attr", - task_type="type", - resource_plugin=Local(str(res_plugin_prefix)), - ) - assert expected == getattr(task, "raw_script") - - -@pytest.mark.parametrize( - "attr, expected", - [({"prefix": res_plugin_prefix, "file_name": file_name}, file_content)], -) -def test_local_res_read_file(attr, expected, setup_crt_first): - """Test the read_file function of the local resource plug-in.""" - local = Local(str(attr.get("prefix"))) - local.read_file(attr.get("file_name")) - 
assert expected == local.read_file(file_name) - - -@pytest.mark.parametrize( - "attr", - [ - {"prefix": res_plugin_prefix, "file_name": file_name}, - ], -) -def test_local_res_file_not_found(attr): - """Test local resource plugin file does not exist.""" - with pytest.raises( - PyResPluginException, - match=".* is not found", - ): - local = Local(str(attr.get("prefix"))) - local.read_file(attr.get("file_name")) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_oss.py b/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_oss.py deleted file mode 100644 index 7e57e8230e..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_oss.py +++ /dev/null @@ -1,112 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test oss resource plugin.""" -import pytest - -from pydolphinscheduler.resources_plugin.oss import OSS - - -@pytest.mark.parametrize( - "attr, expected", - [ - ( - "https://ospp-ds-private.oss-cn-hangzhou.aliyuncs.com/a.sh", - { - "endpoint": "https://oss-cn-hangzhou.aliyuncs.com", - "file_path": "a.sh", - "bucket": "ospp-ds-private", - }, - ), - ( - "https://ospp-ds-public.oss-cn-hangzhou.aliyuncs.com/dir/a.sh", - { - "endpoint": "https://oss-cn-hangzhou.aliyuncs.com", - "file_path": "dir/a.sh", - "bucket": "ospp-ds-public", - }, - ), - ], -) -def test_oss_get_bucket_file_info(attr, expected): - """Test the get_bucket_file_info function of the oss resource plugin.""" - oss = OSS(prefix="prefix") - oss.get_bucket_file_info(attr) - assert expected == oss._bucket_file_info.__dict__ - - -@pytest.mark.skip(reason="This test requires OSS services") -@pytest.mark.parametrize( - "attr, expected", - [ - ( - { - "init": { - "prefix": "https://ospp-ds-private.oss-cn-hangzhou.aliyuncs.com", - "access_key_id": "LTAI5tP25Mxx", - "access_key_secret": "cSur23Qbxx", - }, - "file_path": "a.sh", - }, - "test oss resource plugin\n", - ), - ( - { - "init": { - "prefix": "https://ospp-ds-private.oss-cn-hangzhou.aliyuncs.com/dir/", - "access_key_id": "LTAxx", - "access_key_secret": "cSur23Qxx", - }, - "file_path": "b.sh", - }, - "test oss resource plugin\n", - ), - ( - { - "init": { - "prefix": "https://ospp-ds-private.oss-cn-hangzhou.aliyuncs.com", - }, - "file_path": "b.sh", - }, - "test oss resource plugin\n", - ), - ( - { - "init": { - "prefix": "https://ospp-ds-public.oss-cn-hangzhou.aliyuncs.com", - }, - "file_path": "b.sh", - }, - "test oss resource plugin\n", - ), - ( - { - "init": { - "prefix": "https://ospp-ds-public.oss-cn-hangzhou.aliyuncs.com/dir/", - "access_key_id": "LTAIxx", - "access_key_secret": "cSurxx", - }, - "file_path": "a.sh", - }, - "test oss resource plugin\n", - ), - ], -) -def test_oss_read_file(attr, expected): - """Test the read_file function of the 
oss resource plug-in.""" - oss = OSS(**attr.get("init")) - assert expected == oss.read_file(attr.get("file_path")) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_resource_plugin.py b/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_resource_plugin.py deleted file mode 100644 index 63e619a600..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_resource_plugin.py +++ /dev/null @@ -1,75 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test abstract class resource_plugin.""" - -import pytest - -from pydolphinscheduler.exceptions import PyResPluginException -from pydolphinscheduler.resources_plugin import GitHub - - -@pytest.mark.parametrize( - "attr, expected", - [ - ( - { - "s": "https://api.github.com/repos/apache/dolphinscheduler/contents/script/install.sh", - "x": "/", - "n": 2, - }, - 7, - ), - ( - { - "s": "https://api.github.com", - "x": ":", - "n": 1, - }, - 5, - ), - ], -) -def test_github_get_index(attr, expected): - """Test the get_index function of the abstract class resource_plugin.""" - github = GitHub(prefix="prefix") - assert expected == github.get_index(**attr) - - -@pytest.mark.parametrize( - "attr", - [ - { - "s": "https://api.github.com", - "x": "/", - "n": 3, - }, - { - "s": "https://api.github.com/", - "x": "/", - "n": 4, - }, - ], -) -def test_github_get_index_exception(attr): - """Test exception to get_index function of abstract class resource_plugin.""" - with pytest.raises( - PyResPluginException, - match="Incomplete path.", - ): - github = GitHub(prefix="prefix") - github.get_index(**attr) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_s3.py b/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_s3.py deleted file mode 100644 index 5f75f3eb75..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_s3.py +++ /dev/null @@ -1,79 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test oss resource plugin.""" -import pytest - -from pydolphinscheduler.resources_plugin import S3 - - -@pytest.mark.parametrize( - "attr, expected", - [ - ( - "https://ds-resource-plugin-private.s3.amazonaws.com/a.sh", - { - "file_path": "a.sh", - "bucket": "ds-resource-plugin-private", - }, - ), - ( - "https://ds-resource-plugin-public.s3.amazonaws.com/dir/a.sh", - { - "file_path": "dir/a.sh", - "bucket": "ds-resource-plugin-public", - }, - ), - ], -) -def test_s3_get_bucket_file_info(attr, expected): - """Test the get_bucket_file_info function of the s3 resource plugin.""" - s3 = S3(prefix="prefix") - s3.get_bucket_file_info(attr) - assert expected == s3._bucket_file_info.__dict__ - - -@pytest.mark.skip(reason="This test requires s3 services") -@pytest.mark.parametrize( - "attr, expected", - [ - ( - { - "init": { - "prefix": "https://ds-resource-plugin-private.s3.amazonaws.com/dir/", - "access_key_id": "LTAI5tP25Mxx", - "access_key_secret": "cSur23Qbxx", - }, - "file_path": "a.sh", - }, - "test s3 resource plugin\n", - ), - ( - { - "init": { - "prefix": "https://ds-resource-plugin-public.s3.amazonaws.com/", - }, - "file_path": "a.sh", - }, - "test s3 resource plugin\n", - ), - ], -) -def test_s3_read_file(attr, expected): - """Test the read_file function of the s3 resource plug-in.""" - s3 = S3(**attr.get("init")) - assert expected == s3.read_file(attr.get("file_path")) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/__init__.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/__init__.py deleted file mode 100644 index 
095e3013e5..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Init tasks package tests.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_condition.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_condition.py deleted file mode 100644 index 72eec28ed7..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_condition.py +++ /dev/null @@ -1,461 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test Task dependent.""" -from typing import List, Tuple -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.tasks.condition import ( - FAILURE, - SUCCESS, - And, - Condition, - ConditionOperator, - Or, - Status, -) -from tests.testing.task import Task - -TEST_NAME = "test-name" -TEST_PROJECT = "test-project" -TEST_PROCESS_DEFINITION = "test-process-definition" -TEST_TYPE = "test-type" -TEST_PROJECT_CODE, TEST_DEFINITION_CODE, TEST_TASK_CODE = 12345, 123456, 1234567 - -TEST_OPERATOR_LIST = ("AND", "OR") - - -@pytest.mark.parametrize( - "obj, expect", - [ - (Status, "STATUS"), - (SUCCESS, "SUCCESS"), - (FAILURE, "FAILURE"), - ], -) -def test_class_status_status_name(obj: Status, expect: str): - """Test class status and sub class property status_name.""" - assert obj.status_name() == expect - - -@pytest.mark.parametrize( - "obj, tasks", - [ - (Status, (1, 2, 3)), - (SUCCESS, (1.1, 2.2, 3.3)), - (FAILURE, (ConditionOperator(1), ConditionOperator(2), ConditionOperator(3))), - ], -) -def test_class_status_depend_item_list_no_expect_type(obj: Status, tasks: Tuple): - """Test class status and sub class raise error when assign not support type.""" - with pytest.raises( - PyDSParamException, match=".*?only accept class Task or sub class Task, but get" - ): - obj(*tasks).get_define() - - -@pytest.mark.parametrize( - "obj, tasks", - [ - (Status, [Task(str(i), TEST_TYPE) for i in range(1)]), - (Status, [Task(str(i), TEST_TYPE) for i in range(2)]), - (Status, [Task(str(i), TEST_TYPE) for i in range(3)]), - (SUCCESS, [Task(str(i), TEST_TYPE) for i in range(1)]), - (SUCCESS, [Task(str(i), TEST_TYPE) for i in range(2)]), - (SUCCESS, [Task(str(i), TEST_TYPE) for i in range(3)]), - (FAILURE, [Task(str(i), TEST_TYPE) 
for i in range(1)]), - (FAILURE, [Task(str(i), TEST_TYPE) for i in range(2)]), - (FAILURE, [Task(str(i), TEST_TYPE) for i in range(3)]), - ], -) -def test_class_status_depend_item_list(obj: Status, tasks: Tuple): - """Test class status and sub class function :func:`depend_item_list`.""" - status = obj.status_name() - expect = [ - { - "depTaskCode": i.code, - "status": status, - } - for i in tasks - ] - assert obj(*tasks).get_define() == expect - - -@pytest.mark.parametrize( - "obj, expect", - [ - (ConditionOperator, "CONDITIONOPERATOR"), - (And, "AND"), - (Or, "OR"), - ], -) -def test_condition_operator_operator_name(obj: ConditionOperator, expect: str): - """Test class ConditionOperator and sub class class function :func:`operator_name`.""" - assert obj.operator_name() == expect - - -@pytest.mark.parametrize( - "obj, expect", - [ - (ConditionOperator, "CONDITIONOPERATOR"), - (And, "AND"), - (Or, "OR"), - ], -) -def test_condition_operator_relation(obj: ConditionOperator, expect: str): - """Test class ConditionOperator and sub class class property `relation`.""" - assert obj(1).relation == expect - - -@pytest.mark.parametrize( - "obj, status_or_operator, match", - [ - ( - ConditionOperator, - [Status(Task("1", TEST_TYPE)), 1], - ".*?operator parameter support ConditionTask and ConditionOperator.*?", - ), - ( - ConditionOperator, - [ - Status(Task("1", TEST_TYPE)), - 1.0, - ], - ".*?operator parameter support ConditionTask and ConditionOperator.*?", - ), - ( - ConditionOperator, - [ - Status(Task("1", TEST_TYPE)), - ConditionOperator(And(Status(Task("1", TEST_TYPE)))), - ], - ".*?operator parameter only support same type.", - ), - ( - ConditionOperator, - [ - ConditionOperator(And(Status(Task("1", TEST_TYPE)))), - Status(Task("1", TEST_TYPE)), - ], - ".*?operator parameter only support same type.", - ), - ], -) -def test_condition_operator_set_define_attr_not_support_type( - obj, status_or_operator, match -): - """Test class ConditionOperator parameter error, 
including parameter not same or type not support.""" - with pytest.raises(PyDSParamException, match=match): - op = obj(*status_or_operator) - op.set_define_attr() - - -@pytest.mark.parametrize( - "obj, task_num", - [ - (ConditionOperator, 1), - (ConditionOperator, 2), - (ConditionOperator, 3), - (And, 1), - (And, 2), - (And, 3), - (Or, 1), - (Or, 2), - (Or, 3), - ], -) -def test_condition_operator_set_define_attr_status( - obj: ConditionOperator, task_num: int -): - """Test :func:`set_define_attr` with one or more class status.""" - attr = "depend_item_list" - - tasks = [Task(str(i), TEST_TYPE) for i in range(task_num)] - status = Status(*tasks) - - expect = [ - {"depTaskCode": task.code, "status": status.status_name()} for task in tasks - ] - - co = obj(status) - co.set_define_attr() - assert getattr(co, attr) == expect - - -@pytest.mark.parametrize( - "obj, status", - [ - (ConditionOperator, (SUCCESS, SUCCESS)), - (ConditionOperator, (FAILURE, FAILURE)), - (ConditionOperator, (SUCCESS, FAILURE)), - (ConditionOperator, (FAILURE, SUCCESS)), - (And, (SUCCESS, SUCCESS)), - (And, (FAILURE, FAILURE)), - (And, (SUCCESS, FAILURE)), - (And, (FAILURE, SUCCESS)), - (Or, (SUCCESS, SUCCESS)), - (Or, (FAILURE, FAILURE)), - (Or, (SUCCESS, FAILURE)), - (Or, (FAILURE, SUCCESS)), - ], -) -def test_condition_operator_set_define_attr_mix_status( - obj: ConditionOperator, status: List[Status] -): - """Test :func:`set_define_attr` with one or more mixed status.""" - attr = "depend_item_list" - - task = Task("test-operator", TEST_TYPE) - status_list = [] - expect = [] - for sta in status: - status_list.append(sta(task)) - expect.append({"depTaskCode": task.code, "status": sta.status_name()}) - - co = obj(*status_list) - co.set_define_attr() - assert getattr(co, attr) == expect - - -@pytest.mark.parametrize( - "obj, task_num", - [ - (ConditionOperator, 1), - (ConditionOperator, 2), - (ConditionOperator, 3), - (And, 1), - (And, 2), - (And, 3), - (Or, 1), - (Or, 2), - (Or, 3), - ], -) 
-def test_condition_operator_set_define_attr_operator( - obj: ConditionOperator, task_num: int -): - """Test :func:`set_define_attr` with one or more class condition operator.""" - attr = "depend_task_list" - - task = Task("test-operator", TEST_TYPE) - status = Status(task) - - expect = [ - { - "relation": obj.operator_name(), - "dependItemList": [ - { - "depTaskCode": task.code, - "status": status.status_name(), - } - ], - } - for _ in range(task_num) - ] - - co = obj(*[obj(status) for _ in range(task_num)]) - co.set_define_attr() - assert getattr(co, attr) == expect - - -@pytest.mark.parametrize( - "cond, sub_cond", - [ - (ConditionOperator, (And, Or)), - (ConditionOperator, (Or, And)), - (And, (And, Or)), - (And, (Or, And)), - (Or, (And, Or)), - (Or, (Or, And)), - ], -) -def test_condition_operator_set_define_attr_mix_operator( - cond: ConditionOperator, sub_cond: Tuple[ConditionOperator] -): - """Test :func:`set_define_attr` with one or more class mix condition operator.""" - attr = "depend_task_list" - - task = Task("test-operator", TEST_TYPE) - - expect = [] - sub_condition = [] - for cond in sub_cond: - status = Status(task) - sub_condition.append(cond(status)) - expect.append( - { - "relation": cond.operator_name(), - "dependItemList": [ - { - "depTaskCode": task.code, - "status": status.status_name(), - } - ], - } - ) - co = cond(*sub_condition) - co.set_define_attr() - assert getattr(co, attr) == expect - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(12345, 1), -) -@patch( - "pydolphinscheduler.tasks.condition.Condition.gen_code_and_version", - return_value=(123, 1), -) -def test_condition_get_define(mock_condition_code_version, mock_task_code_version): - """Test task condition :func:`get_define`.""" - common_task = Task(name="common_task", task_type="test_task_condition") - cond_operator = And( - And( - SUCCESS(common_task, common_task), - FAILURE(common_task, common_task), - ), - Or( - SUCCESS(common_task, 
common_task), - FAILURE(common_task, common_task), - ), - ) - - name = "test_condition_get_define" - expect = { - "code": 123, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "CONDITIONS", - "taskParams": { - "resourceList": [], - "localParams": [], - "dependence": { - "relation": "AND", - "dependTaskList": [ - { - "relation": "AND", - "dependItemList": [ - {"depTaskCode": common_task.code, "status": "SUCCESS"}, - {"depTaskCode": common_task.code, "status": "SUCCESS"}, - {"depTaskCode": common_task.code, "status": "FAILURE"}, - {"depTaskCode": common_task.code, "status": "FAILURE"}, - ], - }, - { - "relation": "OR", - "dependItemList": [ - {"depTaskCode": common_task.code, "status": "SUCCESS"}, - {"depTaskCode": common_task.code, "status": "SUCCESS"}, - {"depTaskCode": common_task.code, "status": "FAILURE"}, - {"depTaskCode": common_task.code, "status": "FAILURE"}, - ], - }, - ], - }, - "conditionResult": { - "successNode": [common_task.code], - "failedNode": [common_task.code], - }, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - - task = Condition( - name, condition=cond_operator, success_task=common_task, failed_task=common_task - ) - assert task.get_define() == expect - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_condition_set_dep_workflow(mock_task_code_version): - """Test task condition set dependence in workflow level.""" - with ProcessDefinition(name="test-condition-set-dep-workflow") as pd: - pre_task_1 = Task(name="pre_task_1", task_type=TEST_TYPE) - pre_task_2 = Task(name="pre_task_2", task_type=TEST_TYPE) - pre_task_3 = Task(name="pre_task_3", task_type=TEST_TYPE) - cond_operator = And( - And( - SUCCESS(pre_task_1, pre_task_2), - 
FAILURE(pre_task_3), - ), - ) - - success_branch = Task(name="success_branch", task_type=TEST_TYPE) - fail_branch = Task(name="fail_branch", task_type=TEST_TYPE) - - condition = Condition( - name="conditions", - condition=cond_operator, - success_task=success_branch, - failed_task=fail_branch, - ) - - # General tasks test - assert len(pd.tasks) == 6 - assert sorted(pd.task_list, key=lambda t: t.name) == sorted( - [ - pre_task_1, - pre_task_2, - pre_task_3, - success_branch, - fail_branch, - condition, - ], - key=lambda t: t.name, - ) - # Task dep test - assert success_branch._upstream_task_codes == {condition.code} - assert fail_branch._upstream_task_codes == {condition.code} - assert condition._downstream_task_codes == { - success_branch.code, - fail_branch.code, - } - - # Condition task dep after ProcessDefinition function get_define called - assert condition._upstream_task_codes == { - pre_task_1.code, - pre_task_2.code, - pre_task_3.code, - } - assert all( - [ - child._downstream_task_codes == {condition.code} - for child in [ - pre_task_1, - pre_task_2, - pre_task_3, - ] - ] - ) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_datax.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_datax.py deleted file mode 100644 index 95f65b3155..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_datax.py +++ /dev/null @@ -1,213 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test Task DataX.""" -from pathlib import Path -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.resources_plugin import Local -from pydolphinscheduler.tasks.datax import CustomDataX, DataX -from pydolphinscheduler.utils import file -from tests.testing.file import delete_file - - -@pytest.fixture() -def setup_crt_first(request): - """Set up and teardown about create file first and then delete it.""" - file_content = request.param.get("file_content") - file_path = request.param.get("file_path") - file.write( - content=file_content, - to_path=file_path, - ) - yield - delete_file(file_path) - - -@patch( - "pydolphinscheduler.core.database.Database.get_database_info", - return_value=({"id": 1, "type": "MYSQL"}), -) -def test_datax_get_define(mock_datasource): - """Test task datax function get_define.""" - code = 123 - version = 1 - name = "test_datax_get_define" - command = "select name from test_source_table_name" - datasource_name = "test_datasource" - datatarget_name = "test_datatarget" - target_table = "test_target_table_name" - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "DATAX", - "taskParams": { - "customConfig": 0, - "dsType": "MYSQL", - "dataSource": 1, - "dtType": "MYSQL", - "dataTarget": 1, - "sql": command, - "targetTable": target_table, - "jobSpeedByte": 0, - "jobSpeedRecord": 1000, - "xms": 1, - "xmx": 1, - "preStatements": [], - "postStatements": [], - "localParams": [], - "resourceList": [], - "dependence": {}, - "conditionResult": 
{"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - task = DataX(name, datasource_name, datatarget_name, command, target_table) - assert task.get_define() == expect - - -@pytest.mark.parametrize("json_template", ["json_template"]) -def test_custom_datax_get_define(json_template): - """Test task custom datax function get_define.""" - code = 123 - version = 1 - name = "test_custom_datax_get_define" - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "DATAX", - "taskParams": { - "customConfig": 1, - "json": json_template, - "xms": 1, - "xmx": 1, - "localParams": [], - "resourceList": [], - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - task = CustomDataX(name, json_template) - assert task.get_define() == expect - - -@pytest.mark.parametrize( - "setup_crt_first", - [ - { - "file_path": Path(__file__).parent.joinpath("local_res.sql"), - "file_content": "test local resource", - } - ], - indirect=True, -) -@pytest.mark.parametrize( - "attr, expect", - [ - ( - { - "name": "task_datax", - "datasource_name": "first_mysql", - "datatarget_name": "second_mysql", - "sql": "local_res.sql", - "target_table": "target_table", - "resource_plugin": 
Local(str(Path(__file__).parent)), - }, - "test local resource", - ), - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_resources_local_datax_command_content( - mock_code_version, attr, expect, setup_crt_first -): - """Test task datax sql content through the local resource plug-in.""" - datax = DataX(**attr) - assert expect == getattr(datax, "sql") - - -@pytest.mark.parametrize( - "setup_crt_first", - [ - { - "file_path": Path(__file__).parent.joinpath("local_res.json"), - "file_content": '{content: "test local resource"}', - } - ], - indirect=True, -) -@pytest.mark.parametrize( - "attr, expect", - [ - ( - { - "name": "task_custom_datax", - "json": "local_res.json", - "resource_plugin": Local(str(Path(__file__).parent)), - }, - '{content: "test local resource"}', - ), - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_resources_local_custom_datax_command_content( - mock_code_version, attr, expect, setup_crt_first -): - """Test task CustomDataX json content through the local resource plug-in.""" - custom_datax = CustomDataX(**attr) - assert expect == getattr(custom_datax, "json") diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_dependent.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_dependent.py deleted file mode 100644 index f55700e04b..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_dependent.py +++ /dev/null @@ -1,794 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test Task dependent.""" -import itertools -from typing import Dict, List, Optional, Tuple, Union -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.tasks.dependent import ( - And, - Dependent, - DependentDate, - DependentItem, - DependentOperator, - Or, -) - -TEST_PROJECT = "test-project" -TEST_PROCESS_DEFINITION = "test-process-definition" -TEST_TASK = "test-task" -TEST_PROJECT_CODE, TEST_DEFINITION_CODE, TEST_TASK_CODE = 12345, 123456, 1234567 - -TEST_OPERATOR_LIST = ("AND", "OR") - - -@pytest.mark.parametrize( - "dep_date, dep_cycle", - [ - # hour - (DependentDate.CURRENT_HOUR, "hour"), - (DependentDate.LAST_ONE_HOUR, "hour"), - (DependentDate.LAST_TWO_HOURS, "hour"), - (DependentDate.LAST_THREE_HOURS, "hour"), - (DependentDate.LAST_TWENTY_FOUR_HOURS, "hour"), - # day - (DependentDate.TODAY, "day"), - (DependentDate.LAST_ONE_DAYS, "day"), - (DependentDate.LAST_TWO_DAYS, "day"), - (DependentDate.LAST_THREE_DAYS, "day"), - (DependentDate.LAST_SEVEN_DAYS, "day"), - # week - (DependentDate.THIS_WEEK, "week"), - (DependentDate.LAST_WEEK, "week"), - (DependentDate.LAST_MONDAY, "week"), - (DependentDate.LAST_TUESDAY, "week"), - (DependentDate.LAST_WEDNESDAY, "week"), - (DependentDate.LAST_THURSDAY, "week"), - (DependentDate.LAST_FRIDAY, "week"), - (DependentDate.LAST_SATURDAY, "week"), - (DependentDate.LAST_SUNDAY, "week"), - # month - (DependentDate.THIS_MONTH, "month"), - (DependentDate.LAST_MONTH, "month"), - (DependentDate.LAST_MONTH_BEGIN, "month"), - 
(DependentDate.LAST_MONTH_END, "month"), - ], -) -@patch( - "pydolphinscheduler.tasks.dependent.DependentItem.get_code_from_gateway", - return_value={ - "projectCode": TEST_PROJECT_CODE, - "processDefinitionCode": TEST_DEFINITION_CODE, - "taskDefinitionCode": TEST_TASK_CODE, - }, -) -def test_dependent_item_get_define(mock_task_info, dep_date, dep_cycle): - """Test dependent.DependentItem get define. - - Here we have test some cases as below. - ```py - { - "projectCode": "project code", - "definitionCode": "definition code", - "depTaskCode": "dep task code", - "cycle": "day", - "dateValue": "today" - } - ``` - """ - attr = { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": dep_date, - } - expect = { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": dep_cycle, - "dateValue": dep_date, - } - task = DependentItem(**attr) - assert expect == task.get_define() - - -def test_dependent_item_date_error(): - """Test error when pass None to dependent_date.""" - with pytest.raises( - PyDSParamException, match="Parameter dependent_date must provider.*?" 
- ): - DependentItem( - project_name=TEST_PROJECT, - process_definition_name=TEST_PROCESS_DEFINITION, - dependent_date=None, - ) - - -@pytest.mark.parametrize( - "task_name, result", - [ - ({"dependent_task_name": TEST_TASK}, TEST_TASK), - ({}, None), - ], -) -def test_dependent_item_code_parameter(task_name: dict, result: Optional[str]): - """Test dependent item property code_parameter.""" - dependent_item = DependentItem( - project_name=TEST_PROJECT, - process_definition_name=TEST_PROCESS_DEFINITION, - **task_name, - ) - expect = (TEST_PROJECT, TEST_PROCESS_DEFINITION, result) - assert dependent_item.code_parameter == expect - - -@pytest.mark.parametrize( - "arg_list", - [ - [1, 2], - [ - DependentItem( - project_name=TEST_PROJECT, - process_definition_name=TEST_PROCESS_DEFINITION, - ), - 1, - ], - [ - And( - DependentItem( - project_name=TEST_PROJECT, - process_definition_name=TEST_PROCESS_DEFINITION, - ) - ), - 1, - ], - [ - DependentItem( - project_name=TEST_PROJECT, - process_definition_name=TEST_PROCESS_DEFINITION, - ), - And( - DependentItem( - project_name=TEST_PROJECT, - process_definition_name=TEST_PROCESS_DEFINITION, - ) - ), - ], - ], -) -@patch( - "pydolphinscheduler.tasks.dependent.DependentItem.get_code_from_gateway", - return_value={ - "projectCode": TEST_PROJECT_CODE, - "processDefinitionCode": TEST_DEFINITION_CODE, - "taskDefinitionCode": TEST_TASK_CODE, - }, -) -def test_dependent_operator_set_define_error(mock_code, arg_list): - """Test dependent operator function :func:`set_define` with not support type.""" - dep_op = DependentOperator(*arg_list) - with pytest.raises(PyDSParamException, match="Dependent .*? 
operator.*?"): - dep_op.set_define_attr() - - -@pytest.mark.parametrize( - # Test dependent operator, Test dependent item parameters, expect operator define - "operators, kwargs, expect", - [ - # Test dependent operator (And | Or) with single dependent item - ( - (And, Or), - ( - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_MONTH_END, - }, - ), - [ - { - "relation": op, - "dependItemList": [ - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "month", - "dateValue": DependentDate.LAST_MONTH_END, - }, - ], - } - for op in TEST_OPERATOR_LIST - ], - ), - # Test dependent operator (And | Or) with two dependent item - ( - (And, Or), - ( - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_MONTH_END, - }, - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_WEEK, - }, - ), - [ - { - "relation": op, - "dependItemList": [ - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "month", - "dateValue": DependentDate.LAST_MONTH_END, - }, - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "week", - "dateValue": DependentDate.LAST_WEEK, - }, - ], - } - for op in TEST_OPERATOR_LIST - ], - ), - # Test dependent operator (And | Or) with multiply dependent item - ( - (And, Or), - ( - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_MONTH_END, - }, - { - "project_name": TEST_PROJECT, - "process_definition_name": 
TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_WEEK, - }, - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_ONE_DAYS, - }, - ), - [ - { - "relation": op, - "dependItemList": [ - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "month", - "dateValue": DependentDate.LAST_MONTH_END, - }, - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "week", - "dateValue": DependentDate.LAST_WEEK, - }, - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "day", - "dateValue": DependentDate.LAST_ONE_DAYS, - }, - ], - } - for op in TEST_OPERATOR_LIST - ], - ), - ], -) -@patch( - "pydolphinscheduler.tasks.dependent.DependentItem.get_code_from_gateway", - return_value={ - "projectCode": TEST_PROJECT_CODE, - "processDefinitionCode": TEST_DEFINITION_CODE, - "taskDefinitionCode": TEST_TASK_CODE, - }, -) -def test_operator_dependent_item( - mock_code_info, - operators: Tuple[DependentOperator], - kwargs: Tuple[dict], - expect: List[Dict], -): - """Test DependentOperator(DependentItem) function get_define. - - Here we have test some cases as below, including single dependentItem and multiply dependentItem. - ```py - { - "relation": "AND", - "dependItemList": [ - { - "projectCode": "project code", - "definitionCode": "definition code", - "depTaskCode": "dep task code", - "cycle": "day", - "dateValue": "today" - }, - ... 
- ] - } - ``` - """ - for idx, operator in enumerate(operators): - # Use variable to keep one or more dependent item to test dependent operator behavior - dependent_item_list = [] - for kwarg in kwargs: - dependent_item = DependentItem(**kwarg) - dependent_item_list.append(dependent_item) - op = operator(*dependent_item_list) - assert expect[idx] == op.get_define() - - -@pytest.mark.parametrize( - # Test dependent operator, Test dependent item parameters, expect operator define - "operators, args, expect", - [ - # Test dependent operator (And | Or) with single dependent task list - ( - (And, Or), - ( - (And, Or), - ( - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_MONTH_END, - }, - ), - ), - [ - { - "relation": par_op, - "dependTaskList": [ - { - "relation": chr_op, - "dependItemList": [ - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "month", - "dateValue": DependentDate.LAST_MONTH_END, - }, - ], - } - ], - } - for (par_op, chr_op) in itertools.product( - TEST_OPERATOR_LIST, TEST_OPERATOR_LIST - ) - ], - ), - # Test dependent operator (And | Or) with two dependent task list - ( - (And, Or), - ( - (And, Or), - ( - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_MONTH_END, - }, - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_WEEK, - }, - ), - ), - [ - { - "relation": par_op, - "dependTaskList": [ - { - "relation": chr_op, - "dependItemList": [ - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "month", - "dateValue": DependentDate.LAST_MONTH_END, - }, - { - "projectCode": 
TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "week", - "dateValue": DependentDate.LAST_WEEK, - }, - ], - } - ], - } - for (par_op, chr_op) in itertools.product( - TEST_OPERATOR_LIST, TEST_OPERATOR_LIST - ) - ], - ), - # Test dependent operator (And | Or) with multiply dependent task list - ( - (And, Or), - ( - (And, Or), - ( - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_MONTH_END, - }, - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_WEEK, - }, - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_ONE_DAYS, - }, - ), - ), - [ - { - "relation": par_op, - "dependTaskList": [ - { - "relation": chr_op, - "dependItemList": [ - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "month", - "dateValue": DependentDate.LAST_MONTH_END, - }, - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "week", - "dateValue": DependentDate.LAST_WEEK, - }, - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "day", - "dateValue": DependentDate.LAST_ONE_DAYS, - }, - ], - } - ], - } - for (par_op, chr_op) in itertools.product( - TEST_OPERATOR_LIST, TEST_OPERATOR_LIST - ) - ], - ), - ], -) -@patch( - "pydolphinscheduler.tasks.dependent.DependentItem.get_code_from_gateway", - return_value={ - "projectCode": TEST_PROJECT_CODE, - "processDefinitionCode": TEST_DEFINITION_CODE, - "taskDefinitionCode": TEST_TASK_CODE, - }, -) -def 
test_operator_dependent_task_list_multi_dependent_item( - mock_code_info, - operators: Tuple[DependentOperator], - args: Tuple[Union[Tuple, dict]], - expect: List[Dict], -): - """Test DependentOperator(DependentOperator(DependentItem)) single operator function get_define. - - Here we have test some cases as below. This test case only test single DependTaskList with one or - multiply dependItemList. - ```py - { - "relation": "OR", - "dependTaskList": [ - { - "relation": "AND", - "dependItemList": [ - { - "projectCode": "project code", - "definitionCode": "definition code", - "depTaskCode": "dep task code", - "cycle": "day", - "dateValue": "today" - }, - ... - ] - }, - ] - } - ``` - """ - # variable expect_idx record idx should be use to get specific expect - expect_idx = 0 - - for op_idx, operator in enumerate(operators): - dependent_operator = args[0] - dependent_item_kwargs = args[1] - - for dop_idx, dpt_op in enumerate(dependent_operator): - dependent_item_list = [] - for dpt_kwargs in dependent_item_kwargs: - dpti = DependentItem(**dpt_kwargs) - dependent_item_list.append(dpti) - child_dep_op = dpt_op(*dependent_item_list) - op = operator(child_dep_op) - assert expect[expect_idx] == op.get_define() - expect_idx += 1 - - -def get_dep_task_list(*operator): - """Return dependent task list from given operators list.""" - result = [] - for op in operator: - result.append( - { - "relation": op.operator_name(), - "dependItemList": [ - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "month", - "dateValue": DependentDate.LAST_MONTH_END, - }, - ], - } - ) - return result - - -@pytest.mark.parametrize( - # Test dependent operator, Test dependent item parameters, expect operator define - "operators, args, expect", - [ - # Test dependent operator (And | Or) with two dependent task list - ( - (And, Or), - ( - ((And, And), (And, Or), (Or, And), (Or, Or)), - { - "project_name": TEST_PROJECT, - 
"process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_MONTH_END, - }, - ), - [ - { - "relation": parent_op.operator_name(), - "dependTaskList": get_dep_task_list(*child_ops), - } - for parent_op in (And, Or) - for child_ops in ((And, And), (And, Or), (Or, And), (Or, Or)) - ], - ), - # Test dependent operator (And | Or) with multiple dependent task list - ( - (And, Or), - ( - ((And, And, And), (And, And, And, And), (And, And, And, And, And)), - { - "project_name": TEST_PROJECT, - "process_definition_name": TEST_PROCESS_DEFINITION, - "dependent_task_name": TEST_TASK, - "dependent_date": DependentDate.LAST_MONTH_END, - }, - ), - [ - { - "relation": parent_op.operator_name(), - "dependTaskList": get_dep_task_list(*child_ops), - } - for parent_op in (And, Or) - for child_ops in ( - (And, And, And), - (And, And, And, And), - (And, And, And, And, And), - ) - ], - ), - ], -) -@patch( - "pydolphinscheduler.tasks.dependent.DependentItem.get_code_from_gateway", - return_value={ - "projectCode": TEST_PROJECT_CODE, - "processDefinitionCode": TEST_DEFINITION_CODE, - "taskDefinitionCode": TEST_TASK_CODE, - }, -) -def test_operator_dependent_task_list_multi_dependent_list( - mock_code_info, - operators: Tuple[DependentOperator], - args: Tuple[Union[Tuple, dict]], - expect: List[Dict], -): - """Test DependentOperator(DependentOperator(DependentItem)) multiply operator function get_define. - - Here we have test some cases as below. This test case only test single DependTaskList with one or - multiply dependTaskList. - ```py - { - "relation": "OR", - "dependTaskList": [ - { - "relation": "AND", - "dependItemList": [ - { - "projectCode": "project code", - "definitionCode": "definition code", - "depTaskCode": "dep task code", - "cycle": "day", - "dateValue": "today" - } - ] - }, - ... 
- ] - } - ``` - """ - # variable expect_idx record idx should be use to get specific expect - expect_idx = 0 - for op_idx, operator in enumerate(operators): - dependent_operator = args[0] - dependent_item_kwargs = args[1] - - for dop_idx, dpt_ops in enumerate(dependent_operator): - dependent_task_list = [ - dpt_op(DependentItem(**dependent_item_kwargs)) for dpt_op in dpt_ops - ] - op = operator(*dependent_task_list) - assert ( - expect[expect_idx] == op.get_define() - ), f"Failed with operator syntax {operator}.{dpt_ops}" - expect_idx += 1 - - -@patch( - "pydolphinscheduler.tasks.dependent.DependentItem.get_code_from_gateway", - return_value={ - "projectCode": TEST_PROJECT_CODE, - "processDefinitionCode": TEST_DEFINITION_CODE, - "taskDefinitionCode": TEST_TASK_CODE, - }, -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_dependent_get_define(mock_code_version, mock_dep_code): - """Test task dependent function get_define.""" - project_name = "test-dep-project" - process_definition_name = "test-dep-definition" - dependent_task_name = "test-dep-task" - dep_operator = And( - Or( - # test dependence with add tasks - DependentItem( - project_name=project_name, - process_definition_name=process_definition_name, - ) - ), - And( - # test dependence with specific task - DependentItem( - project_name=project_name, - process_definition_name=process_definition_name, - dependent_task_name=dependent_task_name, - ) - ), - ) - - name = "test_dependent_get_define" - expect = { - "code": 123, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "DEPENDENT", - "taskParams": { - "resourceList": [], - "localParams": [], - "dependence": { - "relation": "AND", - "dependTaskList": [ - { - "relation": "OR", - "dependItemList": [ - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": "0", - "cycle": "day", - "dateValue": "today", - } - ], - }, - { - 
"relation": "AND", - "dependItemList": [ - { - "projectCode": TEST_PROJECT_CODE, - "definitionCode": TEST_DEFINITION_CODE, - "depTaskCode": TEST_TASK_CODE, - "cycle": "day", - "dateValue": "today", - } - ], - }, - ], - }, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - - task = Dependent(name, dependence=dep_operator) - assert task.get_define() == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_dvc.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_dvc.py deleted file mode 100644 index 815d896234..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_dvc.py +++ /dev/null @@ -1,173 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task Dvc.""" -from unittest.mock import patch - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.tasks.dvc import DVCDownload, DVCInit, DvcTaskType, DVCUpload - -repository = "git@github.com:/dvc-data-repository-example.git" - - -def test_dvc_init_get_define(): - """Test task dvc init function get_define.""" - name = "test_dvc_init" - dvc_store_url = "~/dvc_data" - - code = 123 - version = 1 - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": TaskType.DVC, - "taskParams": { - "resourceList": [], - "localParams": [], - "dvcTaskType": DvcTaskType.INIT, - "dvcRepository": repository, - "dvcStoreUrl": dvc_store_url, - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - dvc_init = DVCInit(name, repository, dvc_store_url) - assert dvc_init.get_define() == expect - - -def test_dvc_upload_get_define(): - """Test task dvc upload function get_define.""" - name = "test_dvc_upload" - data_path_in_dvc_repository = "iris" - data_path_in_worker = "~/source/iris" - version = "v1" - message = "upload iris data v1" - - code = 123 - version = 1 - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": TaskType.DVC, - "taskParams": { - "resourceList": [], - "localParams": [], - "dvcTaskType": DvcTaskType.UPLOAD, - "dvcRepository": repository, - "dvcDataLocation": data_path_in_dvc_repository, - "dvcLoadSaveDataPath": data_path_in_worker, - "dvcVersion": version, - "dvcMessage": message, - "dependence": {}, - "conditionResult": 
{"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - dvc_upload = DVCUpload( - name, - repository=repository, - data_path_in_dvc_repository=data_path_in_dvc_repository, - data_path_in_worker=data_path_in_worker, - version=version, - message=message, - ) - assert dvc_upload.get_define() == expect - - -def test_dvc_download_get_define(): - """Test task dvc download function get_define.""" - name = "test_dvc_upload" - data_path_in_dvc_repository = "iris" - data_path_in_worker = "~/target/iris" - version = "v1" - - code = 123 - version = 1 - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": TaskType.DVC, - "taskParams": { - "resourceList": [], - "localParams": [], - "dvcTaskType": DvcTaskType.DOWNLOAD, - "dvcRepository": repository, - "dvcDataLocation": data_path_in_dvc_repository, - "dvcLoadSaveDataPath": data_path_in_worker, - "dvcVersion": version, - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - dvc_download = DVCDownload( - name, - repository=repository, - data_path_in_dvc_repository=data_path_in_dvc_repository, - data_path_in_worker=data_path_in_worker, - version=version, - ) - assert dvc_download.get_define() == expect diff --git 
a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_flink.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_flink.py deleted file mode 100644 index 2f30a494b9..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_flink.py +++ /dev/null @@ -1,83 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task Flink.""" - -from unittest.mock import patch - -from pydolphinscheduler.tasks.flink import DeployMode, Flink, FlinkVersion, ProgramType - - -@patch( - "pydolphinscheduler.core.engine.Engine.get_resource_info", - return_value=({"id": 1, "name": "test"}), -) -def test_flink_get_define(mock_resource): - """Test task flink function get_define.""" - code = 123 - version = 1 - name = "test_flink_get_define" - main_class = "org.apache.flink.test_main_class" - main_package = "test_main_package" - program_type = ProgramType.JAVA - deploy_mode = DeployMode.LOCAL - - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "FLINK", - "taskParams": { - "mainClass": main_class, - "mainJar": { - "id": 1, - }, - "programType": program_type, - "deployMode": deploy_mode, - "flinkVersion": FlinkVersion.LOW_VERSION, - "slot": 1, - "parallelism": 1, - "taskManager": 2, - "jobManagerMemory": "1G", - "taskManagerMemory": "2G", - "appName": None, - "mainArgs": None, - "others": None, - "localParams": [], - "resourceList": [], - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - task = Flink(name, main_class, main_package, program_type, deploy_mode) - assert task.get_define() == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_func_wrap.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_func_wrap.py deleted file mode 100644 index 628b6e7f86..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_func_wrap.py +++ /dev/null @@ -1,169 +0,0 @@ -# Licensed to 
the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test module about function wrap task decorator.""" - -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.tasks.func_wrap import task -from tests.testing.decorator import foo as foo_decorator -from tests.testing.task import Task - -PD_NAME = "test_process_definition" -TASK_NAME = "test_task" - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", return_value=(12345, 1) -) -def test_single_task_outside(mock_code): - """Test single decorator task which outside process definition.""" - - @task - def foo(): - print(TASK_NAME) - - with ProcessDefinition(PD_NAME) as pd: - foo() - - assert pd is not None and pd.name == PD_NAME - assert len(pd.tasks) == 1 - - pd_task = pd.tasks[12345] - assert pd_task.name == "foo" - assert pd_task.raw_script == "def foo():\n print(TASK_NAME)\nfoo()" - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", return_value=(12345, 1) -) -def test_single_task_inside(mock_code): - """Test single decorator task which inside process definition.""" - with ProcessDefinition(PD_NAME) as 
pd: - - @task - def foo(): - print(TASK_NAME) - - foo() - - assert pd is not None and pd.name == PD_NAME - assert len(pd.tasks) == 1 - - pd_task = pd.tasks[12345] - assert pd_task.name == "foo" - assert pd_task.raw_script == "def foo():\n print(TASK_NAME)\nfoo()" - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", return_value=(12345, 1) -) -def test_addition_decorator_error(mock_code): - """Test error when using task decorator to a function already have decorator.""" - - @task - @foo_decorator - def foo(): - print(TASK_NAME) - - with ProcessDefinition(PD_NAME) as pd: # noqa: F841 - with pytest.raises( - PyDSParamException, match="Do no support other decorators for.*" - ): - foo() - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - side_effect=Task("test_func_wrap", "func_wrap").gen_code_and_version, -) -def test_multiple_tasks_outside(mock_code): - """Test multiple decorator tasks which outside process definition.""" - - @task - def foo(): - print(TASK_NAME) - - @task - def bar(): - print(TASK_NAME) - - with ProcessDefinition(PD_NAME) as pd: - foo = foo() - bar = bar() - - foo >> bar - - assert pd is not None and pd.name == PD_NAME - assert len(pd.tasks) == 2 - - task_foo = pd.get_one_task_by_name("foo") - task_bar = pd.get_one_task_by_name("bar") - assert set(pd.task_list) == {task_foo, task_bar} - assert ( - task_foo is not None - and task_foo._upstream_task_codes == set() - and task_foo._downstream_task_codes.pop() == task_bar.code - ) - assert ( - task_bar is not None - and task_bar._upstream_task_codes.pop() == task_foo.code - and task_bar._downstream_task_codes == set() - ) - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - side_effect=Task("test_func_wrap", "func_wrap").gen_code_and_version, -) -def test_multiple_tasks_inside(mock_code): - """Test multiple decorator tasks which inside process definition.""" - with ProcessDefinition(PD_NAME) as pd: - - @task - def foo(): - print(TASK_NAME) - 
- @task - def bar(): - print(TASK_NAME) - - foo = foo() - bar = bar() - - foo >> bar - - assert pd is not None and pd.name == PD_NAME - assert len(pd.tasks) == 2 - - task_foo = pd.get_one_task_by_name("foo") - task_bar = pd.get_one_task_by_name("bar") - assert set(pd.task_list) == {task_foo, task_bar} - assert ( - task_foo is not None - and task_foo._upstream_task_codes == set() - and task_foo._downstream_task_codes.pop() == task_bar.code - ) - assert ( - task_bar is not None - and task_bar._upstream_task_codes.pop() == task_foo.code - and task_bar._downstream_task_codes == set() - ) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_http.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_http.py deleted file mode 100644 index 399829b68c..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_http.py +++ /dev/null @@ -1,145 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task HTTP.""" - -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.tasks.http import Http, HttpCheckCondition, HttpMethod - - -@pytest.mark.parametrize( - "class_name, attrs", - [ - (HttpMethod, ("GET", "POST", "HEAD", "PUT", "DELETE")), - ( - HttpCheckCondition, - ( - "STATUS_CODE_DEFAULT", - "STATUS_CODE_CUSTOM", - "BODY_CONTAINS", - "BODY_NOT_CONTAINS", - ), - ), - ], -) -def test_attr_exists(class_name, attrs): - """Test weather class HttpMethod and HttpCheckCondition contain specific attribute.""" - assert all(hasattr(class_name, attr) for attr in attrs) - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - {"url": "https://www.apache.org"}, - { - "url": "https://www.apache.org", - "httpMethod": "GET", - "httpParams": [], - "httpCheckCondition": "STATUS_CODE_DEFAULT", - "condition": None, - "connectTimeout": 60000, - "socketTimeout": 60000, - "localParams": [], - "resourceList": [], - "dependence": {}, - "waitStartTimeout": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - }, - ) - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_property_task_params(mock_code_version, attr, expect): - """Test task http property.""" - task = Http("test-http-task-params", **attr) - assert expect == task.task_params - - -@pytest.mark.parametrize( - "param", - [ - {"http_method": "http_method"}, - {"http_check_condition": "http_check_condition"}, - {"http_check_condition": HttpCheckCondition.STATUS_CODE_CUSTOM}, - { - "http_check_condition": HttpCheckCondition.STATUS_CODE_CUSTOM, - "condition": None, - }, - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_http_task_param_not_support_param(mock_code, param): - """Test HttpTaskParams not support parameter.""" - url = "https://www.apache.org" - with pytest.raises(PyDSParamException, 
match="Parameter .*?"): - Http("test-no-supprot-param", url, **param) - - -def test_http_get_define(): - """Test task HTTP function get_define.""" - code = 123 - version = 1 - name = "test_http_get_define" - url = "https://www.apache.org" - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "HTTP", - "taskParams": { - "localParams": [], - "httpParams": [], - "url": url, - "httpMethod": "GET", - "httpCheckCondition": "STATUS_CODE_DEFAULT", - "condition": None, - "connectTimeout": 60000, - "socketTimeout": 60000, - "dependence": {}, - "resourceList": [], - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - http = Http(name, url) - assert http.get_define() == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_map_reduce.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_map_reduce.py deleted file mode 100644 index 5d38e93aa4..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_map_reduce.py +++ /dev/null @@ -1,76 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test Task MR.""" - -from unittest.mock import patch - -from pydolphinscheduler.tasks.map_reduce import MR, ProgramType - - -@patch( - "pydolphinscheduler.core.engine.Engine.get_resource_info", - return_value=({"id": 1, "name": "test"}), -) -def test_mr_get_define(mock_resource): - """Test task mr function get_define.""" - code = 123 - version = 1 - name = "test_mr_get_define" - main_class = "org.apache.mr.test_main_class" - main_package = "test_main_package" - program_type = ProgramType.JAVA - main_args = "/dolphinscheduler/resources/file.txt /output/ds" - - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "MR", - "taskParams": { - "mainClass": main_class, - "mainJar": { - "id": 1, - }, - "programType": program_type, - "appName": None, - "mainArgs": main_args, - "others": None, - "localParams": [], - "resourceList": [], - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - task = MR(name, main_class, main_package, program_type, main_args=main_args) - assert task.get_define() == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_mlflow.py 
b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_mlflow.py deleted file mode 100644 index af0a324b53..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_mlflow.py +++ /dev/null @@ -1,205 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task MLflow.""" -from copy import deepcopy -from unittest.mock import patch - -from pydolphinscheduler.tasks.mlflow import ( - MLflowDeployType, - MLflowJobType, - MLflowModels, - MLFlowProjectsAutoML, - MLFlowProjectsBasicAlgorithm, - MLFlowProjectsCustom, - MLflowTaskType, -) - -CODE = 123 -VERSION = 1 -MLFLOW_TRACKING_URI = "http://127.0.0.1:5000" - -EXPECT = { - "code": CODE, - "version": VERSION, - "description": None, - "delayTime": 0, - "taskType": "MLFLOW", - "taskParams": { - "resourceList": [], - "localParams": [], - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, -} - - -def test_mlflow_models_get_define(): - """Test task mlflow models function get_define.""" - name = "mlflow_models" - model_uri = "models:/xgboost_native/Production" - port = 7001 - - expect = deepcopy(EXPECT) - expect["name"] = name - task_params = expect["taskParams"] - task_params["mlflowTrackingUri"] = MLFLOW_TRACKING_URI - task_params["mlflowTaskType"] = MLflowTaskType.MLFLOW_MODELS - task_params["deployType"] = MLflowDeployType.DOCKER - task_params["deployModelKey"] = model_uri - task_params["deployPort"] = port - - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(CODE, VERSION), - ): - task = MLflowModels( - name=name, - model_uri=model_uri, - mlflow_tracking_uri=MLFLOW_TRACKING_URI, - deploy_mode=MLflowDeployType.DOCKER, - port=port, - ) - assert task.get_define() == expect - - -def test_mlflow_project_custom_get_define(): - """Test task mlflow project custom function get_define.""" - name = ("train_xgboost_native",) - repository = "https://github.com/mlflow/mlflow#examples/xgboost/xgboost_native" - mlflow_tracking_uri = MLFLOW_TRACKING_URI - 
parameters = "-P learning_rate=0.2 -P colsample_bytree=0.8 -P subsample=0.9" - experiment_name = "xgboost" - - expect = deepcopy(EXPECT) - expect["name"] = name - task_params = expect["taskParams"] - - task_params["mlflowTrackingUri"] = MLFLOW_TRACKING_URI - task_params["mlflowTaskType"] = MLflowTaskType.MLFLOW_PROJECTS - task_params["mlflowJobType"] = MLflowJobType.CUSTOM_PROJECT - task_params["experimentName"] = experiment_name - task_params["params"] = parameters - task_params["mlflowProjectRepository"] = repository - task_params["mlflowProjectVersion"] = "dev" - - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(CODE, VERSION), - ): - task = MLFlowProjectsCustom( - name=name, - repository=repository, - mlflow_tracking_uri=mlflow_tracking_uri, - parameters=parameters, - experiment_name=experiment_name, - version="dev", - ) - assert task.get_define() == expect - - -def test_mlflow_project_automl_get_define(): - """Test task mlflow project automl function get_define.""" - name = ("train_automl",) - mlflow_tracking_uri = MLFLOW_TRACKING_URI - parameters = "time_budget=30;estimator_list=['lgbm']" - experiment_name = "automl_iris" - model_name = "iris_A" - automl_tool = "flaml" - data_path = "/data/examples/iris" - - expect = deepcopy(EXPECT) - expect["name"] = name - task_params = expect["taskParams"] - - task_params["mlflowTrackingUri"] = MLFLOW_TRACKING_URI - task_params["mlflowTaskType"] = MLflowTaskType.MLFLOW_PROJECTS - task_params["mlflowJobType"] = MLflowJobType.AUTOML - task_params["experimentName"] = experiment_name - task_params["modelName"] = model_name - task_params["registerModel"] = bool(model_name) - task_params["dataPath"] = data_path - task_params["params"] = parameters - task_params["automlTool"] = automl_tool - - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(CODE, VERSION), - ): - task = MLFlowProjectsAutoML( - name=name, - mlflow_tracking_uri=mlflow_tracking_uri, - 
parameters=parameters, - experiment_name=experiment_name, - model_name=model_name, - automl_tool=automl_tool, - data_path=data_path, - ) - assert task.get_define() == expect - - -def test_mlflow_project_basic_algorithm_get_define(): - """Test task mlflow project BasicAlgorithm function get_define.""" - name = "train_basic_algorithm" - mlflow_tracking_uri = MLFLOW_TRACKING_URI - parameters = "n_estimators=200;learning_rate=0.2" - experiment_name = "basic_algorithm_iris" - model_name = "iris_B" - algorithm = "lightgbm" - data_path = "/data/examples/iris" - search_params = "max_depth=[5, 10];n_estimators=[100, 200]" - - expect = deepcopy(EXPECT) - expect["name"] = name - task_params = expect["taskParams"] - - task_params["mlflowTrackingUri"] = MLFLOW_TRACKING_URI - task_params["mlflowTaskType"] = MLflowTaskType.MLFLOW_PROJECTS - task_params["mlflowJobType"] = MLflowJobType.BASIC_ALGORITHM - task_params["experimentName"] = experiment_name - task_params["modelName"] = model_name - task_params["registerModel"] = bool(model_name) - task_params["dataPath"] = data_path - task_params["params"] = parameters - task_params["algorithm"] = algorithm - task_params["searchParams"] = search_params - - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(CODE, VERSION), - ): - task = MLFlowProjectsBasicAlgorithm( - name=name, - mlflow_tracking_uri=mlflow_tracking_uri, - parameters=parameters, - experiment_name=experiment_name, - model_name=model_name, - algorithm=algorithm, - data_path=data_path, - search_params=search_params, - ) - assert task.get_define() == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_openmldb.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_openmldb.py deleted file mode 100644 index f580ab06b2..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_openmldb.py +++ /dev/null @@ -1,73 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or 
more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test Task OpenMLDB.""" -from unittest.mock import patch - -from pydolphinscheduler.constants import TaskType -from pydolphinscheduler.tasks.openmldb import OpenMLDB - - -def test_openmldb_get_define(): - """Test task openmldb function get_define.""" - zookeeper = "127.0.0.1:2181" - zookeeper_path = "/openmldb" - execute_mode = "offline" - - sql = """USE demo_db; - set @@job_timeout=200000; - LOAD DATA INFILE 'file:///tmp/train_sample.csv' - INTO TABLE talkingdata OPTIONS(mode='overwrite'); - """ - - code = 123 - version = 1 - name = "test_openmldb_get_define" - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": TaskType.OPENMLDB, - "taskParams": { - "resourceList": [], - "localParams": [], - "zk": zookeeper, - "zkPath": zookeeper_path, - "executeMode": execute_mode, - "sql": sql, - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - 
"pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - openmldb = OpenMLDB(name, zookeeper, zookeeper_path, execute_mode, sql) - assert openmldb.get_define() == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_procedure.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_procedure.py deleted file mode 100644 index 80afe7b879..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_procedure.py +++ /dev/null @@ -1,107 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task Procedure.""" - -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.tasks.procedure import Procedure - -TEST_PROCEDURE_SQL = ( - 'create procedure HelloWorld() selece "hello world"; call HelloWorld();' -) -TEST_PROCEDURE_DATASOURCE_NAME = "test_datasource" - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - { - "name": "test-procedure-task-params", - "datasource_name": TEST_PROCEDURE_DATASOURCE_NAME, - "method": TEST_PROCEDURE_SQL, - }, - { - "method": TEST_PROCEDURE_SQL, - "type": "MYSQL", - "datasource": 1, - "localParams": [], - "resourceList": [], - "dependence": {}, - "waitStartTimeout": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - }, - ) - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch( - "pydolphinscheduler.core.database.Database.get_database_info", - return_value=({"id": 1, "type": "MYSQL"}), -) -def test_property_task_params(mock_datasource, mock_code_version, attr, expect): - """Test task sql task property.""" - task = Procedure(**attr) - assert expect == task.task_params - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch( - "pydolphinscheduler.core.database.Database.get_database_info", - return_value=({"id": 1, "type": "MYSQL"}), -) -def test_sql_get_define(mock_datasource, mock_code_version): - """Test task procedure function get_define.""" - name = "test_procedure_get_define" - expect = { - "code": 123, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "PROCEDURE", - "taskParams": { - "type": "MYSQL", - "datasource": 1, - "method": TEST_PROCEDURE_SQL, - "localParams": [], - "resourceList": [], - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 
0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - task = Procedure(name, TEST_PROCEDURE_DATASOURCE_NAME, TEST_PROCEDURE_SQL) - assert task.get_define() == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_python.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_python.py deleted file mode 100644 index 77aa10625b..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_python.py +++ /dev/null @@ -1,201 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task python.""" -from pathlib import Path -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.resources_plugin import Local -from pydolphinscheduler.tasks.python import Python -from pydolphinscheduler.utils import file -from tests.testing.file import delete_file - - -def foo(): # noqa: D103 - print("hello world.") - - -@pytest.fixture() -def setup_crt_first(request): - """Set up and teardown about create file first and then delete it.""" - file_content = request.param.get("file_content") - file_path = request.param.get("file_path") - file.write( - content=file_content, - to_path=file_path, - ) - yield - delete_file(file_path) - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - {"definition": "print(1)"}, - { - "definition": "print(1)", - "rawScript": "print(1)", - "localParams": [], - "resourceList": [], - "dependence": {}, - "waitStartTimeout": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - }, - ), - ( - {"definition": "def foo():\n print('I am foo')"}, - { - "definition": "def foo():\n print('I am foo')", - "rawScript": "def foo():\n print('I am foo')\nfoo()", - "localParams": [], - "resourceList": [], - "dependence": {}, - "waitStartTimeout": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - }, - ), - ( - {"definition": foo}, - { - "definition": foo, - "rawScript": 'def foo(): # noqa: D103\n print("hello world.")\nfoo()', - "localParams": [], - "resourceList": [], - "dependence": {}, - "waitStartTimeout": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - }, - ), - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_property_task_params(mock_code_version, attr, expect): - """Test task python property.""" - task = Python("test-python-task-params", **attr) - assert expect == task.task_params - - -@pytest.mark.parametrize( - "script_code", - 
[ - 123, - ("print", "hello world"), - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_python_task_not_support_code(mock_code, script_code): - """Test python task parameters.""" - name = "not_support_code_type" - with pytest.raises( - PyDSParamException, match="Parameter definition do not support .*?" - ): - task = Python(name, script_code) - task.raw_script - - -@pytest.mark.parametrize( - "name, script_code, raw", - [ - ("string_define", 'print("hello world.")', 'print("hello world.")'), - ( - "function_define", - foo, - 'def foo(): # noqa: D103\n print("hello world.")\nfoo()', - ), - ], -) -def test_python_get_define(name, script_code, raw): - """Test task python function get_define.""" - code = 123 - version = 1 - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "PYTHON", - "taskParams": { - "definition": script_code, - "resourceList": [], - "localParams": [], - "rawScript": raw, - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - shell = Python(name, script_code) - assert shell.get_define() == expect - - -@pytest.mark.parametrize( - "setup_crt_first", - [ - { - "file_path": Path(__file__).parent.joinpath("local_res.py"), - "file_content": "test local resource", - } - ], - indirect=True, -) -@pytest.mark.parametrize( - "attr, expect", - [ - ( - { - "name": "task_python", - "definition": "local_res.py", - "resource_plugin": Local(str(Path(__file__).parent)), - }, - "test local resource", - ), - ], -) -@patch( - 
"pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_resources_local_python_command_content( - mock_code_version, attr, expect, setup_crt_first -): - """Test task Python definition content through the local resource plug-in.""" - python = Python(**attr) - assert expect == getattr(python, "definition") diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_pytorch.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_pytorch.py deleted file mode 100644 index eccb51ca31..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_pytorch.py +++ /dev/null @@ -1,124 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task Pytorch.""" -from copy import deepcopy -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.tasks.pytorch import DEFAULT, Pytorch -from tests.testing.task import Task - -CODE = 123 -VERSION = 1 - -EXPECT = { - "code": CODE, - "version": VERSION, - "description": None, - "delayTime": 0, - "taskType": "PYTORCH", - "taskParams": { - "resourceList": [], - "localParams": [], - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, -} - - -def test_pytorch_get_define(): - """Test task pytorch function get_define.""" - name = "task_conda_env" - script = "main.py" - script_params = "--dry-run --no-cuda" - project_path = "https://github.com/pytorch/examples#mnist" - is_create_environment = True - python_env_tool = "conda" - requirements = "requirements.txt" - conda_python_version = "3.7" - - expect = deepcopy(EXPECT) - expect["name"] = name - task_params = expect["taskParams"] - - task_params["script"] = script - task_params["scriptParams"] = script_params - task_params["pythonPath"] = project_path - task_params["otherParams"] = True - task_params["isCreateEnvironment"] = is_create_environment - task_params["pythonCommand"] = "${PYTHON_HOME}" - task_params["pythonEnvTool"] = python_env_tool - task_params["requirements"] = requirements - task_params["condaPythonVersion"] = conda_python_version - - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(CODE, VERSION), - ): - task = Pytorch( - name=name, - script=script, - script_params=script_params, - project_path=project_path, - is_create_environment=is_create_environment, - python_env_tool=python_env_tool, - requirements=requirements, - ) - assert task.get_define() == expect 
- - -@pytest.mark.parametrize( - "is_create_environment, project_path, python_command, expect", - [ - ( - DEFAULT.is_create_environment, - DEFAULT.project_path, - DEFAULT.python_command, - False, - ), - (True, DEFAULT.project_path, DEFAULT.python_command, True), - (DEFAULT.is_create_environment, "/home", DEFAULT.python_command, True), - (DEFAULT.is_create_environment, DEFAULT.project_path, "/usr/bin/python", True), - ], -) -def test_other_params(is_create_environment, project_path, python_command, expect): - """Test task pytorch function other_params.""" - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - side_effect=Task("test_func_wrap", "func_wrap").gen_code_and_version, - ): - task = Pytorch( - name="test", - script="", - script_params="", - project_path=project_path, - is_create_environment=is_create_environment, - python_command=python_command, - ) - assert task.other_params == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sagemaker.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sagemaker.py deleted file mode 100644 index 20edc22805..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sagemaker.py +++ /dev/null @@ -1,102 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test Task SageMaker.""" -import json -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.tasks.sagemaker import SageMaker - -sagemaker_request_json = json.dumps( - { - "ParallelismConfiguration": {"MaxParallelExecutionSteps": 1}, - "PipelineExecutionDescription": "test Pipeline", - "PipelineExecutionDisplayName": "AbalonePipeline", - "PipelineName": "AbalonePipeline", - "PipelineParameters": [ - {"Name": "ProcessingInstanceType", "Value": "ml.m4.xlarge"}, - {"Name": "ProcessingInstanceCount", "Value": "2"}, - ], - }, - indent=2, -) - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - {"sagemaker_request_json": sagemaker_request_json}, - { - "sagemakerRequestJson": sagemaker_request_json, - "localParams": [], - "resourceList": [], - "dependence": {}, - "waitStartTimeout": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - }, - ) - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_property_task_params(mock_code_version, attr, expect): - """Test task sagemaker task property.""" - task = SageMaker("test-sagemaker-task-params", **attr) - assert expect == task.task_params - - -def test_sagemaker_get_define(): - """Test task sagemaker function get_define.""" - code = 123 - version = 1 - name = "test_sagemaker_get_define" - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "SAGEMAKER", - "taskParams": { - "resourceList": [], - "localParams": [], - "sagemakerRequestJson": sagemaker_request_json, - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - 
"timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - sagemaker = SageMaker(name, sagemaker_request_json) - assert sagemaker.get_define() == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_shell.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_shell.py deleted file mode 100644 index 9344ac2bb0..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_shell.py +++ /dev/null @@ -1,133 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task shell.""" - -from pathlib import Path -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.resources_plugin import Local -from pydolphinscheduler.tasks.shell import Shell -from pydolphinscheduler.utils import file -from tests.testing.file import delete_file - -file_name = "local_res.sh" -file_content = 'echo "test res_local"' -res_plugin_prefix = Path(__file__).parent -file_path = res_plugin_prefix.joinpath(file_name) - - -@pytest.fixture -def setup_crt_first(): - """Set up and teardown about create file first and then delete it.""" - file.write(content=file_content, to_path=file_path) - yield - delete_file(file_path) - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - {"command": "test script"}, - { - "rawScript": "test script", - "localParams": [], - "resourceList": [], - "dependence": {}, - "waitStartTimeout": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - }, - ) - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_property_task_params(mock_code_version, attr, expect): - """Test task shell task property.""" - task = Shell("test-shell-task-params", **attr) - assert expect == task.task_params - - -def test_shell_get_define(): - """Test task shell function get_define.""" - code = 123 - version = 1 - name = "test_shell_get_define" - command = "echo test shell" - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "environmentCode": None, - "delayTime": 0, - "taskType": "SHELL", - "taskParams": { - "resourceList": [], - "localParams": [], - "rawScript": command, - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with 
patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - shell = Shell(name, command) - print(shell.get_define()) - assert shell.get_define() == expect - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - { - "name": "test-local-res-command-content", - "command": file_name, - "resource_plugin": Local(str(res_plugin_prefix)), - }, - file_content, - ) - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_resources_local_shell_command_content( - mock_code_version, attr, expect, setup_crt_first -): - """Test task shell task command content through the local resource plug-in.""" - task = Shell(**attr) - assert expect == getattr(task, "raw_script") diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_spark.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_spark.py deleted file mode 100644 index 1fdb1fa400..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_spark.py +++ /dev/null @@ -1,82 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task Spark.""" - -from unittest.mock import patch - -from pydolphinscheduler.tasks.spark import DeployMode, ProgramType, Spark - - -@patch( - "pydolphinscheduler.core.engine.Engine.get_resource_info", - return_value=({"id": 1, "name": "test"}), -) -def test_spark_get_define(mock_resource): - """Test task spark function get_define.""" - code = 123 - version = 1 - name = "test_spark_get_define" - main_class = "org.apache.spark.test_main_class" - main_package = "test_main_package" - program_type = ProgramType.JAVA - deploy_mode = DeployMode.LOCAL - - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "SPARK", - "taskParams": { - "mainClass": main_class, - "mainJar": { - "id": 1, - }, - "programType": program_type, - "deployMode": deploy_mode, - "driverCores": 1, - "driverMemory": "512M", - "numExecutors": 2, - "executorMemory": "2G", - "executorCores": 2, - "appName": None, - "mainArgs": None, - "others": None, - "localParams": [], - "resourceList": [], - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - task = Spark(name, main_class, main_package, program_type, deploy_mode) - assert task.get_define() == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sql.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sql.py deleted file mode 100644 index a22d9206d0..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sql.py +++ /dev/null @@ -1,208 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor 
license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test Task Sql.""" -from pathlib import Path -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.resources_plugin import Local -from pydolphinscheduler.tasks.sql import Sql, SqlType -from pydolphinscheduler.utils import file -from tests.testing.file import delete_file - -file_name = "local_res.sql" -file_content = "select 1" -res_plugin_prefix = Path(__file__).parent -file_path = res_plugin_prefix.joinpath(file_name) - - -@pytest.fixture -def setup_crt_first(): - """Set up and teardown about create file first and then delete it.""" - file.write(content=file_content, to_path=file_path) - yield - delete_file(file_path) - - -@pytest.mark.parametrize( - "sql, param_sql_type, sql_type", - [ - ("select 1", None, SqlType.SELECT), - (" select 1", None, SqlType.SELECT), - (" select 1 ", None, SqlType.SELECT), - (" select 'insert' ", None, SqlType.SELECT), - (" select 'insert ' ", None, SqlType.SELECT), - ("with tmp as (select 1) select * from tmp ", None, SqlType.SELECT), - ( - "insert into table_name(col1, col2) value (val1, val2)", - None, - SqlType.NOT_SELECT, - ), - ( - "insert into table_name(select, col2) value ('select', val2)", - None, - SqlType.NOT_SELECT, - ), - ("update table_name SET col1=val1 where col1=val2", None, 
SqlType.NOT_SELECT), - ( - "update table_name SET col1='select' where col1=val2", - None, - SqlType.NOT_SELECT, - ), - ("delete from table_name where id < 10", None, SqlType.NOT_SELECT), - ("delete from table_name where id < 10", None, SqlType.NOT_SELECT), - ("alter table table_name add column col1 int", None, SqlType.NOT_SELECT), - ("create table table_name2 (col1 int)", None, SqlType.NOT_SELECT), - ("truncate table table_name", None, SqlType.NOT_SELECT), - ("create table table_name2 (col1 int)", SqlType.SELECT, SqlType.SELECT), - ("select 1", SqlType.NOT_SELECT, SqlType.NOT_SELECT), - ("create table table_name2 (col1 int)", SqlType.NOT_SELECT, SqlType.NOT_SELECT), - ("select 1", SqlType.SELECT, SqlType.SELECT), - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch( - "pydolphinscheduler.core.database.Database.get_database_info", - return_value=({"id": 1, "type": "mock_type"}), -) -def test_get_sql_type( - mock_datasource, mock_code_version, sql, param_sql_type, sql_type -): - """Test property sql_type could return correct type.""" - name = "test_get_sql_type" - datasource_name = "test_datasource" - task = Sql(name, datasource_name, sql, sql_type=param_sql_type) - assert ( - sql_type == task.sql_type - ), f"Sql {sql} expect sql type is {sql_type} but got {task.sql_type}" - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - {"datasource_name": "datasource_name", "sql": "select 1"}, - { - "sql": "select 1", - "type": "MYSQL", - "datasource": 1, - "sqlType": "0", - "preStatements": [], - "postStatements": [], - "displayRows": 10, - "localParams": [], - "resourceList": [], - "dependence": {}, - "waitStartTimeout": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - }, - ) - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -@patch( - "pydolphinscheduler.core.database.Database.get_database_info", - return_value=({"id": 1, "type": 
"MYSQL"}), -) -def test_property_task_params(mock_datasource, mock_code_version, attr, expect): - """Test task sql task property.""" - task = Sql("test-sql-task-params", **attr) - assert expect == task.task_params - - -@patch( - "pydolphinscheduler.core.database.Database.get_database_info", - return_value=({"id": 1, "type": "MYSQL"}), -) -def test_sql_get_define(mock_datasource): - """Test task sql function get_define.""" - code = 123 - version = 1 - name = "test_sql_get_define" - command = "select 1" - datasource_name = "test_datasource" - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "SQL", - "taskParams": { - "type": "MYSQL", - "datasource": 1, - "sql": command, - "sqlType": "0", - "displayRows": 10, - "preStatements": [], - "postStatements": [], - "localParams": [], - "resourceList": [], - "dependence": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - task = Sql(name, datasource_name, command) - assert task.get_define() == expect - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - { - "name": "test-sql-local-res", - "sql": file_name, - "datasource_name": "test_datasource", - "resource_plugin": Local(str(res_plugin_prefix)), - }, - file_content, - ) - ], -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_resources_local_sql_command_content( - mock_code_version, attr, expect, setup_crt_first -): - """Test sql content through the local resource plug-in.""" - sql = Sql(**attr) - assert expect == getattr(sql, "sql") diff --git 
a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sub_process.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sub_process.py deleted file mode 100644 index 126ab1015e..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sub_process.py +++ /dev/null @@ -1,115 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task sub_process.""" - - -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.tasks.sub_process import SubProcess - -TEST_SUB_PROCESS_DEFINITION_NAME = "sub-test-process-definition" -TEST_SUB_PROCESS_DEFINITION_CODE = "3643589832320" -TEST_PROCESS_DEFINITION_NAME = "simple-test-process-definition" - - -@pytest.mark.parametrize( - "attr, expect", - [ - ( - {"process_definition_name": TEST_SUB_PROCESS_DEFINITION_NAME}, - { - "processDefinitionCode": TEST_SUB_PROCESS_DEFINITION_CODE, - "localParams": [], - "resourceList": [], - "dependence": {}, - "waitStartTimeout": {}, - "conditionResult": {"successNode": [""], "failedNode": [""]}, - }, - ) - ], -) -@patch( - "pydolphinscheduler.tasks.sub_process.SubProcess.get_process_definition_info", - return_value=( - { - "id": 1, - "name": TEST_SUB_PROCESS_DEFINITION_NAME, - "code": TEST_SUB_PROCESS_DEFINITION_CODE, - } - ), -) -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_property_task_params(mock_code_version, mock_pd_info, attr, expect): - """Test task sub process property.""" - task = SubProcess("test-sub-process-task-params", **attr) - assert expect == task.task_params - - -@patch( - "pydolphinscheduler.tasks.sub_process.SubProcess.get_process_definition_info", - return_value=( - { - "id": 1, - "name": TEST_SUB_PROCESS_DEFINITION_NAME, - "code": TEST_SUB_PROCESS_DEFINITION_CODE, - } - ), -) -def test_sub_process_get_define(mock_process_definition): - """Test task sub_process function get_define.""" - code = 123 - version = 1 - name = "test_sub_process_get_define" - expect = { - "code": code, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "SUB_PROCESS", - "taskParams": { - "resourceList": [], - "localParams": [], - "processDefinitionCode": TEST_SUB_PROCESS_DEFINITION_CODE, - "dependence": {}, - "conditionResult": 
{"successNode": [""], "failedNode": [""]}, - "waitStartTimeout": {}, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - with patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(code, version), - ): - with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME): - sub_process = SubProcess(name, TEST_SUB_PROCESS_DEFINITION_NAME) - assert sub_process.get_define() == expect diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_switch.py b/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_switch.py deleted file mode 100644 index 6f9222cec0..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_switch.py +++ /dev/null @@ -1,299 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test Task switch.""" - -from typing import Optional, Tuple -from unittest.mock import patch - -import pytest - -from pydolphinscheduler.core.process_definition import ProcessDefinition -from pydolphinscheduler.exceptions import PyDSParamException -from pydolphinscheduler.tasks.switch import ( - Branch, - Default, - Switch, - SwitchBranch, - SwitchCondition, -) -from tests.testing.task import Task - -TEST_NAME = "test-task" -TEST_TYPE = "test-type" - - -def task_switch_arg_wrapper(obj, task: Task, exp: Optional[str] = None) -> SwitchBranch: - """Wrap task switch and its subclass.""" - if obj is Default: - return obj(task) - elif obj is Branch: - return obj(exp, task) - else: - return obj(task, exp) - - -@pytest.mark.parametrize( - "obj", - [ - SwitchBranch, - Branch, - Default, - ], -) -def test_switch_branch_attr_next_node(obj: SwitchBranch): - """Test get attribute from class switch branch.""" - task = Task(name=TEST_NAME, task_type=TEST_TYPE) - switch_branch = task_switch_arg_wrapper(obj, task=task, exp="unittest") - assert switch_branch.next_node == task.code - - -@pytest.mark.parametrize( - "obj", - [ - SwitchBranch, - Default, - ], -) -def test_switch_branch_get_define_without_condition(obj: SwitchBranch): - """Test function :func:`get_define` with None value of attribute condition from class switch branch.""" - task = Task(name=TEST_NAME, task_type=TEST_TYPE) - expect = {"nextNode": task.code} - switch_branch = task_switch_arg_wrapper(obj, task=task) - assert switch_branch.get_define() == expect - - -@pytest.mark.parametrize( - "obj", - [ - SwitchBranch, - Branch, - ], -) -def test_switch_branch_get_define_condition(obj: SwitchBranch): - """Test function :func:`get_define` with specific attribute condition from class switch branch.""" - task = Task(name=TEST_NAME, task_type=TEST_TYPE) - exp = "${var} == 1" - expect = { - "nextNode": task.code, - "condition": exp, - } - switch_branch = task_switch_arg_wrapper(obj, task=task, exp=exp) - assert 
switch_branch.get_define() == expect - - -@pytest.mark.parametrize( - "args, msg", - [ - ( - (1,), - ".*?parameter only support SwitchBranch but got.*?", - ), - ( - (Default(Task(TEST_NAME, TEST_TYPE)), 2), - ".*?parameter only support SwitchBranch but got.*?", - ), - ( - (Default(Task(TEST_NAME, TEST_TYPE)), Default(Task(TEST_NAME, TEST_TYPE))), - ".*?parameter only support exactly one default branch", - ), - ( - ( - Branch(condition="unittest", task=Task(TEST_NAME, TEST_TYPE)), - Default(Task(TEST_NAME, TEST_TYPE)), - Default(Task(TEST_NAME, TEST_TYPE)), - ), - ".*?parameter only support exactly one default branch", - ), - ], -) -def test_switch_condition_set_define_attr_error(args: Tuple, msg: str): - """Test error case on :class:`SwitchCondition`.""" - switch_condition = SwitchCondition(*args) - with pytest.raises(PyDSParamException, match=msg): - switch_condition.set_define_attr() - - -def test_switch_condition_set_define_attr_default(): - """Test set :class:`Default` to attribute on :class:`SwitchCondition`.""" - task = Task(TEST_NAME, TEST_TYPE) - switch_condition = SwitchCondition(Default(task)) - switch_condition.set_define_attr() - assert getattr(switch_condition, "next_node") == task.code - assert getattr(switch_condition, "depend_task_list") == [] - - -def test_switch_condition_set_define_attr_branch(): - """Test set :class:`Branch` to attribute on :class:`SwitchCondition`.""" - task = Task(TEST_NAME, TEST_TYPE) - switch_condition = SwitchCondition( - Branch("unittest1", task), Branch("unittest2", task) - ) - expect = [ - {"condition": "unittest1", "nextNode": task.code}, - {"condition": "unittest2", "nextNode": task.code}, - ] - - switch_condition.set_define_attr() - assert getattr(switch_condition, "next_node") == "" - assert getattr(switch_condition, "depend_task_list") == expect - - -def test_switch_condition_set_define_attr_mix_branch_and_default(): - """Test set bot :class:`Branch` and :class:`Default` to attribute on :class:`SwitchCondition`.""" 
- task = Task(TEST_NAME, TEST_TYPE) - switch_condition = SwitchCondition( - Branch("unittest1", task), Branch("unittest2", task), Default(task) - ) - expect = [ - {"condition": "unittest1", "nextNode": task.code}, - {"condition": "unittest2", "nextNode": task.code}, - ] - - switch_condition.set_define_attr() - assert getattr(switch_condition, "next_node") == task.code - assert getattr(switch_condition, "depend_task_list") == expect - - -def test_switch_condition_get_define_default(): - """Test function :func:`get_define` with :class:`Default` in :class:`SwitchCondition`.""" - task = Task(TEST_NAME, TEST_TYPE) - switch_condition = SwitchCondition(Default(task)) - expect = { - "dependTaskList": [], - "nextNode": task.code, - } - assert switch_condition.get_define() == expect - - -def test_switch_condition_get_define_branch(): - """Test function :func:`get_define` with :class:`Branch` in :class:`SwitchCondition`.""" - task = Task(TEST_NAME, TEST_TYPE) - switch_condition = SwitchCondition( - Branch("unittest1", task), Branch("unittest2", task) - ) - expect = { - "dependTaskList": [ - {"condition": "unittest1", "nextNode": task.code}, - {"condition": "unittest2", "nextNode": task.code}, - ], - "nextNode": "", - } - assert switch_condition.get_define() == expect - - -def test_switch_condition_get_define_mix_branch_and_default(): - """Test function :func:`get_define` with both :class:`Branch` and :class:`Default`.""" - task = Task(TEST_NAME, TEST_TYPE) - switch_condition = SwitchCondition( - Branch("unittest1", task), Branch("unittest2", task), Default(task) - ) - expect = { - "dependTaskList": [ - {"condition": "unittest1", "nextNode": task.code}, - {"condition": "unittest2", "nextNode": task.code}, - ], - "nextNode": task.code, - } - assert switch_condition.get_define() == expect - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_switch_get_define(mock_task_code_version): - """Test task switch 
:func:`get_define`.""" - task = Task(name=TEST_NAME, task_type=TEST_TYPE) - switch_condition = SwitchCondition( - Branch(condition="${var1} > 1", task=task), - Branch(condition="${var1} <= 1", task=task), - Default(task), - ) - - name = "test_switch_get_define" - expect = { - "code": 123, - "name": name, - "version": 1, - "description": None, - "delayTime": 0, - "taskType": "SWITCH", - "taskParams": { - "resourceList": [], - "localParams": [], - "waitStartTimeout": {}, - "switchResult": { - "dependTaskList": [ - {"condition": "${var1} > 1", "nextNode": task.code}, - {"condition": "${var1} <= 1", "nextNode": task.code}, - ], - "nextNode": task.code, - }, - }, - "flag": "YES", - "taskPriority": "MEDIUM", - "workerGroup": "default", - "environmentCode": None, - "failRetryTimes": 0, - "failRetryInterval": 1, - "timeoutFlag": "CLOSE", - "timeoutNotifyStrategy": None, - "timeout": 0, - } - - task = Switch(name, condition=switch_condition) - assert task.get_define() == expect - - -@patch( - "pydolphinscheduler.core.task.Task.gen_code_and_version", - return_value=(123, 1), -) -def test_switch_set_dep_workflow(mock_task_code_version): - """Test task switch set dependence in workflow level.""" - with ProcessDefinition(name="test-switch-set-dep-workflow") as pd: - parent = Task(name="parent", task_type=TEST_TYPE) - switch_child_1 = Task(name="switch_child_1", task_type=TEST_TYPE) - switch_child_2 = Task(name="switch_child_2", task_type=TEST_TYPE) - switch_condition = SwitchCondition( - Branch(condition="${var} > 1", task=switch_child_1), - Default(task=switch_child_2), - ) - - switch = Switch(name=TEST_NAME, condition=switch_condition) - parent >> switch - # General tasks test - assert len(pd.tasks) == 4 - assert sorted(pd.task_list, key=lambda t: t.name) == sorted( - [parent, switch, switch_child_1, switch_child_2], key=lambda t: t.name - ) - # Task dep test - assert parent._downstream_task_codes == {switch.code} - assert switch._upstream_task_codes == {parent.code} - - # 
Switch task dep after ProcessDefinition function get_define called - assert switch._downstream_task_codes == { - switch_child_1.code, - switch_child_2.code, - } - assert all( - [ - child._upstream_task_codes == {switch.code} - for child in [switch_child_1, switch_child_2] - ] - ) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/test_docs.py b/dolphinscheduler-python/pydolphinscheduler/tests/test_docs.py deleted file mode 100644 index 930e4f709e..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/test_docs.py +++ /dev/null @@ -1,59 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test pydolphinscheduler docs.""" - -import re - -from tests.testing.constants import task_without_example -from tests.testing.path import get_doc_tasks, get_tasks - -ignore_code_file = {"__init__.py"} -ignore_doc_file = {"index.rst"} - - -def test_without_missing_task_rst(): - """Test without missing any task document by compare filename. - - Avoiding add new type of tasks but without adding document about it. 
- """ - code_files = {p.stem for p in get_tasks(ignore_name=ignore_code_file)} - doc_files = {p.stem for p in get_doc_tasks(ignore_name=ignore_doc_file)} - assert code_files == doc_files - - -def test_task_without_example(): - """Test task document which without example. - - Avoiding add new type of tasks but without adding example content describe how to use it. - """ - task_without_example_detected = set() - pattern = re.compile("Example\n-------") - - for doc in get_doc_tasks(ignore_name=ignore_doc_file): - search_result = pattern.search(doc.read_text()) - if not search_result: - task_without_example_detected.add(doc.stem) - assert task_without_example == task_without_example_detected - - -def test_doc_automodule_directive_name(): - """Test task document with correct name in directive automodule.""" - pattern = re.compile(".. automodule:: (.*)") - for doc in get_doc_tasks(ignore_name=ignore_doc_file): - match_string = pattern.search(doc.read_text()).group(1) - assert f"pydolphinscheduler.tasks.{doc.stem}" == match_string diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/testing/__init__.py b/dolphinscheduler-python/pydolphinscheduler/tests/testing/__init__.py deleted file mode 100644 index c8caf5b5af..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/testing/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Init testing package, it provider easy way for pydolphinscheduler test.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/testing/cli.py b/dolphinscheduler-python/pydolphinscheduler/tests/testing/cli.py deleted file mode 100644 index 0d2c1d1fbf..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/testing/cli.py +++ /dev/null @@ -1,87 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Utils of command line test.""" - - -from click.testing import CliRunner - -from tests.testing.constants import DEV_MODE - - -class CliTestWrapper: - """Wrap command click CliRunner.invoke.""" - - def __init__(self, *args, **kwargs): - runner = CliRunner() - self.result = runner.invoke(*args, **kwargs) - self.show_result_output() - - def _assert_output(self, output: str = None, fuzzy: bool = False): - """Assert between `CliRunner.invoke.result.output` and parameter `output`. - - :param output: The output will check compare to the ``CliRunner.invoke.output``. - :param fuzzy: A flag define whether assert :param:`output` in fuzzy or not. - Check if `CliRunner.invoke.output` contain :param:`output` is set ``True`` - and CliRunner.invoke.output equal to :param:`output` if we set it ``False``. - """ - if not output: - return - if fuzzy: - assert output in self.result.output - else: - assert self.result.output.rstrip("\n") == output - - def show_result_output(self): - """Print `CliRunner.invoke.result` output content in debug mode. - - It read variable named `PY_DOLPHINSCHEDULER_DEV_MODE` from env, when it set to `true` or `t` or `1` - will print result output when class :class:`CliTestWrapper` is initialization. - """ - if DEV_MODE: - print(f"\n{self.result.output}\n") - - def assert_success(self, output: str = None, fuzzy: bool = False): - """Assert test is success. - - It would check whether `CliRunner.invoke.exit_code` equals to `0`, with no - exception at the same time. It's also can test the content of `CliRunner.invoke.output`. - - :param output: The output will check compare to the ``CliRunner.invoke.output``. - :param fuzzy: A flag define whether assert :param:`output` in fuzzy or not. - Check if `CliRunner.invoke.output` contain :param:`output` is set ``True`` - and CliRunner.invoke.output equal to :param:`output` if we set it ``False``. 
- """ - assert self.result.exit_code == 0 - if self.result.exception: - raise self.result.exception - self._assert_output(output, fuzzy) - - def assert_fail(self, ret_code: int, output: str = None, fuzzy: bool = False): - """Assert test is fail. - - It would check whether `CliRunner.invoke.exit_code` equals to :param:`ret_code`, - and it will also can test the content of `CliRunner.invoke.output`. - - :param ret_code: The returning code of this fail test. - :param output: The output will check compare to the ``CliRunner.invoke.output``. - :param fuzzy: A flag define whether assert :param:`output` in fuzzy or not. - Check if `CliRunner.invoke.output` contain :param:`output` is set ``True`` - and CliRunner.invoke.output equal to :param:`output` if we set it ``False``. - """ - assert ret_code == self.result.exit_code - self._assert_output(output, fuzzy) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/testing/constants.py b/dolphinscheduler-python/pydolphinscheduler/tests/testing/constants.py deleted file mode 100644 index ed2ee37de7..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/testing/constants.py +++ /dev/null @@ -1,48 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Constants variables for test module.""" - -import os - -# Record some task without example in directory `example`. Some of them maybe can not write example, -# but most of them just without adding by mistake, and we should add it later. -task_without_example = { - "sql", - "http", - "sub_process", - "python", - "procedure", -} - -# The examples ignore test to run it. Those examples could not be run directly cause it need other -# support like resource files, data source and etc. But we should try to run them later for more coverage -ignore_exec_examples = { - "task_datax_example", - "task_flink_example", - "task_map_reduce_example", - "task_spark_example", -} - -# pydolphinscheduler environment home -ENV_PYDS_HOME = "PYDS_HOME" - -# whether in dev mode, if true we will add or remove some tests. Or make be and more detail infos when -# test failed. -DEV_MODE = str( - os.environ.get("PY_DOLPHINSCHEDULER_DEV_MODE", False) -).strip().lower() in {"true", "t", "1"} diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/testing/decorator.py b/dolphinscheduler-python/pydolphinscheduler/tests/testing/decorator.py deleted file mode 100644 index 78078ee863..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/testing/decorator.py +++ /dev/null @@ -1,32 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Decorator module for testing module.""" - -import types -from functools import wraps - - -def foo(func: types.FunctionType): - """Decorate which do nothing for testing module.""" - - @wraps(func) - def wrapper(): - print("foo decorator called.") - func() - - return wrapper diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/testing/docker_wrapper.py b/dolphinscheduler-python/pydolphinscheduler/tests/testing/docker_wrapper.py deleted file mode 100644 index a3d0b6ea7a..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/testing/docker_wrapper.py +++ /dev/null @@ -1,98 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Wrap docker commands for easier create docker container.""" - -import time -from typing import Optional - -import docker -from docker.errors import ImageNotFound -from docker.models.containers import Container - - -class DockerWrapper: - """Wrap docker commands for easier create docker container. - - :param image: The image to create docker container. 
- """ - - def __init__(self, image: str, container_name: str): - self._client = docker.from_env() - self.image = image - self.container_name = container_name - - def run(self, *args, **kwargs) -> Container: - """Create and run a new container. - - This method would return immediately after the container started, if you wish it return container - object when specific service start, you could see :func:`run_until_log` which return container - object when specific output log appear in docker. - """ - if not self.images_exists: - raise ValueError("Docker image named %s do not exists.", self.image) - return self._client.containers.run( - image=self.image, name=self.container_name, detach=True, *args, **kwargs - ) - - def run_until_log( - self, log: str, remove_exists: Optional[bool] = True, *args, **kwargs - ) -> Container: - """Create and run a new container, return when specific log appear. - - It will call :func:`run` inside this method. And after container started, it would not - return it immediately but run command `docker logs` to see whether specific log appear. - It will raise `RuntimeError` when 10 minutes after but specific log do not appear. 
- """ - if remove_exists: - self.remove_container() - - log_byte = str.encode(log) - container = self.run(*args, **kwargs) - - timeout_threshold = 10 * 60 - start_time = time.time() - while time.time() <= start_time + timeout_threshold: - if log_byte in container.logs(tail=1000): - break - time.sleep(2) - # Stop container and raise error when reach timeout threshold but do not appear specific log output - else: - container.remove(force=True) - raise RuntimeError( - "Can not capture specific log `%s` in %d seconds, remove container.", - (log, timeout_threshold), - ) - return container - - def remove_container(self): - """Remove container which already running.""" - containers = self._client.containers.list( - all=True, filters={"name": self.container_name} - ) - if containers: - for container in containers: - container.remove(force=True) - - @property - def images_exists(self) -> bool: - """Check whether the image exists in local docker repository or not.""" - try: - self._client.images.get(self.image) - return True - except ImageNotFound: - return False diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/testing/file.py b/dolphinscheduler-python/pydolphinscheduler/tests/testing/file.py deleted file mode 100644 index 82e083758f..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/testing/file.py +++ /dev/null @@ -1,34 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Testing util about file operating.""" - -from pathlib import Path -from typing import Union - - -def get_file_content(path: Union[str, Path]) -> str: - """Get file content in given path.""" - with open(path, mode="r") as f: - return f.read() - - -def delete_file(path: Union[str, Path]) -> None: - """Delete file in given path.""" - path = Path(path).expanduser() if isinstance(path, str) else path.expanduser() - if path.exists(): - path.unlink() diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/testing/path.py b/dolphinscheduler-python/pydolphinscheduler/tests/testing/path.py deleted file mode 100644 index 974ab3d47c..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/testing/path.py +++ /dev/null @@ -1,58 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Handle path related issue in test module.""" - -from pathlib import Path -from typing import Any, Generator - -project_root = Path(__file__).parent.parent.parent - -path_code_tasks = project_root.joinpath("src", "pydolphinscheduler", "tasks") -path_example = project_root.joinpath("src", "pydolphinscheduler", "examples") -path_yaml_example = project_root.joinpath("examples", "yaml_define") -path_doc_tasks = project_root.joinpath("docs", "source", "tasks") -path_default_config_yaml = project_root.joinpath( - "src", "pydolphinscheduler", "default_config.yaml" -) - - -def get_all_examples() -> Generator[Path, Any, None]: - """Get all examples files path in examples directory.""" - return (ex for ex in path_example.iterdir() if ex.is_file()) - - -def get_tasks(ignore_name: set = None) -> Generator[Path, Any, None]: - """Get all tasks files path in src/pydolphinscheduler/tasks directory.""" - if not ignore_name: - ignore_name = set() - return ( - ex - for ex in path_code_tasks.iterdir() - if ex.is_file() and ex.name not in ignore_name - ) - - -def get_doc_tasks(ignore_name: set = None) -> Generator[Path, Any, None]: - """Get all tasks document path in docs/source/tasks directory.""" - if not ignore_name: - ignore_name = set() - return ( - ex - for ex in path_doc_tasks.iterdir() - if ex.is_file() and ex.name not in ignore_name - ) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/testing/task.py b/dolphinscheduler-python/pydolphinscheduler/tests/testing/task.py deleted file mode 100644 index 11ffbf1e6f..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/testing/task.py +++ /dev/null @@ -1,47 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Mock class Task for other test.""" - -import uuid - -from pydolphinscheduler.core.task import Task as SourceTask - - -class Task(SourceTask): - """Mock class :class:`pydolphinscheduler.core.task.Task` for unittest.""" - - DEFAULT_VERSION = 1 - - def gen_code_and_version(self): - """Mock java gateway code and version, convenience method for unittest.""" - return uuid.uuid1().time, self.DEFAULT_VERSION - - -class TaskWithCode(SourceTask): - """Mock class :class:`pydolphinscheduler.core.task.Task` and it return some code and version.""" - - def __init__( - self, name: str, task_type: str, code: int, version: int, *args, **kwargs - ): - self._constant_code = code - self._constant_version = version - super().__init__(name, task_type, *args, **kwargs) - - def gen_code_and_version(self): - """Mock java gateway code and version, convenience method for unittest.""" - return self._constant_code, self._constant_version diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/utils/__init__.py b/dolphinscheduler-python/pydolphinscheduler/tests/utils/__init__.py deleted file mode 100644 index 119f825bc0..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/utils/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Init tests for utils package.""" diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_date.py b/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_date.py deleted file mode 100644 index b9f8ce5ff3..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_date.py +++ /dev/null @@ -1,78 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test utils.date module.""" - -from datetime import datetime - -import pytest - -from pydolphinscheduler.utils.date import FMT_STD, conv_from_str, conv_to_schedule - -curr_date = datetime.now() - - -@pytest.mark.parametrize( - "src,expect", - [ - (curr_date, curr_date.strftime(FMT_STD)), - (datetime(2021, 1, 1), "2021-01-01 00:00:00"), - (datetime(2021, 1, 1, 1), "2021-01-01 01:00:00"), - (datetime(2021, 1, 1, 1, 1), "2021-01-01 01:01:00"), - (datetime(2021, 1, 1, 1, 1, 1), "2021-01-01 01:01:01"), - (datetime(2021, 1, 1, 1, 1, 1, 1), "2021-01-01 01:01:01"), - ], -) -def test_conv_to_schedule(src: datetime, expect: str) -> None: - """Test function conv_to_schedule.""" - assert expect == conv_to_schedule(src) - - -@pytest.mark.parametrize( - "src,expect", - [ - ("2021-01-01", datetime(2021, 1, 1)), - ("2021/01/01", datetime(2021, 1, 1)), - ("20210101", datetime(2021, 1, 1)), - ("2021-01-01 01:01:01", datetime(2021, 1, 1, 1, 1, 1)), - ("2021/01/01 01:01:01", datetime(2021, 1, 1, 1, 1, 1)), - ("20210101 010101", datetime(2021, 1, 1, 1, 1, 1)), - ], -) -def test_conv_from_str_success(src: str, expect: datetime) -> None: - """Test function conv_from_str success case.""" - assert expect == conv_from_str( - src - ), f"Function conv_from_str convert {src} not expect to {expect}." - - -@pytest.mark.parametrize( - "src", - [ - "2021-01-01 010101", - "2021:01:01", - "202111", - "20210101010101", - "2021:01:01 01:01:01", - ], -) -def test_conv_from_str_not_impl(src: str) -> None: - """Test function conv_from_str fail case.""" - with pytest.raises( - NotImplementedError, match=".*? could not be convert to datetime for now." 
- ): - conv_from_str(src) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_file.py b/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_file.py deleted file mode 100644 index 4cc6df402f..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_file.py +++ /dev/null @@ -1,85 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Test file utils.""" - -import shutil -from pathlib import Path - -import pytest - -from pydolphinscheduler.utils import file -from tests.testing.file import delete_file, get_file_content - -content = "test_content" -file_path = "/tmp/test/file/test_file_write.txt" - - -@pytest.fixture -def teardown_del_file(): - """Teardown about delete file.""" - yield - delete_file(file_path) - - -@pytest.fixture -def setup_crt_first(): - """Set up and teardown about create file first and then delete it.""" - file.write(content=content, to_path=file_path) - yield - delete_file(file_path) - - -def test_write_content(teardown_del_file): - """Test function :func:`write` on write behavior with correct content.""" - assert not Path(file_path).exists() - file.write(content=content, to_path=file_path) - assert Path(file_path).exists() - assert content == get_file_content(file_path) - - -def test_write_not_create_parent(teardown_del_file): - """Test function :func:`write` with parent not exists and do not create path.""" - file_test_dir = Path(file_path).parent - if file_test_dir.exists(): - shutil.rmtree(str(file_test_dir)) - assert not file_test_dir.exists() - with pytest.raises( - ValueError, - match="Parent directory do not exists and set param `create` to `False`", - ): - file.write(content=content, to_path=file_path, create=False) - - -def test_write_overwrite(setup_crt_first): - """Test success with file exists but set ``True`` to overwrite.""" - assert Path(file_path).exists() - - new_content = f"new_{content}" - file.write(content=new_content, to_path=file_path, overwrite=True) - assert new_content == get_file_content(file_path) - - -def test_write_overwrite_error(setup_crt_first): - """Test error with file exists but set ``False`` to overwrite.""" - assert Path(file_path).exists() - - new_content = f"new_{content}" - with pytest.raises( - FileExistsError, match=".*already exists and you choose not overwrite mode\\." 
- ): - file.write(content=new_content, to_path=file_path) diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_string.py b/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_string.py deleted file mode 100644 index 2ccd206df1..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_string.py +++ /dev/null @@ -1,87 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test utils.string module.""" - -import pytest - -from pydolphinscheduler.utils.string import attr2camel, class_name2camel, snake2camel - - -@pytest.mark.parametrize( - "snake, expect", - [ - ("snake_case", "snakeCase"), - ("snake_123case", "snake123Case"), - ("snake_c_a_s_e", "snakeCASE"), - ("snake__case", "snakeCase"), - ("snake_case_case", "snakeCaseCase"), - ("_snake_case", "SnakeCase"), - ("__snake_case", "SnakeCase"), - ("Snake_case", "SnakeCase"), - ], -) -def test_snake2camel(snake: str, expect: str): - """Test function snake2camel, this is a base function for utils.string.""" - assert expect == snake2camel( - snake - ), f"Test case {snake} do no return expect result {expect}." 
- - -@pytest.mark.parametrize( - "attr, expects", - [ - # source attribute, (true expect, false expect), - ("snake_case", ("snakeCase", "snakeCase")), - ("snake_123case", ("snake123Case", "snake123Case")), - ("snake_c_a_s_e", ("snakeCASE", "snakeCASE")), - ("snake__case", ("snakeCase", "snakeCase")), - ("snake_case_case", ("snakeCaseCase", "snakeCaseCase")), - ("_snake_case", ("snakeCase", "SnakeCase")), - ("__snake_case", ("snakeCase", "SnakeCase")), - ("Snake_case", ("SnakeCase", "SnakeCase")), - ], -) -def test_attr2camel(attr: str, expects: tuple): - """Test function attr2camel.""" - for idx, expect in enumerate(expects): - include_private = idx % 2 == 0 - assert expect == attr2camel( - attr, include_private - ), f"Test case {attr} do no return expect result {expect} when include_private is {include_private}." - - -@pytest.mark.parametrize( - "class_name, expect", - [ - ("snake_case", "snakeCase"), - ("snake_123case", "snake123Case"), - ("snake_c_a_s_e", "snakeCASE"), - ("snake__case", "snakeCase"), - ("snake_case_case", "snakeCaseCase"), - ("_snake_case", "snakeCase"), - ("_Snake_case", "snakeCase"), - ("__snake_case", "snakeCase"), - ("__Snake_case", "snakeCase"), - ("Snake_case", "snakeCase"), - ], -) -def test_class_name2camel(class_name: str, expect: str): - """Test function class_name2camel.""" - assert expect == class_name2camel( - class_name - ), f"Test case {class_name} do no return expect result {expect}." diff --git a/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_yaml_parser.py b/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_yaml_parser.py deleted file mode 100644 index ad3aaf7bd1..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tests/utils/test_yaml_parser.py +++ /dev/null @@ -1,255 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test utils.path_dict module.""" - -from typing import Dict - -import pytest -from ruamel.yaml import YAML - -from pydolphinscheduler.utils.yaml_parser import YamlParser -from tests.testing.path import path_default_config_yaml - -yaml = YAML() - -expects = [ - { - # yaml.load("no need test") is a flag about skipping it because it to different to maintainer - "name": yaml.load("no need test"), - "name.family": ("Smith", "SmithEdit"), - "name.given": ("Alice", "AliceEdit"), - "name.mark": yaml.load("no need test"), - "name.mark.name_mark": yaml.load("no need test"), - "name.mark.name_mark.key": ("value", "valueEdit"), - }, - { - # yaml.load("no need test") is a flag about skipping it because it to different to maintainer - "java_gateway": yaml.load("no need test"), - "java_gateway.address": ("127.0.0.1", "127.1.1.1"), - "java_gateway.port": (25333, 25555), - "java_gateway.auto_convert": (True, False), - "default": yaml.load("no need test"), - "default.user": yaml.load("no need test"), - "default.user.name": ("userPythonGateway", "userPythonGatewayEdit"), - "default.user.password": ("userPythonGateway", "userPythonGatewayEdit"), - "default.user.email": ( - "userPythonGateway@dolphinscheduler.com", - "userEdit@dolphinscheduler.com", - ), - "default.user.tenant": ("tenant_pydolphin", "tenant_pydolphinEdit"), - "default.user.phone": (11111111111, 22222222222), - "default.user.state": (1, 0), - 
"default.workflow": yaml.load("no need test"), - "default.workflow.project": ("project-pydolphin", "project-pydolphinEdit"), - "default.workflow.tenant": ("tenant_pydolphin", "SmithEdit"), - "default.workflow.user": ("userPythonGateway", "SmithEdit"), - "default.workflow.queue": ("queuePythonGateway", "queueEdit"), - "default.workflow.worker_group": ("default", "wgEdit"), - "default.workflow.release_state": ("online", "offline"), - "default.workflow.time_zone": ("Asia/Shanghai", "Europe/Amsterdam"), - "default.workflow.warning_type": ("NONE", "SUCCESS"), - }, -] - -param = [ - """#example -name: - # details - family: Smith # very common - given: Alice # one of the siblings - mark: - name_mark: - key: value -""" -] - -with open(path_default_config_yaml, "r") as f: - param.append(f.read()) - - -@pytest.mark.parametrize( - "src, delimiter, expect", - [ - ( - param[0], - "|", - expects[0], - ), - ( - param[1], - "/", - expects[1], - ), - ], -) -def test_yaml_parser_specific_delimiter(src: str, delimiter: str, expect: Dict): - """Test specific delimiter for :class:`YamlParser`.""" - - def ch_dl(key): - return key.replace(".", delimiter) - - yaml_parser = YamlParser(src, delimiter=delimiter) - assert all( - [ - expect[key][0] == yaml_parser[ch_dl(key)] - for key in expect - if expect[key] != "no need test" - ] - ) - assert all( - [ - expect[key][0] == yaml_parser.get(ch_dl(key)) - for key in expect - if expect[key] != "no need test" - ] - ) - - -@pytest.mark.parametrize( - "src, expect", - [ - ( - param[0], - expects[0], - ), - ( - param[1], - expects[1], - ), - ], -) -def test_yaml_parser_contains(src: str, expect: Dict): - """Test magic function :func:`YamlParser.__contain__` also with `key in obj` syntax.""" - yaml_parser = YamlParser(src) - assert len(expect.keys()) == len( - yaml_parser.dict_parser.keys() - ), "Parser keys length not equal to expect keys length" - assert all( - [key in yaml_parser for key in expect] - ), "Parser keys not equal to expect keys" - - 
-@pytest.mark.parametrize( - "src, expect", - [ - ( - param[0], - expects[0], - ), - ( - param[1], - expects[1], - ), - ], -) -def test_yaml_parser_get(src: str, expect: Dict): - """Test magic function :func:`YamlParser.__getitem__` also with `obj[key]` syntax.""" - yaml_parser = YamlParser(src) - assert all( - [ - expect[key][0] == yaml_parser[key] - for key in expect - if expect[key] != "no need test" - ] - ) - assert all( - [ - expect[key][0] == yaml_parser.get(key) - for key in expect - if expect[key] != "no need test" - ] - ) - - -@pytest.mark.parametrize( - "src, expect", - [ - ( - param[0], - expects[0], - ), - ( - param[1], - expects[1], - ), - ], -) -def test_yaml_parser_set(src: str, expect: Dict): - """Test magic function :func:`YamlParser.__setitem__` also with `obj[key] = val` syntax.""" - yaml_parser = YamlParser(src) - for key in expect: - assert key in yaml_parser.dict_parser.keys() - if expect[key] == "no need test": - continue - assert expect[key][0] == yaml_parser.dict_parser[key] - assert expect[key][1] != yaml_parser.dict_parser[key] - - yaml_parser[key] = expect[key][1] - assert expect[key][0] != yaml_parser.dict_parser[key] - assert expect[key][1] == yaml_parser.dict_parser[key] - - -@pytest.mark.parametrize( - "src, setter, expect", - [ - ( - param[0], - {"name.mark.name_mark.key": "edit"}, - """#example -name: - # details - family: Smith # very common - given: Alice # one of the siblings - mark: - name_mark: - key: edit -""", - ), - ( - param[0], - { - "name.family": "SmithEdit", - "name.given": "AliceEdit", - "name.mark.name_mark.key": "edit", - }, - """#example -name: - # details - family: SmithEdit # very common - given: AliceEdit # one of the siblings - mark: - name_mark: - key: edit -""", - ), - ], -) -def test_yaml_parser_str_repr(src: str, setter: Dict, expect: str): - """Test function :func:`YamlParser.to_string`.""" - yaml_parser = YamlParser(src) - - # Equal before change - assert f"YamlParser({src})" == repr(yaml_parser) - assert 
src == str(yaml_parser) - - for key, val in setter.items(): - yaml_parser[key] = val - - # Equal after changed - assert expect == str(yaml_parser) - assert f"YamlParser({expect})" == repr(yaml_parser) diff --git a/dolphinscheduler-python/pydolphinscheduler/tox.ini b/dolphinscheduler-python/pydolphinscheduler/tox.ini deleted file mode 100644 index 4ce8043265..0000000000 --- a/dolphinscheduler-python/pydolphinscheduler/tox.ini +++ /dev/null @@ -1,79 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[tox] -envlist = local-ci, auto-lint, lint, doc-build, doc-build-multi, code-test, integrate-test, local-integrate-test, py{36,37,38,39,310,311} - -[testenv] -allowlist_externals = - make - git - -[testenv:auto-lint] -extras = style -commands = - python -m isort . - python -m black . - python -m autoflake --in-place --remove-all-unused-imports --ignore-init-module-imports --recursive . - -[testenv:lint] -extras = style -commands = - python -m isort --check . - python -m black --check . - python -m flake8 - python -m autoflake --remove-all-unused-imports --ignore-init-module-imports --check --recursive . 
- -[testenv:code-test] -extras = test -# Run both tests and coverage -commands = - python -m pytest --cov=pydolphinscheduler --cov-config={toxinidir}/.coveragerc tests/ - -[testenv:doc-build] -extras = doc -commands = - make -C {toxinidir}/docs clean - make -C {toxinidir}/docs html - -[testenv:doc-build-multi] -extras = doc -commands = - # Get all tags for `multiversion` subcommand - git fetch --tags - make -C {toxinidir}/docs clean - make -C {toxinidir}/docs multiversion - -[testenv:integrate-test] -extras = test -commands = - python -m pytest tests/integration/ - -[testenv:local-integrate-test] -extras = test -setenv = - skip_launch_docker = true -commands = - {[testenv:integrate-test]commands} - -# local-ci do not build `doc-build-multi` -[testenv:local-ci] -extras = dev -commands = - {[testenv:lint]commands} - {[testenv:code-test]commands} - {[testenv:doc-build]commands} diff --git a/pom.xml b/pom.xml index 4d8d29791a..714af0172e 100755 --- a/pom.xml +++ b/pom.xml @@ -49,7 +49,6 @@ dolphinscheduler-data-quality dolphinscheduler-standalone-server dolphinscheduler-datasource-plugin - dolphinscheduler-python dolphinscheduler-meter dolphinscheduler-master dolphinscheduler-worker @@ -91,8 +90,6 @@ ${project.version} true true - - true true @@ -175,11 +172,6 @@ ${project.version} - - org.apache.dolphinscheduler - dolphinscheduler-python - ${project.version} - org.apache.dolphinscheduler dolphinscheduler-alert-api