Browse Source

[cherry-pick][python] Make dolphinscheduler python API works to 2.0.2 (#7608)

* [cherry-pick][python] Make it work to 2.0.2

* Remove unused ProcessExecutionTypeEnum

* Add queryByName to project

* Add checkTenantExists to tenant

* Add queryByTenantCode to tenant

* Add queryQueueName to queue

* Add all content from dev branch

* Add gitignore

* Add pydolphinscheduler content

* Add ds-py to bin test

* Py merge to 202

* Fix version

* Fix missing variable

* Add py4j as known deps

* Fix core database bug
2.0.7-release
Jiajie Zhong 3 years ago committed by GitHub
parent
commit
9e068d23e2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 14
      .gitignore
  2. 15
      docker/build/conf/dolphinscheduler/supervisor/supervisor.ini
  3. 19
      docker/build/startup.sh
  4. 9
      dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/service/ProjectService.java
  5. 8
      dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/service/QueueService.java
  6. 16
      dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/service/TenantService.java
  7. 15
      dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/service/impl/ProjectServiceImpl.java
  8. 26
      dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/service/impl/QueueServiceImpl.java
  9. 19
      dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/service/impl/TenantServiceImpl.java
  10. 7
      dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/mapper/QueueMapper.java
  11. 9
      dolphinscheduler-dao/src/main/resources/org/apache/dolphinscheduler/dao/mapper/QueueMapper.xml
  12. 5
      dolphinscheduler-dist/pom.xml
  13. 9
      dolphinscheduler-dist/src/main/assembly/dolphinscheduler-bin.xml
  14. 60
      dolphinscheduler-python/pom.xml
  15. 32
      dolphinscheduler-python/pydolphinscheduler/.coveragerc
  16. 37
      dolphinscheduler-python/pydolphinscheduler/.flake8
  17. 19
      dolphinscheduler-python/pydolphinscheduler/.isort.cfg
  18. 173
      dolphinscheduler-python/pydolphinscheduler/README.md
  19. 34
      dolphinscheduler-python/pydolphinscheduler/ROADMAP.md
  20. 55
      dolphinscheduler-python/pydolphinscheduler/examples/bulk_create.py
  21. 55
      dolphinscheduler-python/pydolphinscheduler/examples/task_conditions_example.py
  22. 50
      dolphinscheduler-python/pydolphinscheduler/examples/task_datax_example.py
  23. 72
      dolphinscheduler-python/pydolphinscheduler/examples/task_dependent_example.py
  24. 50
      dolphinscheduler-python/pydolphinscheduler/examples/task_switch_example.py
  25. 52
      dolphinscheduler-python/pydolphinscheduler/examples/tutorial.py
  26. 22
      dolphinscheduler-python/pydolphinscheduler/pytest.ini
  27. 18
      dolphinscheduler-python/pydolphinscheduler/requirements.txt
  28. 27
      dolphinscheduler-python/pydolphinscheduler/requirements_dev.txt
  29. 16
      dolphinscheduler-python/pydolphinscheduler/setup.cfg
  30. 94
      dolphinscheduler-python/pydolphinscheduler/setup.py
  31. 18
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/__init__.py
  32. 122
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/constants.py
  33. 18
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/__init__.py
  34. 74
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/base.py
  35. 40
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/base_side.py
  36. 64
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/database.py
  37. 360
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/process_definition.py
  38. 268
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/task.py
  39. 46
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/exceptions.py
  40. 55
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/java_gateway.py
  41. 22
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/side/__init__.py
  42. 42
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/side/project.py
  43. 42
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/side/queue.py
  44. 45
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/side/tenant.py
  45. 70
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/side/user.py
  46. 30
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/side/worker_group.py
  47. 18
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/__init__.py
  48. 185
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/condition.py
  49. 121
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/datax.py
  50. 274
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/dependent.py
  51. 101
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/http.py
  52. 60
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/procedure.py
  53. 51
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/python.py
  54. 38
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/shell.py
  55. 99
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sql.py
  56. 55
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sub_process.py
  57. 158
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/switch.py
  58. 18
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/__init__.py
  59. 82
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/date.py
  60. 39
      dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/string.py
  61. 18
      dolphinscheduler-python/pydolphinscheduler/tests/__init__.py
  62. 18
      dolphinscheduler-python/pydolphinscheduler/tests/core/__init__.py
  63. 54
      dolphinscheduler-python/pydolphinscheduler/tests/core/test_database.py
  64. 342
      dolphinscheduler-python/pydolphinscheduler/tests/core/test_process_definition.py
  65. 224
      dolphinscheduler-python/pydolphinscheduler/tests/core/test_task.py
  66. 18
      dolphinscheduler-python/pydolphinscheduler/tests/tasks/__init__.py
  67. 439
      dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_condition.py
  68. 124
      dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_datax.py
  69. 793
      dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_dependent.py
  70. 144
      dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_http.py
  71. 106
      dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_procedure.py
  72. 122
      dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_python.py
  73. 89
      dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_shell.py
  74. 149
      dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sql.py
  75. 114
      dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sub_process.py
  76. 300
      dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_switch.py
  77. 52
      dolphinscheduler-python/pydolphinscheduler/tests/test_java_gateway.py
  78. 18
      dolphinscheduler-python/pydolphinscheduler/tests/testing/__init__.py
  79. 32
      dolphinscheduler-python/pydolphinscheduler/tests/testing/task.py
  80. 18
      dolphinscheduler-python/pydolphinscheduler/tests/utils/__init__.py
  81. 78
      dolphinscheduler-python/pydolphinscheduler/tests/utils/test_date.py
  82. 87
      dolphinscheduler-python/pydolphinscheduler/tests/utils/test_string.py
  83. 161
      dolphinscheduler-python/src/main/java/org/apache/dolphinscheduler/server/PythonGatewayServer.java
  84. 5
      dolphinscheduler-server/src/main/resources/config/install_config.conf
  85. 4
      dolphinscheduler-standalone-server/pom.xml
  86. 2
      dolphinscheduler-standalone-server/src/main/java/org/apache/dolphinscheduler/server/StandaloneServer.java
  87. 1
      pom.xml
  88. 5
      script/dolphinscheduler-daemon.sh
  89. 7
      script/start-all.sh
  90. 8
      script/status-all.sh
  91. 7
      script/stop-all.sh
  92. 1
      tools/dependencies/known-dependencies.txt

14
.gitignore vendored

@ -46,3 +46,17 @@ dolphinscheduler-server/src/main/resources/logback.xml
dolphinscheduler-ui/dist
dolphinscheduler-ui/node
docker/build/apache-dolphinscheduler*
# ------------------
# pydolphinscheduler
# ------------------
# Cache
__pycache__/
# Build
build/
*egg-info/
# Test coverage
.coverage
htmlcov/

15
docker/build/conf/dolphinscheduler/supervisor/supervisor.ini

@ -105,3 +105,18 @@ killasgroup=true
redirect_stderr=true
stdout_logfile=/dev/fd/1
stdout_logfile_maxbytes=0
[program:python-gateway]
command=%(ENV_DOLPHINSCHEDULER_BIN)s/dolphinscheduler-daemon.sh start python-gateway
directory=%(ENV_DOLPHINSCHEDULER_HOME)s
priority=999
autostart=%(ENV_STANDALONE_START_ENABLED)s
autorestart=true
startsecs=5
stopwaitsecs=3
exitcodes=0
stopasgroup=true
killasgroup=true
redirect_stderr=true
stdout_logfile=/dev/fd/1
stdout_logfile_maxbytes=0

19
docker/build/startup.sh

@ -25,6 +25,7 @@ export API_START_ENABLED=false
export ALERT_START_ENABLED=false
export LOGGER_START_ENABLED=false
export STANDALONE_START_ENABLED=false
export PYTHON_GATEWAY_START_ENABLED=false
# wait database
waitDatabase() {
@ -68,13 +69,14 @@ waitZK() {
printUsage() {
echo -e "Dolphin Scheduler is a distributed and easy-to-expand visual DAG workflow scheduling system,"
echo -e "dedicated to solving the complex dependencies in data processing, making the scheduling system out of the box for data processing.\n"
echo -e "Usage: [ all | master-server | worker-server | api-server | alert-server | standalone-server ]\n"
printf "%-13s: %s\n" "all" "Run master-server, worker-server, api-server and alert-server"
printf "%-13s: %s\n" "master-server" "MasterServer is mainly responsible for DAG task split, task submission monitoring."
printf "%-13s: %s\n" "worker-server" "WorkerServer is mainly responsible for task execution and providing log services."
printf "%-13s: %s\n" "api-server" "ApiServer is mainly responsible for processing requests and providing the front-end UI layer."
printf "%-13s: %s\n" "alert-server" "AlertServer mainly include Alarms."
printf "%-13s: %s\n" "standalone-server" "Standalone server that uses embedded zookeeper and database, only for testing and demostration."
echo -e "Usage: [ all | master-server | worker-server | api-server | alert-server | standalone-server | python-gateway-server]\n"
printf "%-13s: %s\n" "all" "Run master-server, worker-server, api-server, alert-server, python-gateway-server"
printf "%-13s: %s\n" "master-server" "MasterServer is mainly responsible for DAG task split, task submission monitoring."
printf "%-13s: %s\n" "worker-server" "WorkerServer is mainly responsible for task execution and providing log services."
printf "%-13s: %s\n" "api-server" "ApiServer is mainly responsible for processing requests and providing the front-end UI layer."
printf "%-13s: %s\n" "alert-server" "AlertServer mainly include Alarms."
printf "%-13s: %s\n" "standalone-server" "Standalone server that uses embedded zookeeper and database, only for testing and demostration."
printf "%-13s: %s\n" "python-gateway-server" "Python gateway is a backend server for python API."
}
# init config file
@ -115,6 +117,9 @@ case "$1" in
(standalone-server)
export STANDALONE_START_ENABLED=true
;;
(python-gateway-server)
export PYTHON_GATEWAY_START_ENABLED=true
;;
(help)
printUsage
exit 1

9
dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/service/ProjectService.java

@ -46,6 +46,15 @@ public interface ProjectService {
*/
Map<String, Object> queryByCode(User loginUser, long projectCode);
/**
* query project details by name
*
* @param loginUser login user
* @param projectName project name
* @return project detail information
*/
Map<String, Object> queryByName(User loginUser, String projectName);
/**
* check project and authorization
*

8
dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/service/QueueService.java

@ -76,4 +76,12 @@ public interface QueueService {
*/
Result<Object> verifyQueue(String queue, String queueName);
/**
* query queue by queueName
*
* @param queueName queue name
* @return queue object for provide queue name
*/
Map<String, Object> queryQueueName(String queueName);
}

16
dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/service/TenantService.java

@ -92,4 +92,20 @@ public interface TenantService {
* @return true if tenant code can user, otherwise return false
*/
Result verifyTenantCode(String tenantCode);
/**
* check if provide tenant code object exists
*
* @param tenantCode tenant code
* @return true if tenant code exists, false if not
*/
boolean checkTenantExists(String tenantCode);
/**
* query tenant by tenant code
*
* @param tenantCode tenant code
* @return tenant list
*/
Map<String, Object> queryByTenantCode(String tenantCode);
}

15
dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/service/impl/ProjectServiceImpl.java

@ -139,6 +139,21 @@ public class ProjectServiceImpl extends BaseServiceImpl implements ProjectServic
return result;
}
@Override
public Map<String, Object> queryByName(User loginUser, String projectName) {
Map<String, Object> result = new HashMap<>();
Project project = projectMapper.queryByName(projectName);
boolean hasProjectAndPerm = hasProjectAndPerm(loginUser, project, result);
if (!hasProjectAndPerm) {
return result;
}
if (project != null) {
result.put(Constants.DATA_LIST, project);
putMsg(result, Status.SUCCESS);
}
return result;
}
/**
* check project and authorization
*

26
dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/service/impl/QueueServiceImpl.java

@ -263,6 +263,32 @@ public class QueueServiceImpl extends BaseServiceImpl implements QueueService {
return result;
}
/**
* query queue by queueName
*
* @param queueName queue name
* @return queue object for provide queue name
*/
@Override
public Map<String, Object> queryQueueName(String queueName) {
Map<String, Object> result = new HashMap<>();
if (StringUtils.isEmpty(queueName)) {
putMsg(result, Status.REQUEST_PARAMS_NOT_VALID_ERROR, Constants.QUEUE_NAME);
return result;
}
if (!checkQueueNameExist(queueName)) {
putMsg(result, Status.QUEUE_NOT_EXIST, queueName);
return result;
}
List<Queue> queueList = queueMapper.queryQueueName(queueName);
result.put(Constants.DATA_LIST, queueList);
putMsg(result, Status.SUCCESS);
return result;
}
/**
* check queue exist
* if exists return true, if not exists return false

19
dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/service/impl/TenantServiceImpl.java

@ -318,8 +318,25 @@ public class TenantServiceImpl extends BaseServiceImpl implements TenantService
* @param tenantCode tenant code
* @return true if the tenant code exists, otherwise return false
*/
private boolean checkTenantExists(String tenantCode) {
public boolean checkTenantExists(String tenantCode) {
Boolean existTenant = tenantMapper.existTenant(tenantCode);
return existTenant == Boolean.TRUE;
}
/**
* query tenant by tenant code
*
* @param tenantCode tenant code
* @return tenant detail information
*/
@Override
public Map<String, Object> queryByTenantCode(String tenantCode) {
Map<String, Object> result = new HashMap<>();
Tenant tenant = tenantMapper.queryByTenantCode(tenantCode);
if (tenant != null) {
result.put(Constants.DATA_LIST, tenant);
putMsg(result, Status.SUCCESS);
}
return result;
}
}

7
dolphinscheduler-dao/src/main/java/org/apache/dolphinscheduler/dao/mapper/QueueMapper.java

@ -53,4 +53,11 @@ public interface QueueMapper extends BaseMapper<Queue> {
* @return true if exist else return null
*/
Boolean existQueue(@Param("queue") String queue, @Param("queueName") String queueName);
/**
* query queue by queue name
* @param queueName queueName
* @return queue list
*/
List<Queue> queryQueueName(@Param("queueName") String queueName);
}

9
dolphinscheduler-dao/src/main/resources/org/apache/dolphinscheduler/dao/mapper/QueueMapper.xml

@ -54,4 +54,13 @@
and queue_name =#{queueName}
</if>
</select>
<select id="queryQueueName" resultType="org.apache.dolphinscheduler.dao.entity.Queue">
select
<include refid="baseSql"/>
from t_ds_queue
where 1 = 1
<if test="queueName != null and queueName != ''">
and queue_name =#{queueName}
</if>
</select>
</mapper>

5
dolphinscheduler-dist/pom.xml vendored

@ -51,6 +51,11 @@
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-alert-server</artifactId>
</dependency>
<dependency>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-python</artifactId>
</dependency>
</dependencies>
<profiles>

9
dolphinscheduler-dist/src/main/assembly/dolphinscheduler-bin.xml vendored

@ -113,6 +113,15 @@
<outputDirectory>conf</outputDirectory>
</fileSet>
<fileSet>
<directory>${basedir}/../dolphinscheduler-python/src/main/resources</directory>
<includes>
<include>**/*.yaml</include>
<include>**/*.xml</include>
</includes>
<outputDirectory>conf</outputDirectory>
</fileSet>
<fileSet>
<directory>${basedir}/../dolphinscheduler-dist/target/dolphinscheduler-dist-${project.version}</directory>
<includes>

60
dolphinscheduler-python/pom.xml

@ -0,0 +1,60 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler</artifactId>
<version>2.0.2-SNAPSHOT</version>
</parent>
<artifactId>dolphinscheduler-python</artifactId>
<name>${project.artifactId}</name>
<packaging>jar</packaging>
<dependencies>
<!-- dolphinscheduler -->
<dependency>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-api</artifactId>
</dependency>
<!--springboot-->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
<exclusions>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-tomcat</artifactId>
</exclusion>
<exclusion>
<artifactId>log4j-to-slf4j</artifactId>
<groupId>org.apache.logging.log4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>net.sf.py4j</groupId>
<artifactId>py4j</artifactId>
</dependency>
</dependencies>
</project>

32
dolphinscheduler-python/pydolphinscheduler/.coveragerc

@ -0,0 +1,32 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
[run]
command_line = -m pytest
omit =
# Ignore all test cases in tests/
tests/*
# TODO. Temporary ignore java_gateway file, because we could not find good way to test it.
src/pydolphinscheduler/java_gateway.py
[report]
# Don't report files that are 100% covered
skip_covered = True
show_missing = True
precision = 2
# Report will fail when coverage is under 85.00%
fail_under = 85

37
dolphinscheduler-python/pydolphinscheduler/.flake8

@ -0,0 +1,37 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
[flake8]
max-line-length = 110
exclude =
.git,
__pycache__,
.pytest_cache,
*.egg-info,
docs/source/conf.py
old,
build,
dist,
htmlcov
ignore =
# It's clear and not need to add docstring
D107, # D107: Don't require docstrings on __init__
D105, # D105: Missing docstring in magic method
# Conflict to Black
W503 # W503: Line breaks before binary operators
per-file-ignores =
src/pydolphinscheduler/side/__init__.py:F401

19
dolphinscheduler-python/pydolphinscheduler/.isort.cfg

@ -0,0 +1,19 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
[settings]
profile=black

173
dolphinscheduler-python/pydolphinscheduler/README.md

@ -0,0 +1,173 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
# pydolphinscheduler
[![GitHub Build][ga-py-test]][ga]
[![Code style: black][black-shield]][black-gh]
[![Imports: isort][isort-shield]][isort-gh]
pydolphinscheduler is python API for Apache DolphinScheduler, which allow you definition
your workflow by python code, aka workflow-as-codes.
## Quick Start
> **_Notice:_** For now, due to pydolphinscheduler without release to any binary tarball or [PyPI][pypi], you
> have to clone Apache DolphinScheduler code from GitHub to ensure quick start setup
Here we show you how to install and run a simple example of pydolphinscheduler
### Prepare
```shell
# Clone code from github
git clone git@github.com:apache/dolphinscheduler.git
# Install pydolphinscheduler from source
cd dolphinscheduler-python/pydolphinscheduler
pip install -e .
```
### Start Server And Run Example
Before you run an example, you have to start backend server. You could follow [development setup][dev-setup]
section "DolphinScheduler Standalone Quick Start" to set up developer environment. You have to start backend
and frontend server in this step, which mean that you could view DolphinScheduler UI in your browser with URL
http://localhost:12345/dolphinscheduler
After the backend server has started, all requests from `pydolphinscheduler` will be sent to it.
And for now we could run a simple example by:
```shell
cd dolphinscheduler-python/pydolphinscheduler
python examples/tutorial.py
```
> **_NOTICE:_** Since Apache DolphinScheduler's tenant is required when running commands, you might need to change
> the tenant value in `examples/tutorial.py`. For now the value is `tenant_exists`; please change it to a username that
> exists in your environment.
After command execute, you could see a new project with single process definition named *tutorial* in the [UI][ui-project].
Until now, we finish quick start by an example of pydolphinscheduler and run it. If you want to inspect or join
pydolphinscheduler develop, you could take a look at [develop](#develop)
## Develop
pydolphinscheduler is python API for Apache DolphinScheduler, it just defines what workflow look like instead of
store or execute it. We here use [py4j][py4j] to dynamically access Java Virtual Machine.
### Setup Develop Environment
We already clone the code in [quick start](#quick-start), so next step we have to open pydolphinscheduler project
in you editor. We recommend you use [pycharm][pycharm] instead of [IntelliJ IDEA][idea] to open it. And you could
just open directory `dolphinscheduler-python/pydolphinscheduler` instead of `dolphinscheduler-python`.
Then you should add developer dependence to make sure you could run test and check code style locally
```shell
pip install -r requirements_dev.txt
```
### Brief Concept
Apache DolphinScheduler is designed to define workflows via the UI, and pydolphinscheduler tries to define them by code. When
defining a workflow by code, users usually do not care whether the user, tenant, or queue already exists. All they care about
is creating a new workflow from the code they define. So we have some **side objects** in the `pydolphinscheduler/side`
directory; they only check whether an object exists, and create it if it does not.
#### Process Definition
pydolphinscheduler's workflow object. "Process definition" is the same name as the corresponding Java object (it may be
changed to a simpler word later).
#### Tasks
pydolphinscheduler tasks object, we use tasks to define exact job we want DolphinScheduler do for us. For now,
we only support `shell` task to execute shell task. [This link][all-task] list all tasks support in DolphinScheduler
and would be implemented in the further.
### Code Style
We use [isort][isort] to automatically keep Python imports alphabetically, and use [Black][black] for code
formatter and [Flake8][flake8] as the pep8 checker. If you use [pycharm][pycharm] or [IntelliJ IDEA][idea],
maybe you could follow [Black-integration][black-editor] to configure them in your environment.
Our Python API CI would automatically run code style checker and unittest when you submit pull request in
GitHub, you could also run static check locally.
```shell
# We recommend you run isort and Black before Flake8, because Black could auto fix some code style issue
# but Flake8 just hint when code style not match pep8
# Run Isort
isort .
# Run Black
black .
# Run Flake8
flake8
```
### Testing
pydolphinscheduler using [pytest][pytest] to test our codebase. GitHub Action will run our test when you create
pull request or commit to dev branch, with python version `3.6|3.7|3.8|3.9` and operating system `linux|macOS|windows`.
To test locally, you could directly run pytest after set `PYTHONPATH`
```shell
PYTHONPATH=src/ pytest
```
We try to keep pydolphinscheduler usable through unit test coverage. 90% test coverage is our target, but for
now, we require test coverage up to 85%, and each pull request with less than 85% would fail our CI step
`Tests coverage`. We use [coverage][coverage] to check our test coverage, and you could check it locally by
run command.
```shell
coverage run && coverage report
```
It would not only run unit test but also show each file coverage which cover rate less than 100%, and `TOTAL`
line show you total coverage of you code. If your CI failed with coverage you could go and find some reason by
this command output.
<!-- content -->
[pypi]: https://pypi.org/
[dev-setup]: https://dolphinscheduler.apache.org/en-us/development/development-environment-setup.html
[ui-project]: http://8.142.34.29:12345/dolphinscheduler/ui/#/projects/list
[py4j]: https://www.py4j.org/index.html
[pycharm]: https://www.jetbrains.com/pycharm
[idea]: https://www.jetbrains.com/idea/
[all-task]: https://dolphinscheduler.apache.org/en-us/docs/dev/user_doc/guide/task/shell.html
[pytest]: https://docs.pytest.org/en/latest/
[black]: https://black.readthedocs.io/en/stable/index.html
[flake8]: https://flake8.pycqa.org/en/latest/index.html
[black-editor]: https://black.readthedocs.io/en/stable/integrations/editors.html#pycharm-intellij-idea
[coverage]: https://coverage.readthedocs.io/en/stable/
[isort]: https://pycqa.github.io/isort/index.html
<!-- badge -->
[ga-py-test]: https://github.com/apache/dolphinscheduler/actions/workflows/py-ci.yml/badge.svg?branch=dev
[ga]: https://github.com/apache/dolphinscheduler/actions
[black-shield]: https://img.shields.io/badge/code%20style-black-000000.svg
[black-gh]: https://github.com/psf/black
[isort-shield]: https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336
[isort-gh]: https://pycqa.github.io/isort/

34
dolphinscheduler-python/pydolphinscheduler/ROADMAP.md

@ -0,0 +1,34 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
## Roadmap
### v0.0.3
Add other features, tasks, parameters in DS, keep code coverage up to 90%
### v0.0.2
Add docs about how to use and develop package, code coverage up to 90%, add CI/CD
for package
### v0.0.1(current)
Setup up POC, for defining DAG with python code, running DAG manually,
releasing to pypi

55
dolphinscheduler-python/pydolphinscheduler/examples/bulk_create.py

@ -0,0 +1,55 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
This example shows you how to create workflows in batch mode.

After this example runs, we will create ``NUM_WORKFLOWS`` workflows named
`workflow:<workflow_num>`, each with ``NUM_TASKS`` tasks named
`task:<task_num>-workflow:<workflow_num>` (task numbers start at 0). When
``IS_CHAIN`` is ``True`` the tasks in each workflow form a linear chain, e.g.

    task:0-workflow:1 -> task:1-workflow:1 -> task:2-workflow:1 -> ...

Set ``IS_CHAIN`` to ``False`` to create independent (parallel) tasks instead.
"""
from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.tasks.shell import Shell

NUM_WORKFLOWS = 10
NUM_TASKS = 5
# Make sure this tenant exists in your operating system.
TENANT = "exists_tenant"
# Whether each task should depend on the previous one or not.
# False creates workflows with independent tasks, while True links tasks like
# `pre_task -> current_task -> next_task`. Default True.
IS_CHAIN = True

for wf in range(0, NUM_WORKFLOWS):
    workflow_name = f"workflow:{wf}"
    with ProcessDefinition(name=workflow_name, tenant=TENANT) as pd:
        for t in range(0, NUM_TASKS):
            task_name = f"task:{t}-{workflow_name}"
            command = f"echo This is task {task_name}"
            task = Shell(name=task_name, command=command)
            # Chain the task after its predecessor when requested.
            if IS_CHAIN and t > 0:
                pre_task_name = f"task:{t-1}-{workflow_name}"
                pd.get_one_task_by_name(pre_task_name) >> task
        # We just submit the workflow and task definitions; no schedule time is
        # set and the workflow is not run manually.
        pd.submit()

55
dolphinscheduler-python/pydolphinscheduler/examples/task_conditions_example.py

@ -0,0 +1,55 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
r"""
An example workflow for the condition task.

This example creates five tasks in a single workflow: four shell tasks and one
condition task. The condition task has one upstream task which we declare
explicitly with the syntax `parent >> condition`, and three downstream tasks
whose dependence is set automatically by the condition task through the
parameter `condition`. The graph of this workflow looks like:

                           --> condition_success_1
                         /
parent -> conditions -->  --> condition_success_2
                         \
                           --> condition_fail
"""
from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.tasks.condition import FAILURE, SUCCESS, And, Conditions
from pydolphinscheduler.tasks.shell import Shell

with ProcessDefinition(name="task_conditions_example", tenant="tenant_exists") as pd:
    parent = Shell(name="parent", command="echo parent")
    condition_success_1 = Shell(
        name="condition_success_1", command="echo condition_success_1"
    )
    condition_success_2 = Shell(
        name="condition_success_2", command="echo condition_success_2"
    )
    condition_fail = Shell(name="condition_fail", command="echo condition_fail")
    # Condition holds when both success tasks succeed AND the fail task fails.
    cond_operator = And(
        And(
            SUCCESS(condition_success_1, condition_success_2),
            FAILURE(condition_fail),
        ),
    )
    condition = Conditions(name="conditions", condition=cond_operator)
    parent >> condition
    pd.submit()

50
dolphinscheduler-python/pydolphinscheduler/examples/task_datax_example.py

@ -0,0 +1,50 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
An example workflow for the DataX task.

This example creates a workflow named `task_datax` which defines and runs two
DataX tasks. You should create the data sources `first_mysql` and `second_mysql`
through the UI beforehand. The tasks synchronize data with DataX from the source
database to the target database.
"""
from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.tasks.datax import CustomDataX, DataX

# DataX JSON template; fill this in when using CustomDataX.
JSON_TEMPLATE = ""

with ProcessDefinition(
    name="task_datax",
    tenant="tenant_exists",
) as pd:
    # This task synchronizes the data in `t_ds_project`
    # of `first_mysql` database to `target_project` of `second_mysql` database.
    task1 = DataX(
        name="task_datax",
        datasource_name="first_mysql",
        datatarget_name="second_mysql",
        sql="select id, name, code, description from source_table",
        target_table="target_table",
    )
    # You can customize the DataX json_template to sync data.
    task2 = CustomDataX(name="task_custom_datax", json=JSON_TEMPLATE)
    pd.run()

72
dolphinscheduler-python/pydolphinscheduler/examples/task_dependent_example.py

@ -0,0 +1,72 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
r"""
An example workflow for the dependent task.

This example creates two workflows named `task_dependent` and
`task_dependent_external`. `task_dependent` is the real workflow which defines
and runs the dependent task, while `task_dependent_external` defines the outside
workflow and tasks which the dependent task refers to.

After this script is submitted, we get the workflows below:

task_dependent_external:
    task_1
    task_2
    task_3

task_dependent:
    task_dependent (depends on task_dependent_external.task_1 and
    task_dependent_external.task_2 via the And/Or operators below).
"""
from pydolphinscheduler.constants import ProcessDefinitionDefault
from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.tasks.dependent import And, Dependent, DependentItem, Or
from pydolphinscheduler.tasks.shell import Shell

with ProcessDefinition(
    name="task_dependent_external",
    tenant="tenant_exists",
) as pd:
    task_1 = Shell(name="task_1", command="echo task 1")
    task_2 = Shell(name="task_2", command="echo task 2")
    task_3 = Shell(name="task_3", command="echo task 3")
    pd.submit()

with ProcessDefinition(
    name="task_dependent",
    tenant="tenant_exists",
) as pd:
    # Dependence on tasks of the external workflow, combined with And/Or.
    task = Dependent(
        name="task_dependent",
        dependence=And(
            Or(
                DependentItem(
                    project_name=ProcessDefinitionDefault.PROJECT,
                    process_definition_name="task_dependent_external",
                    dependent_task_name="task_1",
                ),
                DependentItem(
                    project_name=ProcessDefinitionDefault.PROJECT,
                    process_definition_name="task_dependent_external",
                    dependent_task_name="task_2",
                ),
            )
        ),
    )
    pd.submit()

50
dolphinscheduler-python/pydolphinscheduler/examples/task_switch_example.py

@ -0,0 +1,50 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
r"""
An example workflow for the switch task.

This example creates four tasks in a single workflow: three shell tasks and one
switch task. The switch task has one upstream task which we declare explicitly
with the syntax `parent >> switch`, and two downstream tasks whose dependence is
set automatically by the switch task through the parameter `condition`. The
graph of this workflow looks like:

                      --> switch_child_1
                    /
parent -> switch ->
                    \
                      --> switch_child_2
"""
from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.tasks.shell import Shell
from pydolphinscheduler.tasks.switch import Branch, Default, Switch, SwitchCondition

with ProcessDefinition(
    # Fixed: the name was copy-pasted from the dependent example
    # ("task_dependent_external") and silently overwrote that workflow.
    name="task_switch_example",
    tenant="tenant_exists",
) as pd:
    parent = Shell(name="parent", command="echo parent")
    switch_child_1 = Shell(name="switch_child_1", command="echo switch_child_1")
    switch_child_2 = Shell(name="switch_child_2", command="echo switch_child_2")
    # Run switch_child_1 when `${var} > 1`, otherwise fall through to the
    # default branch switch_child_2.
    switch_condition = SwitchCondition(
        Branch(condition="${var} > 1", task=switch_child_1),
        Default(task=switch_child_2),
    )
    switch = Switch(name="switch", condition=switch_condition)
    parent >> switch
    pd.submit()

52
dolphinscheduler-python/pydolphinscheduler/examples/tutorial.py

@ -0,0 +1,52 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
r"""
A tutorial example that takes you through pydolphinscheduler.

After tutorial.py is submitted to the Apache DolphinScheduler server, a DAG is
created with the workflow graph below:

                  --> task_child_one
                /                    \
task_parent -->                       --> task_union
                \                    /
                  --> task_child_two

It will instantiate and run all the tasks it contains.
"""
from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.tasks.shell import Shell

with ProcessDefinition(
    name="tutorial",
    schedule="0 0 0 * * ? *",
    start_time="2021-01-01",
    tenant="tenant_exists",
) as pd:
    task_parent = Shell(name="task_parent", command="echo hello pydolphinscheduler")
    task_child_one = Shell(name="task_child_one", command="echo 'child one'")
    task_child_two = Shell(name="task_child_two", command="echo 'child two'")
    task_union = Shell(name="task_union", command="echo union")
    task_group = [task_child_one, task_child_two]
    # Both children run after the parent ...
    task_parent.set_downstream(task_group)
    # ... and the union task runs after both children.
    task_union << task_group
    # Submit the workflow and trigger a run immediately.
    pd.run()

22
dolphinscheduler-python/pydolphinscheduler/pytest.ini

@ -0,0 +1,22 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[pytest]
# Do not run test_java_gateway.py because we cannot mock the java gateway for now
addopts = --ignore=tests/test_java_gateway.py
# add path here to skip pytest scan it
norecursedirs =
tests/testing

18
dolphinscheduler-python/pydolphinscheduler/requirements.txt

@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
py4j~=0.10.9.2

27
dolphinscheduler-python/pydolphinscheduler/requirements_dev.txt

@ -0,0 +1,27 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# testing
pytest~=6.2.5
freezegun
# Test coverage
coverage
# code linting and formatting
flake8
flake8-docstrings
flake8-black
isort

16
dolphinscheduler-python/pydolphinscheduler/setup.cfg

@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

94
dolphinscheduler-python/pydolphinscheduler/setup.py

@ -0,0 +1,94 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""The script for setting up pydolphinscheduler."""
import sys
from os.path import dirname, join

from setuptools import find_packages, setup

# Package version; bump before releasing to PyPI.
version = "0.0.1.dev0"

# Fail fast on Python 2: the package is Python 3 only (see python_requires below).
if sys.version_info[0] < 3:
    raise Exception(
        "pydolphinscheduler does not support Python 2. Please upgrade to Python 3."
    )
def read(*names, **kwargs):
    """Read and return the content of a file relative to this script's directory.

    :param names: path components joined under ``dirname(__file__)``.
    :param kwargs: optional ``encoding`` of the file, default ``utf8``.
    :return: the file's full text content.
    """
    # Use a context manager so the file handle is closed deterministically
    # (the previous version leaked the handle until garbage collection).
    with open(
        join(dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8")
    ) as f:
        return f.read()
# Package metadata and build configuration for pydolphinscheduler.
setup(
    name="pydolphinscheduler",
    version=version,
    license="Apache License 2.0",
    description="Apache DolphinScheduler python SDK",
    long_description=read("README.md"),
    # Make sure pypi is expecting markdown
    long_description_content_type="text/markdown",
    author="Apache Software Foundation",
    author_email="dev@dolphinscheduler.apache.org",
    url="https://dolphinscheduler.apache.org/",
    python_requires=">=3.6",
    keywords=[
        "dolphinscheduler",
        "workflow",
        "scheduler",
        "taskflow",
    ],
    project_urls={
        "Homepage": "https://dolphinscheduler.apache.org",
        "Documentation": "https://dolphinscheduler.apache.org/en-us/docs/latest/user_doc/quick-start.html",
        "Source": "https://github.com/apache/dolphinscheduler",
        "Issue Tracker": "https://github.com/apache/dolphinscheduler/issues",
        "Discussion": "https://github.com/apache/dolphinscheduler/discussions",
        "Twitter": "https://twitter.com/dolphinschedule",
    },
    # src-layout: package sources live under src/.
    packages=find_packages(where="src"),
    package_dir={"": "src"},
    include_package_data=True,
    classifiers=[
        # complete classifier list: http://pypi.python.org/pypi?%3Aaction=list_classifiers
        "Development Status :: 1 - Planning",
        "Environment :: Console",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: Unix",
        "Operating System :: POSIX",
        "Operating System :: Microsoft :: Windows",
        "Programming Language :: Python",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: Implementation :: CPython",
        "Programming Language :: Python :: Implementation :: PyPy",
        "Topic :: Software Development :: User Interfaces",
    ],
    install_requires=[
        # Core
        "py4j~=0.10",
        # Dev
        # NOTE(review): pytest is a test-only dependency; listing it in
        # install_requires forces it on every installer. Consider moving it
        # to extras_require (it is already in requirements_dev.txt).
        "pytest~=6.2",
    ],
)

18
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/__init__.py

@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Init root of pydolphinscheduler."""

122
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/constants.py

@ -0,0 +1,122 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Constants for pydolphinscheduler."""
class ProcessDefinitionReleaseState:
    """Constants for :class:`pydolphinscheduler.core.process_definition.ProcessDefinition` release state."""

    # Workflow is released (schedulable by the server).
    ONLINE: str = "ONLINE"
    # Workflow is defined but not released.
    OFFLINE: str = "OFFLINE"
class ProcessDefinitionDefault:
    """Constants default value for :class:`pydolphinscheduler.core.process_definition.ProcessDefinition`."""

    # Default project/tenant/user that the Python gateway creates on demand.
    PROJECT: str = "project-pydolphin"
    TENANT: str = "tenant_pydolphin"
    USER: str = "userPythonGateway"
    # TODO simple set password same as username
    USER_PWD: str = "userPythonGateway"
    USER_EMAIL: str = "userPythonGateway@dolphinscheduler.com"
    USER_PHONE: str = "11111111111"
    # NOTE(review): 1 presumably means "enabled" -- confirm against the user API.
    USER_STATE: int = 1
    QUEUE: str = "queuePythonGateway"
    WORKER_GROUP: str = "default"
    TIME_ZONE: str = "Asia/Shanghai"
class TaskPriority(str):
    """Constants for task priority."""

    # NOTE(review): subclassing str does not affect these constants; the class
    # appears to be used purely as a namespace -- confirm before changing.
    HIGHEST = "HIGHEST"
    HIGH = "HIGH"
    MEDIUM = "MEDIUM"
    LOW = "LOW"
    LOWEST = "LOWEST"
class TaskFlag(str):
    """Constants for task flag."""

    # Whether the task is effective (YES) or disabled (NO) in its workflow.
    YES = "YES"
    NO = "NO"
class TaskTimeoutFlag(str):
    """Constants for task timeout flag."""

    # Timeout checking disabled; no OPEN counterpart is defined here.
    CLOSE = "CLOSE"
class TaskType(str):
    """Constants for task type, it will also show you which kind we support up to now."""

    SHELL = "SHELL"
    HTTP = "HTTP"
    PYTHON = "PYTHON"
    SQL = "SQL"
    SUB_PROCESS = "SUB_PROCESS"
    PROCEDURE = "PROCEDURE"
    DATAX = "DATAX"
    DEPENDENT = "DEPENDENT"
    CONDITIONS = "CONDITIONS"
    SWITCH = "SWITCH"
class DefaultTaskCodeNum(str):
    """Constants and default value for default task code number."""

    # NOTE(review): value is an int although the class subclasses str; the
    # base class looks accidental -- confirm before changing it.
    DEFAULT = 1
class JavaGatewayDefault(str):
    """Constants and default value for java gateway."""

    # Keys and expected values of the result map returned by gateway calls.
    RESULT_MESSAGE_KEYWORD = "msg"
    RESULT_MESSAGE_SUCCESS = "success"
    RESULT_STATUS_KEYWORD = "status"
    RESULT_STATUS_SUCCESS = "SUCCESS"
    RESULT_DATA = "data"
class Delimiter(str):
    """Constants for delimiter."""

    # NOTE(review): DASH is "/" (a slash) and BAR is "-" (a dash); the names
    # look swapped, but callers reference these by name -- audit usages
    # before renaming.
    BAR = "-"
    DASH = "/"
    COLON = ":"
    UNDERSCORE = "_"
    DIRECTION = "->"
class Time(str):
    """Constants for date."""

    FMT_STD_DATE = "%Y-%m-%d"
    LEN_STD_DATE = 10
    # NOTE(review): despite the name, this format uses slashes, not dashes.
    FMT_DASH_DATE = "%Y/%m/%d"
    FMT_SHORT_DATE = "%Y%m%d"
    LEN_SHORT_DATE = 8
    FMT_STD_TIME = "%H:%M:%S"
    FMT_NO_COLON_TIME = "%H%M%S"

18
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/__init__.py

@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Init pydolphinscheduler.core package."""

74
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/base.py

@ -0,0 +1,74 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""DolphinScheduler Base object."""
from typing import Dict, Optional
# from pydolphinscheduler.side.user import User
from pydolphinscheduler.utils.string import attr2camel
class Base:
    """DolphinScheduler Base object.

    Holds the common ``name``/``description`` attributes and the machinery to
    serialize an object's attributes for the Java gateway server.
    """

    # Attributes used to decide object equality; see __eq__.
    _KEY_ATTR: set = {"name", "description"}

    # Attributes serialized when communicating with the Java gateway server.
    _DEFINE_ATTR: set = set()

    # Default values merged into the definition for keys not already present.
    _DEFAULT_ATTR: Dict = {}

    def __init__(self, name: str, description: Optional[str] = None):
        self.name = name
        self.description = description

    def __repr__(self) -> str:
        return f'<{type(self).__name__}: name="{self.name}">'

    def __eq__(self, other):
        # Equal when the concrete types match and every key attribute matches.
        # NOTE(review): defining __eq__ without __hash__ makes instances
        # unhashable -- confirm no caller stores Base objects in sets/dicts.
        return type(self) == type(other) and all(
            getattr(self, a, None) == getattr(other, a, None) for a in self._KEY_ATTR
        )

    def get_define_custom(
        self, camel_attr: bool = True, custom_attr: set = None
    ) -> Dict:
        """Get object definition attribute by given attr set.

        :param camel_attr: when True, convert attribute names to camelCase keys.
        :param custom_attr: attribute names to collect; ``None`` now yields an
            empty dict (previously the declared default crashed with TypeError).
        """
        content = {}
        # Guard against custom_attr=None, which is the declared default and
        # would raise TypeError when iterated.
        for attr in custom_attr or set():
            val = getattr(self, attr, None)
            if camel_attr:
                content[attr2camel(attr)] = val
            else:
                content[attr] = val
        return content

    def get_define(self, camel_attr: bool = True) -> Dict:
        """Get object definition attribute communicate to Java gateway server.

        use attribute `self._DEFINE_ATTR` to determine which attributes should including when
        object tries to communicate with Java gateway server.
        """
        content = self.get_define_custom(camel_attr, self._DEFINE_ATTR)
        # Fill in defaults only for keys not already defined.
        # NOTE(review): _DEFAULT_ATTR keys are snake_case while content keys
        # may be camelCase here -- confirm no key belongs to both sets.
        update_default = {
            k: self._DEFAULT_ATTR.get(k) for k in self._DEFAULT_ATTR if k not in content
        }
        content.update(update_default)
        return content

40
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/base_side.py

@ -0,0 +1,40 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Module for side object."""
from typing import Optional
from pydolphinscheduler.constants import ProcessDefinitionDefault
from pydolphinscheduler.core.base import Base
class BaseSide(Base):
    """Base class for side object, it declare base behavior for them."""

    def __init__(self, name: str, description: Optional[str] = None):
        super().__init__(name, description)

    @classmethod
    def create_if_not_exists(
        cls,
        # TODO comment for avoiding cycle import
        # user: Optional[User] = ProcessDefinitionDefault.USER
        user=ProcessDefinitionDefault.USER,
    ):
        """Create Base if not exists."""
        # Stub: concrete side objects (project, tenant, user, queue, ...) must
        # implement the actual gateway call.
        raise NotImplementedError

64
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/database.py

@ -0,0 +1,64 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Module database."""
import logging
from typing import Dict
from py4j.protocol import Py4JJavaError
from pydolphinscheduler.java_gateway import launch_gateway
class Database(dict):
    """Database object that resolves information about a named datasource.

    The ``database_name`` you provide identifies a datasource whose connection
    information decides which database type and instance the task runs against.
    The resolved type and id are stored in this dict under the caller-supplied
    ``type_key`` and ``database_key``.
    """

    def __init__(self, database_name: str, type_key, database_key, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Cache for the gateway lookup; filled lazily on first access.
        self._database = {}
        self.database_name = database_name
        self[type_key] = self.database_type
        self[database_key] = self.database_id

    @property
    def database_type(self) -> str:
        """Get database type from java gateway, a wrapper for :func:`get_database_info`."""
        return self.get_database_info(self.database_name).get("type")

    @property
    def database_id(self) -> str:
        """Get database id from java gateway, a wrapper for :func:`get_database_info`."""
        return self.get_database_info(self.database_name).get("id")

    def get_database_info(self, name) -> Dict:
        """Get database info from java gateway, contains database id, type, name."""
        if self._database:
            return self._database
        gateway = launch_gateway()
        try:
            self._database = gateway.entry_point.getDatasourceInfo(name)
        # Handle missing datasource; for now we just terminate the process.
        except Py4JJavaError:
            logging.error("Datasource name `%s` do not exists.", name)
            # raise SystemExit instead of the builtin exit(): exit() is
            # injected by the site module and may be absent (e.g. python -S).
            raise SystemExit(1)
        return self._database

360
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/process_definition.py

@ -0,0 +1,360 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Module process definition, core class for workflow define."""
import json
from datetime import datetime
from typing import Any, Dict, List, Optional, Set
from pydolphinscheduler.constants import (
ProcessDefinitionDefault,
ProcessDefinitionReleaseState,
)
from pydolphinscheduler.core.base import Base
from pydolphinscheduler.exceptions import PyDSParamException, PyDSTaskNoFoundException
from pydolphinscheduler.java_gateway import launch_gateway
from pydolphinscheduler.side import Project, Tenant, User
from pydolphinscheduler.utils.date import MAX_DATETIME, conv_from_str, conv_to_schedule
class ProcessDefinitionContext:
    """Holder for the process definition currently opened via ``with``.

    Tasks created inside a ``with ProcessDefinition(...)`` block use this
    class to discover the workflow they should attach to.
    """

    _context_managed_process_definition: Optional["ProcessDefinition"] = None

    @classmethod
    def set(cls, pd: "ProcessDefinition") -> None:
        """Record ``pd`` as the active context-managed process definition."""
        cls._context_managed_process_definition = pd

    @classmethod
    def get(cls) -> Optional["ProcessDefinition"]:
        """Return the active process definition, or ``None`` outside a context."""
        return cls._context_managed_process_definition

    @classmethod
    def delete(cls) -> None:
        """Forget the active process definition."""
        cls._context_managed_process_definition = None
class ProcessDefinition(Base):
    """process definition object, will define process definition attribute, task, relation.

    TODO: maybe we should rename this class, currently use DS object name.
    """

    # Attributes compared by Base.__eq__ to decide whether two objects
    # identify the same process definition.
    _KEY_ATTR = {
        "name",
        "project",
        "tenant",
        "release_state",
        "param",
    }

    # Attributes serialized when building the definition sent to the
    # Java gateway.
    _DEFINE_ATTR = {
        "name",
        "description",
        "_project",
        "_tenant",
        "worker_group",
        "timeout",
        "release_state",
        "param",
        "tasks",
        "task_definition_json",
        "task_relation_json",
    }

    def __init__(
        self,
        name: str,
        description: Optional[str] = None,
        schedule: Optional[str] = None,
        start_time: Optional[str] = None,
        end_time: Optional[str] = None,
        timezone: Optional[str] = ProcessDefinitionDefault.TIME_ZONE,
        user: Optional[str] = ProcessDefinitionDefault.USER,
        project: Optional[str] = ProcessDefinitionDefault.PROJECT,
        tenant: Optional[str] = ProcessDefinitionDefault.TENANT,
        queue: Optional[str] = ProcessDefinitionDefault.QUEUE,
        worker_group: Optional[str] = ProcessDefinitionDefault.WORKER_GROUP,
        timeout: Optional[int] = 0,
        release_state: Optional[str] = ProcessDefinitionReleaseState.ONLINE,
        param: Optional[List] = None,
    ):
        """Create a workflow definition.

        ``schedule`` is a crontab expression; ``start_time``/``end_time``
        accept strings which are parsed lazily by :func:`conv_from_str`
        via the ``start_time``/``end_time`` properties.
        """
        super().__init__(name, description)
        self.schedule = schedule
        # Raw values; parsed on access by the start_time/end_time properties.
        self._start_time = start_time
        self._end_time = end_time
        self.timezone = timezone
        self._user = user
        self._project = project
        self._tenant = tenant
        self._queue = queue
        self.worker_group = worker_group
        self.timeout = timeout
        self.release_state = release_state
        self.param = param
        # Mapping of task code -> Task for every task in this workflow.
        self.tasks: dict = {}
        # TODO how to fix circle import
        self._task_relations: set["TaskRelation"] = set()  # noqa: F821
        # Assigned by submit() from the Java gateway; None until submitted.
        self._process_definition_code = None

    def __enter__(self) -> "ProcessDefinition":
        # Register self so tasks created inside the ``with`` block attach here.
        ProcessDefinitionContext.set(self)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        ProcessDefinitionContext.delete()

    @property
    def tenant(self) -> Tenant:
        """Get attribute tenant, wrapped in a fresh :class:`Tenant` object."""
        return Tenant(self._tenant)

    @tenant.setter
    def tenant(self, tenant: Tenant) -> None:
        """Set attribute tenant; only the tenant's name is stored."""
        self._tenant = tenant.name

    @property
    def project(self) -> Project:
        """Get attribute project, wrapped in a fresh :class:`Project` object."""
        return Project(self._project)

    @project.setter
    def project(self, project: Project) -> None:
        """Set attribute project; only the project's name is stored."""
        self._project = project.name

    @property
    def user(self) -> User:
        """Get user object.

        For now we just get from python side but not from java gateway side, so it may not correct.
        """
        return User(
            self._user,
            ProcessDefinitionDefault.USER_PWD,
            ProcessDefinitionDefault.USER_EMAIL,
            ProcessDefinitionDefault.USER_PHONE,
            self._tenant,
            self._queue,
            ProcessDefinitionDefault.USER_STATE,
        )

    @staticmethod
    def _parse_datetime(val: Any) -> Any:
        """Normalize ``val``: pass through None/datetime, parse str, else raise."""
        if val is None or isinstance(val, datetime):
            return val
        elif isinstance(val, str):
            return conv_from_str(val)
        else:
            # NOTE(review): the %s placeholder is not interpolated — the
            # format string and args travel separately in the exception.
            raise PyDSParamException("Do not support value type %s for now", type(val))

    @property
    def start_time(self) -> Any:
        """Get attribute start_time, parsed to datetime when it was a str."""
        return self._parse_datetime(self._start_time)

    @start_time.setter
    def start_time(self, val) -> None:
        """Set attribute start_time (stored raw, parsed on access)."""
        self._start_time = val

    @property
    def end_time(self) -> Any:
        """Get attribute end_time, parsed to datetime when it was a str."""
        return self._parse_datetime(self._end_time)

    @end_time.setter
    def end_time(self, val) -> None:
        """Set attribute end_time (stored raw, parsed on access)."""
        self._end_time = val

    @property
    def task_definition_json(self) -> List[Dict]:
        """Return all tasks definition in list of dict."""
        if not self.tasks:
            # NOTE(review): returns [{}] rather than [] when empty —
            # presumably what the java gateway expects; confirm.
            return [self.tasks]
        else:
            return [task.get_define() for task in self.tasks.values()]

    @property
    def task_relation_json(self) -> List[Dict]:
        """Return all relation between tasks pair in list of dict."""
        if not self.tasks:
            return [self.tasks]
        else:
            # Ensure every DAG root has the synthetic upstream code 0 first.
            self._handle_root_relation()
            return [tr.get_define() for tr in self._task_relations]

    @property
    def schedule_json(self) -> Optional[Dict]:
        """Get schedule parameter json object. This is requests from java gateway interface."""
        if not self.schedule:
            return None
        else:
            # Default window: now .. MAX_DATETIME when not given explicitly.
            start_time = conv_to_schedule(
                self.start_time if self.start_time else datetime.now()
            )
            end_time = conv_to_schedule(
                self.end_time if self.end_time else MAX_DATETIME
            )
            return {
                "startTime": start_time,
                "endTime": end_time,
                "crontab": self.schedule,
                "timezoneId": self.timezone,
            }

    # TODO inti DAG's tasks are in the same location with default {x: 0, y: 0}
    @property
    def task_location(self) -> List[Dict]:
        """Return all tasks location for all process definition.

        For now, we only set all location with same x and y valued equal to 0. Because we do not
        find a good way to set task locations. This is requests from java gateway interface.
        """
        if not self.tasks:
            return [self.tasks]
        else:
            return [{"taskCode": task_code, "x": 0, "y": 0} for task_code in self.tasks]

    @property
    def task_list(self) -> List["Task"]:  # noqa: F821
        """Return list of tasks objects."""
        return list(self.tasks.values())

    def _handle_root_relation(self):
        """Handle root task property :class:`pydolphinscheduler.core.task.TaskRelation`.

        Root task in DAG do not have dominant upstream node, but we have to add an exactly default
        upstream task with task_code equal to `0`. This is requests from java gateway interface.
        """
        # Local import to avoid the circular dependency with core.task.
        from pydolphinscheduler.core.task import TaskRelation

        # Tasks that appear as a post (downstream) side of some relation
        # already have an upstream; the rest are roots.
        post_relation_code = set()
        for relation in self._task_relations:
            post_relation_code.add(relation.post_task_code)
        for task in self.task_list:
            if task.code not in post_relation_code:
                root_relation = TaskRelation(pre_task_code=0, post_task_code=task.code)
                self._task_relations.add(root_relation)

    def add_task(self, task: "Task") -> None:  # noqa: F821
        """Add a single task to process definition, keyed by its task code."""
        self.tasks[task.code] = task
        task._process_definition = self

    def add_tasks(self, tasks: List["Task"]) -> None:  # noqa: F821
        """Add task sequence to process definition, it a wrapper of :func:`add_task`."""
        for task in tasks:
            self.add_task(task)

    def get_task(self, code: str) -> "Task":  # noqa: F821
        """Get task object from process definition by given code."""
        if code not in self.tasks:
            # NOTE(review): trailing "%" looks like a typo for "%s", and the
            # args are not interpolated into the message — confirm and fix.
            raise PyDSTaskNoFoundException(
                "Task with code %s can not found in process definition %",
                (code, self.name),
            )
        return self.tasks[code]

    # TODO which tying should return in this case
    def get_tasks_by_name(self, name: str) -> Set["Task"]:  # noqa: F821
        """Get tasks object by given name, if will return all tasks with this name."""
        find = set()
        for task in self.tasks.values():
            if task.name == name:
                find.add(task)
        return find

    def get_one_task_by_name(self, name: str) -> "Task":  # noqa: F821
        """Get exact one task from process definition by given name.

        Function always return one task even though this process definition have more than one task with
        this name.
        """
        tasks = self.get_tasks_by_name(name)
        if not tasks:
            raise PyDSTaskNoFoundException(f"Can not find task with name {name}.")
        # Sets are unordered: with duplicate names the returned task is
        # arbitrary.
        return tasks.pop()

    def run(self):
        """Submit and Start ProcessDefinition instance.

        Shortcut for function :func:`submit` and function :func:`start`. Only support manual start workflow
        for now, and schedule run will coming soon.

        :return:
        """
        self.submit()
        self.start()

    def _ensure_side_model_exists(self):
        """Ensure process definition side model exists.

        For now, side object including :class:`pydolphinscheduler.side.project.Project`,
        :class:`pydolphinscheduler.side.tenant.Tenant`, :class:`pydolphinscheduler.side.user.User`.
        If these model not exists, would create default value in
        :class:`pydolphinscheduler.constants.ProcessDefinitionDefault`.
        """
        # TODO used metaclass for more pythonic
        self.tenant.create_if_not_exists(self._queue)
        # model User have to create after Tenant created
        self.user.create_if_not_exists()
        # Project model need User object exists
        self.project.create_if_not_exists(self._user)

    def submit(self) -> int:
        """Submit ProcessDefinition instance to java gateway.

        :return: the process definition code assigned by the gateway.
        """
        self._ensure_side_model_exists()
        gateway = launch_gateway()
        self._process_definition_code = gateway.entry_point.createOrUpdateProcessDefinition(
            self._user,
            self._project,
            self.name,
            str(self.description) if self.description else "",
            str(self.param) if self.param else None,
            json.dumps(self.schedule_json) if self.schedule_json else None,
            json.dumps(self.task_location),
            self.timeout,
            self.worker_group,
            self._tenant,
            # TODO add serialization function
            json.dumps(self.task_relation_json),
            json.dumps(self.task_definition_json),
        )
        return self._process_definition_code

    def start(self) -> None:
        """Create and start ProcessDefinition instance.

        which post to `start-process-instance` to java gateway
        """
        gateway = launch_gateway()
        gateway.entry_point.execProcessInstance(
            self._user,
            self._project,
            self.name,
            "",
            self.worker_group,
            # presumably a timeout of 24 hours, in seconds — TODO confirm
            # against the gateway's execProcessInstance signature.
            24 * 3600,
        )

268
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/core/task.py

@ -0,0 +1,268 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""DolphinScheduler Task and TaskRelation object."""
import logging
from typing import Dict, List, Optional, Sequence, Set, Tuple, Union
from pydolphinscheduler.constants import (
Delimiter,
ProcessDefinitionDefault,
TaskFlag,
TaskPriority,
TaskTimeoutFlag,
)
from pydolphinscheduler.core.base import Base
from pydolphinscheduler.core.process_definition import (
ProcessDefinition,
ProcessDefinitionContext,
)
from pydolphinscheduler.java_gateway import launch_gateway
class TaskRelation(Base):
    """Directed edge between exactly two tasks in a workflow DAG."""

    # Overrides Base.__eq__ comparison keys so that relations stored in
    # ``ProcessDefinition._task_relations`` (a set) deduplicate correctly.
    _KEY_ATTR = {
        "pre_task_code",
        "post_task_code",
    }

    # Attributes serialized when sending the relation to the Java gateway.
    _DEFINE_ATTR = {
        "pre_task_code",
        "post_task_code",
    }

    # Constant fields merged into every serialized relation.
    _DEFAULT_ATTR = {
        "name": "",
        "preTaskVersion": 1,
        "postTaskVersion": 1,
        "conditionType": 0,
        "conditionParams": {},
    }

    def __init__(
        self,
        pre_task_code: int,
        post_task_code: int,
        name: Optional[str] = None,
    ):
        super().__init__(name)
        # Upstream (pre) and downstream (post) task codes forming the edge.
        self.pre_task_code, self.post_task_code = pre_task_code, post_task_code

    def __hash__(self):
        # Must agree with _KEY_ATTR equality: two relations with the same
        # endpoint codes hash identically.
        edge_key = f"{self.pre_task_code} {Delimiter.DIRECTION} {self.post_task_code}"
        return hash(edge_key)
class Task(Base):
    """Task object, parent class for all exactly task type."""

    # Attributes serialized when sending the task to the Java gateway.
    _DEFINE_ATTR = {
        "name",
        "code",
        "version",
        "task_type",
        "task_params",
        "description",
        "flag",
        "task_priority",
        "worker_group",
        "delay_time",
        "fail_retry_times",
        "fail_retry_interval",
        "timeout_flag",
        "timeout_notify_strategy",
        "timeout",
    }

    # Extra task-type-specific keys merged into ``task_params`` by sub classes.
    _task_custom_attr: set = set()

    DEFAULT_CONDITION_RESULT = {"successNode": [""], "failedNode": [""]}

    def __init__(
        self,
        name: str,
        task_type: str,
        description: Optional[str] = None,
        flag: Optional[str] = TaskFlag.YES,
        task_priority: Optional[str] = TaskPriority.MEDIUM,
        worker_group: Optional[str] = ProcessDefinitionDefault.WORKER_GROUP,
        delay_time: Optional[int] = 0,
        fail_retry_times: Optional[int] = 0,
        fail_retry_interval: Optional[int] = 1,
        timeout_flag: Optional[int] = TaskTimeoutFlag.CLOSE,
        timeout_notify_strategy: Optional = None,
        timeout: Optional[int] = 0,
        process_definition: Optional[ProcessDefinition] = None,
        local_params: Optional[List] = None,
        resource_list: Optional[List] = None,
        dependence: Optional[Dict] = None,
        wait_start_timeout: Optional[Dict] = None,
        condition_result: Optional[Dict] = None,
    ):
        super().__init__(name, description)
        self.task_type = task_type
        self.flag = flag
        self.task_priority = task_priority
        self.worker_group = worker_group
        self.fail_retry_times = fail_retry_times
        self.fail_retry_interval = fail_retry_interval
        self.delay_time = delay_time
        self.timeout_flag = timeout_flag
        self.timeout_notify_strategy = timeout_notify_strategy
        self.timeout = timeout
        self._process_definition = None
        # Fall back to the workflow opened by ``with ProcessDefinition(...)``
        # when no explicit process definition is given.
        self.process_definition: ProcessDefinition = (
            process_definition or ProcessDefinitionContext.get()
        )
        self._upstream_task_codes: Set[int] = set()
        self._downstream_task_codes: Set[int] = set()
        self._task_relation: Set[TaskRelation] = set()
        # move attribute code and version after _process_definition and process_definition declare
        self.code, self.version = self.gen_code_and_version()
        # Add task to process definition, maybe we could put into property process_definition latter
        if (
            self.process_definition is not None
            and self.code not in self.process_definition.tasks
        ):
            self.process_definition.add_task(self)
        else:
            # NOTE(review): this branch also fires when process_definition is
            # None, where the "already in process definition" message is
            # misleading — confirm intended behavior.
            logging.warning(
                "Task code %d already in process definition, prohibit re-add task.",
                self.code,
            )
        # Attribute for task param
        self.local_params = local_params or []
        self.resource_list = resource_list or []
        self.dependence = dependence or {}
        self.wait_start_timeout = wait_start_timeout or {}
        self.condition_result = condition_result or self.DEFAULT_CONDITION_RESULT

    @property
    def process_definition(self) -> Optional[ProcessDefinition]:
        """Get attribute process_definition."""
        return self._process_definition

    @process_definition.setter
    def process_definition(self, process_definition: Optional[ProcessDefinition]):
        """Set attribute process_definition."""
        self._process_definition = process_definition

    @property
    def task_params(self) -> Optional[Dict]:
        """Get task parameter object.

        Will get result to combine _task_custom_attr and custom_attr.
        """
        custom_attr = {
            "local_params",
            "resource_list",
            "dependence",
            "wait_start_timeout",
            "condition_result",
        }
        # Merge in the task-type-specific keys declared by sub classes.
        custom_attr |= self._task_custom_attr
        return self.get_define_custom(custom_attr=custom_attr)

    def __hash__(self):
        # Tasks are identified by the code handed out by the Java gateway.
        return hash(self.code)

    def __lshift__(self, other: Union["Task", Sequence["Task"]]):
        """Implement Task << Task, setting ``other`` as upstream."""
        self.set_upstream(other)
        return other

    def __rshift__(self, other: Union["Task", Sequence["Task"]]):
        """Implement Task >> Task, setting ``other`` as downstream."""
        self.set_downstream(other)
        return other

    def __rrshift__(self, other: Union["Task", Sequence["Task"]]):
        """Call for Task >> [Task] because list don't have __rshift__ operators."""
        self.__lshift__(other)
        return self

    def __rlshift__(self, other: Union["Task", Sequence["Task"]]):
        """Call for Task << [Task] because list don't have __lshift__ operators."""
        self.__rshift__(other)
        return self

    def _set_deps(
        self, tasks: Union["Task", Sequence["Task"]], upstream: bool = True
    ) -> None:
        """
        Set parameter tasks dependent to current task.

        It is a wrapper for :func:`set_upstream` and :func:`set_downstream`.
        """
        if not isinstance(tasks, Sequence):
            tasks = [tasks]
        for task in tasks:
            if upstream:
                self._upstream_task_codes.add(task.code)
                task._downstream_task_codes.add(self.code)
                # Relations are only recorded once a process definition exists.
                if self._process_definition:
                    task_relation = TaskRelation(
                        pre_task_code=task.code,
                        post_task_code=self.code,
                        name=f"{task.name} {Delimiter.DIRECTION} {self.name}",
                    )
                    self.process_definition._task_relations.add(task_relation)
            else:
                self._downstream_task_codes.add(task.code)
                task._upstream_task_codes.add(self.code)
                if self._process_definition:
                    task_relation = TaskRelation(
                        pre_task_code=self.code,
                        post_task_code=task.code,
                        name=f"{self.name} {Delimiter.DIRECTION} {task.name}",
                    )
                    self.process_definition._task_relations.add(task_relation)

    def set_upstream(self, tasks: Union["Task", Sequence["Task"]]) -> None:
        """Set parameter tasks as upstream to current task."""
        self._set_deps(tasks, upstream=True)

    def set_downstream(self, tasks: Union["Task", Sequence["Task"]]) -> None:
        """Set parameter tasks as downstream to current task."""
        self._set_deps(tasks, upstream=False)

    # TODO code should better generate in bulk mode when :ref: processDefinition run submit or start
    def gen_code_and_version(self) -> Tuple:
        """
        Generate task code and version from java gateway.

        If task name do not exists in process definition before, if will generate new code and version id
        equal to 0 by java gateway, otherwise if will return the exists code and version.
        """
        # TODO get code from specific project process definition and task name
        # NOTE(review): dereferences self.process_definition._project — raises
        # AttributeError when the task is built outside any process
        # definition context; confirm whether that is intended.
        gateway = launch_gateway()
        result = gateway.entry_point.getCodeAndVersion(
            self.process_definition._project, self.name
        )
        # result = gateway.entry_point.genTaskCodeList(DefaultTaskCodeNum.DEFAULT)
        # gateway_result_checker(result)
        return result.get("code"), result.get("version")

46
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/exceptions.py

@ -0,0 +1,46 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Exceptions for pydolphinscheduler."""
class PyDSBaseException(Exception):
    """Root of the pydolphinscheduler exception hierarchy."""
class PyDSParamException(PyDSBaseException):
    """Raised when a pydolphinscheduler parameter fails verification."""
class PyDSTaskNoFoundException(PyDSBaseException):
    """Raised when a workflow task lookup finds no matching task."""
class PyDSJavaGatewayException(PyDSBaseException):
    """Raised when communication with the Java gateway fails."""
class PyDSProcessDefinitionNotAssignException(PyDSBaseException):
    """Exception for pydolphinscheduler process definition not assign error."""

55
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/java_gateway.py

@ -0,0 +1,55 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Module java gateway, contain gateway behavior."""
from typing import Any, Optional
from py4j.java_collections import JavaMap
from py4j.java_gateway import GatewayParameters, JavaGateway
from pydolphinscheduler.constants import JavaGatewayDefault
from pydolphinscheduler.exceptions import PyDSJavaGatewayException
def launch_gateway() -> JavaGateway:
    """Connect to the pydolphinscheduler Java gateway server.

    A new :class:`JavaGateway` is constructed on every call; callers share no
    connection state.

    TODO Note that automatic conversion makes calling Java methods slightly less efficient because
    in the worst case, Py4J needs to go through all registered converters for all parameters.
    This is why automatic conversion is disabled by default.
    """
    return JavaGateway(gateway_parameters=GatewayParameters(auto_convert=True))
def gateway_result_checker(
    result: JavaMap,
    msg_check: Optional[str] = JavaGatewayDefault.RESULT_MESSAGE_SUCCESS,
) -> Any:
    """Check weather java gateway result success or not.

    Raises :class:`PyDSJavaGatewayException` when the status keyword is not
    the success value, or (when ``msg_check`` is given) when the message
    keyword differs from it. Returns ``result`` unchanged on success.
    """
    status = result[JavaGatewayDefault.RESULT_STATUS_KEYWORD].toString()
    if status != JavaGatewayDefault.RESULT_STATUS_SUCCESS:
        raise PyDSJavaGatewayException("Failed when try to got result for java gateway")
    if msg_check is not None:
        if result[JavaGatewayDefault.RESULT_MESSAGE_KEYWORD] != msg_check:
            raise PyDSJavaGatewayException("Get result state not success.")
    return result

22
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/side/__init__.py

@ -0,0 +1,22 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Init Side package, Side package keep object related to DolphinScheduler but not in the Core part."""
from pydolphinscheduler.side.project import Project
from pydolphinscheduler.side.tenant import Tenant
from pydolphinscheduler.side.user import User

42
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/side/project.py

@ -0,0 +1,42 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""DolphinScheduler Project object."""
from typing import Optional
from pydolphinscheduler.constants import ProcessDefinitionDefault
from pydolphinscheduler.core.base_side import BaseSide
from pydolphinscheduler.java_gateway import launch_gateway
class Project(BaseSide):
    """DolphinScheduler Project object."""

    def __init__(
        self,
        name: str = ProcessDefinitionDefault.PROJECT,
        description: Optional[str] = None,
    ):
        super().__init__(name, description)

    def create_if_not_exists(self, user=ProcessDefinitionDefault.USER) -> None:
        """Create this project through the Java gateway when absent."""
        entry_point = launch_gateway().entry_point
        entry_point.createProject(user, self.name, self.description)
        # TODO recover result checker
        # gateway_result_checker(result, None)

42
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/side/queue.py

@ -0,0 +1,42 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""DolphinScheduler User object."""
from typing import Optional
from pydolphinscheduler.constants import ProcessDefinitionDefault
from pydolphinscheduler.core.base_side import BaseSide
from pydolphinscheduler.java_gateway import gateway_result_checker, launch_gateway
class Queue(BaseSide):
    """DolphinScheduler Queue object."""

    def __init__(
        self,
        name: str = ProcessDefinitionDefault.QUEUE,
        description: Optional[str] = "",
    ):
        super().__init__(name, description)

    def create_if_not_exists(self, user=ProcessDefinitionDefault.USER) -> None:
        """Create Queue if not exists.

        :param user: user name the gateway request is issued as.
        """
        gateway = launch_gateway()
        # Here we set Queue.name and Queue.queueName same as self.name
        # BUGFIX: the original called ``createProject``, which created a
        # project instead of a queue. Route to the queue endpoint instead.
        # TODO confirm gateway signature is createQueue(name, queueName, user).
        result = gateway.entry_point.createQueue(self.name, self.name, user)
        gateway_result_checker(result, None)

45
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/side/tenant.py

@ -0,0 +1,45 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""DolphinScheduler Tenant object."""
from typing import Optional
from pydolphinscheduler.constants import ProcessDefinitionDefault
from pydolphinscheduler.core.base_side import BaseSide
from pydolphinscheduler.java_gateway import launch_gateway
class Tenant(BaseSide):
    """DolphinScheduler Tenant object."""

    def __init__(
        self,
        name: str = ProcessDefinitionDefault.TENANT,
        queue: str = ProcessDefinitionDefault.QUEUE,
        description: Optional[str] = None,
    ):
        super().__init__(name, description)
        self.queue = queue

    def create_if_not_exists(
        self, queue_name: str, user=ProcessDefinitionDefault.USER
    ) -> None:
        """Create this tenant through the Java gateway when absent."""
        entry_point = launch_gateway().entry_point
        entry_point.createTenant(self.name, self.description, queue_name)
        # gateway_result_checker(result, None)

70
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/side/user.py

@ -0,0 +1,70 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""DolphinScheduler User object."""
from typing import Optional
from pydolphinscheduler.core.base_side import BaseSide
from pydolphinscheduler.java_gateway import launch_gateway
class User(BaseSide):
    """DolphinScheduler User object."""

    # Attributes used by BaseSide equality to identify the same user.
    _KEY_ATTR = {
        "name",
        "password",
        "email",
        "phone",
        "tenant",
        "queue",
        "status",
    }

    def __init__(
        self,
        name: str,
        password: str,
        email: str,
        phone: str,
        tenant: str,
        queue: Optional[str] = None,
        status: Optional[int] = 1,
    ):
        super().__init__(name)
        self.password = password
        self.email = email
        self.phone = phone
        self.tenant = tenant
        self.queue = queue
        self.status = status

    def create_if_not_exists(self, **kwargs):
        """Create this user through the Java gateway when absent."""
        entry_point = launch_gateway().entry_point
        entry_point.createUser(
            self.name,
            self.password,
            self.email,
            self.phone,
            self.tenant,
            self.queue,
            self.status,
        )
        # TODO recover result checker
        # gateway_result_checker(result, None)

30
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/side/worker_group.py

@ -0,0 +1,30 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""DolphinScheduler Worker Group object."""
from typing import Optional
from pydolphinscheduler.core.base_side import BaseSide
class WorkerGroup(BaseSide):
    """DolphinScheduler Worker Group object."""

    def __init__(self, name: str, address: str, description: Optional[str] = None):
        # Address of the worker group; name/description handled by BaseSide.
        self.address = address
        super().__init__(name, description)

18
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/__init__.py

@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Init pydolphinscheduler.tasks package."""

185
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/condition.py

@ -0,0 +1,185 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Task Conditions."""
from typing import Dict, List
from pydolphinscheduler.constants import TaskType
from pydolphinscheduler.core.base import Base
from pydolphinscheduler.core.task import Task
from pydolphinscheduler.exceptions import PyDSParamException
class Status(Base):
    """Parent of the condition-status wrappers :class:`SUCCESS` and :class:`FAILURE`.

    Holds the tasks whose terminal state is inspected and renders them into
    the dependent-item structure the Java gateway expects.
    """

    def __init__(self, *tasks):
        super().__init__(f"Condition.{self.status_name()}")
        self.tasks = tasks

    def __repr__(self) -> str:
        # Used by ConditionOperator.set_define_attr to pick its target attribute.
        return "depend_item_list"

    @classmethod
    def status_name(cls) -> str:
        """Get name for Status or its sub class."""
        return cls.__name__.upper()

    def get_define(self, camel_attr: bool = True) -> List:
        """Get status definition attribute communicate to Java gateway server.

        :raises PyDSParamException: if any argument is not a :class:`Task`.
        """
        status = self.status_name()
        for task in self.tasks:
            if not isinstance(task, Task):
                raise PyDSParamException(
                    "%s only accept class Task or sub class Task, but get %s",
                    (status, type(task)),
                )
        return [{"depTaskCode": task.code, "status": status} for task in self.tasks]
class SUCCESS(Status):
    """Condition status selecting tasks that must have finished successfully.

    Inherits ``Status.__init__(*tasks)``; the class name itself supplies the
    ``SUCCESS`` marker via :func:`Status.status_name`.
    """
class FAILURE(Status):
    """Condition status selecting tasks that must have failed.

    Inherits ``Status.__init__(*tasks)``; the class name itself supplies the
    ``FAILURE`` marker via :func:`Status.status_name`.
    """
class ConditionOperator(Base):
    """Set ConditionTask or ConditionOperator with specific operator.

    Parent of :class:`And` and :class:`Or`; collects :class:`Status` objects
    or nested operators and renders them under this operator's relation.
    """

    _DEFINE_ATTR = {
        "relation",
    }

    def __init__(self, *args):
        super().__init__(self.__class__.__name__)
        self.args = args

    def __repr__(self) -> str:
        # Attribute name a parent operator collects nested operators under.
        return "depend_task_list"

    @classmethod
    def operator_name(cls) -> str:
        """Get operator name in different class."""
        return cls.__name__.upper()

    @property
    def relation(self) -> str:
        """Get operator name in different class, for function :func:`get_define`."""
        return self.operator_name()

    def set_define_attr(self) -> str:
        """Set attribute to function :func:`get_define`.

        It is a wrapper for both `And` and `Or` operator.

        :return: the attribute name that was set on ``self`` — either
            ``depend_item_list`` (args are Status) or ``depend_task_list``
            (args are nested operators).
        :raises PyDSParamException: on mixed or unsupported argument types.
        """
        result = []
        attr = None
        for condition in self.args:
            if isinstance(condition, (Status, ConditionOperator)):
                if attr is None:
                    attr = repr(condition)
                elif repr(condition) != attr:
                    raise PyDSParamException(
                        "Condition %s operator parameter only support same type.",
                        self.relation,
                    )
            else:
                raise PyDSParamException(
                    "Condition %s operator parameter support ConditionTask and ConditionOperator but got %s.",
                    (self.relation, type(condition)),
                )
            # Status.get_define returns a list (extend); operators return a dict (append).
            if attr == "depend_item_list":
                result.extend(condition.get_define())
            else:
                result.append(condition.get_define())
        # NOTE(review): with zero args, attr stays None and setattr raises
        # TypeError; callers are expected to pass at least one condition.
        setattr(self, attr, result)
        return attr

    def get_define(self, camel_attr=True) -> Dict:
        """Overwrite Base.get_define to get task Condition specific get define."""
        attr = self.set_define_attr()
        dependent_define_attr = self._DEFINE_ATTR.union({attr})
        # Bug fix: forward the caller's camel_attr instead of hard-coding True.
        return super().get_define_custom(
            camel_attr=camel_attr, custom_attr=dependent_define_attr
        )
class And(ConditionOperator):
    """AND relation for task condition.

    Accepts :class:`Status` objects or nested condition operators and joins
    them with the AND relation. Inherits ``ConditionOperator.__init__(*args)``.
    """
class Or(ConditionOperator):
    """OR relation for task condition.

    Accepts :class:`Status` objects or nested condition operators and joins
    them with the OR relation. Inherits ``ConditionOperator.__init__(*args)``.
    """
class Conditions(Task):
    """Task condition object, declare behavior for condition task to dolphinscheduler.

    :param name: task name.
    :param condition: a :class:`ConditionOperator` tree describing which
        upstream task states gate this task.
    """

    def __init__(self, name: str, condition: ConditionOperator, *args, **kwargs):
        super().__init__(name, TaskType.CONDITIONS, *args, **kwargs)
        self.condition = condition
        # Set condition tasks as current task downstream
        self._set_dep()

    def _set_dep(self) -> None:
        """Set downstream according to parameter `condition`.

        NOTE(review): only args that are themselves ConditionOperator are
        walked, so a Status passed directly to the top-level operator adds no
        downstream task — confirm the two-level nesting requirement with callers.
        """
        downstream = []
        for cond in self.condition.args:
            if isinstance(cond, ConditionOperator):
                for status in cond.args:
                    downstream.extend(list(status.tasks))
        self.set_downstream(downstream)

    @property
    def task_params(self, camel_attr: bool = True, custom_attr: set = None) -> Dict:
        """Override Task.task_params for Condition task.

        Condition task have some specials attribute `dependence`, and in most of the task
        this attribute is None and use empty dict `{}` as default value. We do not use class
        attribute `_task_custom_attr` due to avoid attribute cover.
        """
        params = super().task_params
        params["dependence"] = self.condition.get_define()
        return params

121
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/datax.py

@ -0,0 +1,121 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Task datax."""
from typing import Dict, List, Optional
from pydolphinscheduler.constants import TaskType
from pydolphinscheduler.core.database import Database
from pydolphinscheduler.core.task import Task
class CustomDataX(Task):
    """Custom DataX task: run a user-supplied DataX JSON template verbatim.

    Prefer :class:`DataX` when the job can be described by a datasource,
    a datatarget and SQL alone.
    """

    # Marker telling the server the JSON config is user provided.
    CUSTOM_CONFIG = 1

    _task_custom_attr = {"custom_config", "json", "xms", "xmx"}

    def __init__(
        self,
        name: str,
        json: str,
        xms: Optional[int] = 1,
        xmx: Optional[int] = 1,
        *args,
        **kwargs
    ):
        super().__init__(name, TaskType.DATAX, *args, **kwargs)
        self.custom_config = self.CUSTOM_CONFIG
        self.json = json
        # JVM memory settings forwarded to the DataX process — units are not
        # stated here; confirm against the server side.
        self.xms = xms
        self.xmx = xmx
class DataX(Task):
    """Task DataX object, declare behavior for DataX task to dolphinscheduler.

    It should run database datax job in multiply sql link engine, such as:
    - MySQL
    - Oracle
    - Postgresql
    - SQLServer
    You provider datasource_name and datatarget_name contain connection information, it decisions which
    database type and database instance would synchronous data.
    """

    # Marker telling the server the config is generated, not user provided.
    CUSTOM_CONFIG = 0

    # Attributes rendered into task_params for the Java gateway.
    _task_custom_attr = {
        "custom_config",
        "sql",
        "target_table",
        "job_speed_byte",
        "job_speed_record",
        "pre_statements",
        "post_statements",
        "xms",
        "xmx",
    }

    def __init__(
        self,
        name: str,
        datasource_name: str,
        datatarget_name: str,
        sql: str,
        target_table: str,
        job_speed_byte: Optional[int] = 0,
        job_speed_record: Optional[int] = 1000,
        pre_statements: Optional[List[str]] = None,
        post_statements: Optional[List[str]] = None,
        xms: Optional[int] = 1,
        xmx: Optional[int] = 1,
        *args,
        **kwargs
    ):
        """Init DataX task.

        :param datasource_name: named source-side connection.
        :param datatarget_name: named target-side connection.
        :param sql: extraction query run against the source.
        :param target_table: table written on the target side.
        :param job_speed_byte: byte rate limit; 0 presumably means unlimited — TODO confirm.
        :param job_speed_record: record rate limit.
        :param pre_statements: statements run before the job; defaults to [].
        :param post_statements: statements run after the job; defaults to [].
        :param xms: JVM memory setting — units not stated here, confirm server side.
        :param xmx: JVM memory setting — units not stated here, confirm server side.
        """
        super().__init__(name, TaskType.DATAX, *args, **kwargs)
        self.sql = sql
        self.custom_config = self.CUSTOM_CONFIG
        self.datasource_name = datasource_name
        self.datatarget_name = datatarget_name
        self.target_table = target_table
        self.job_speed_byte = job_speed_byte
        self.job_speed_record = job_speed_record
        self.pre_statements = pre_statements or []
        self.post_statements = post_statements or []
        self.xms = xms
        self.xmx = xmx

    @property
    def task_params(self, camel_attr: bool = True, custom_attr: set = None) -> Dict:
        """Override Task.task_params for datax task.

        datax task have some specials attribute for task_params, and is odd if we
        directly set as python property, so we Override Task.task_params here.
        """
        params = super().task_params
        # Database is merged into params — presumably a Mapping contributing
        # keys under the given "dsType"/"dataSource" names; confirm in core.database.
        datasource = Database(self.datasource_name, "dsType", "dataSource")
        params.update(datasource)
        datatarget = Database(self.datatarget_name, "dtType", "dataTarget")
        params.update(datatarget)
        return params

274
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/dependent.py

@ -0,0 +1,274 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Task dependent."""
from typing import Dict, Optional, Tuple
from pydolphinscheduler.constants import TaskType
from pydolphinscheduler.core.base import Base
from pydolphinscheduler.core.task import Task
from pydolphinscheduler.exceptions import PyDSJavaGatewayException, PyDSParamException
from pydolphinscheduler.java_gateway import launch_gateway
# Sentinel task name meaning "depend on every task in the workflow";
# the value "0" follows the Java server-side convention — TODO confirm.
DEPENDENT_ALL_TASK_IN_WORKFLOW = "0"
class DependentDate(str):
    """Enumeration-style constants for dependent date values.

    The string values mirror the Java server side; change them there first
    before changing anything here.
    """

    # TODO Maybe we should add parent level to DependentDate for easy to use, such as
    # DependentDate.MONTH.THIS_MONTH

    # Hour granularity
    CURRENT_HOUR = "currentHour"
    LAST_ONE_HOUR = "last1Hour"
    LAST_TWO_HOURS = "last2Hours"
    LAST_THREE_HOURS = "last3Hours"
    LAST_TWENTY_FOUR_HOURS = "last24Hours"

    # Day granularity
    TODAY = "today"
    LAST_ONE_DAYS = "last1Days"
    LAST_TWO_DAYS = "last2Days"
    LAST_THREE_DAYS = "last3Days"
    LAST_SEVEN_DAYS = "last7Days"

    # Week granularity
    THIS_WEEK = "thisWeek"
    LAST_WEEK = "lastWeek"
    LAST_MONDAY = "lastMonday"
    LAST_TUESDAY = "lastTuesday"
    LAST_WEDNESDAY = "lastWednesday"
    LAST_THURSDAY = "lastThursday"
    LAST_FRIDAY = "lastFriday"
    LAST_SATURDAY = "lastSaturday"
    LAST_SUNDAY = "lastSunday"

    # Month granularity
    THIS_MONTH = "thisMonth"
    LAST_MONTH = "lastMonth"
    LAST_MONTH_BEGIN = "lastMonthBegin"
    LAST_MONTH_END = "lastMonthEnd"
class DependentItem(Base):
    """Dependent item object, minimal unit for task dependent.

    It declare which project, process_definition, task are dependent to this task.
    """

    # Attributes Base.get_define renders into the gateway payload.
    _DEFINE_ATTR = {
        "project_code",
        "definition_code",
        "dep_task_code",
        "cycle",
        "date_value",
    }

    # TODO maybe we should conside overwrite operator `and` and `or` for DependentItem to
    # support more easy way to set relation
    def __init__(
        self,
        project_name: str,
        process_definition_name: str,
        dependent_task_name: Optional[str] = DEPENDENT_ALL_TASK_IN_WORKFLOW,
        dependent_date: Optional[DependentDate] = DependentDate.TODAY,
    ):
        """Init DependentItem.

        :param project_name: project owning the depended-on workflow.
        :param process_definition_name: workflow this item depends on.
        :param dependent_task_name: task inside that workflow, or the sentinel
            "0" meaning all tasks of the workflow.
        :param dependent_date: one of the :class:`DependentDate` constants.
        :raises PyDSParamException: when dependent_date is explicitly None.
        """
        obj_name = f"{project_name}.{process_definition_name}.{dependent_task_name}.{dependent_date}"
        super().__init__(obj_name)
        self.project_name = project_name
        self.process_definition_name = process_definition_name
        self.dependent_task_name = dependent_task_name
        if dependent_date is None:
            raise PyDSParamException(
                "Parameter dependent_date must provider by got None."
            )
        else:
            self.dependent_date = dependent_date
        # Cache for the gateway lookup in get_code_from_gateway.
        self._code = {}

    def __repr__(self) -> str:
        # Attribute name DependentOperator.set_define_attr groups items under.
        return "depend_item_list"

    @property
    def project_code(self) -> str:
        """Get dependent project code."""
        return self.get_code_from_gateway().get("projectCode")

    @property
    def definition_code(self) -> str:
        """Get dependent definition code."""
        return self.get_code_from_gateway().get("processDefinitionCode")

    @property
    def dep_task_code(self) -> str:
        """Get dependent tasks code list."""
        if self.is_all_task:
            return DEPENDENT_ALL_TASK_IN_WORKFLOW
        else:
            return self.get_code_from_gateway().get("taskDefinitionCode")

    # TODO Maybe we should get cycle from dependent date class.
    @property
    def cycle(self) -> str:
        """Get dependent cycle.

        Derived by substring matching on the textual DependentDate constant.
        """
        if "Hour" in self.dependent_date:
            return "hour"
        elif self.dependent_date == "today" or "Days" in self.dependent_date:
            return "day"
        elif "Month" in self.dependent_date:
            return "month"
        else:
            # Everything else (thisWeek/lastWeek/last<weekday>) is weekly.
            return "week"

    @property
    def date_value(self) -> str:
        """Get dependent date."""
        return self.dependent_date

    @property
    def is_all_task(self) -> bool:
        """Check whether dependent all tasks or not."""
        return self.dependent_task_name == DEPENDENT_ALL_TASK_IN_WORKFLOW

    @property
    def code_parameter(self) -> Tuple:
        """Get name info parameter to query code.

        Task name is replaced by None when depending on the whole workflow.
        """
        param = (
            self.project_name,
            self.process_definition_name,
            self.dependent_task_name if not self.is_all_task else None,
        )
        return param

    def get_code_from_gateway(self) -> Dict:
        """Get project, definition, task code from given parameter.

        Result is cached in ``self._code`` so the gateway is queried at most once.

        NOTE(review): the broad ``except Exception`` discards the original
        cause; consider chaining (``raise ... from e``) when touching this.
        """
        if self._code:
            return self._code
        else:
            gateway = launch_gateway()
            try:
                self._code = gateway.entry_point.getDependentInfo(*self.code_parameter)
                return self._code
            except Exception:
                raise PyDSJavaGatewayException("Function get_code_from_gateway error.")
class DependentOperator(Base):
    """Set DependentItem or dependItemList with specific operator.

    Parent of :class:`And` and :class:`Or`; groups :class:`DependentItem`
    objects or nested operators under this operator's relation.
    """

    _DEFINE_ATTR = {
        "relation",
    }

    def __init__(self, *args):
        super().__init__(self.__class__.__name__)
        self.args = args

    def __repr__(self) -> str:
        # Attribute name a parent operator collects nested operators under.
        return "depend_task_list"

    @classmethod
    def operator_name(cls) -> str:
        """Get operator name in different class."""
        return cls.__name__.upper()

    @property
    def relation(self) -> str:
        """Get operator name in different class, for function :func:`get_define`."""
        return self.operator_name()

    def set_define_attr(self) -> str:
        """Set attribute to function :func:`get_define`.

        It is a wrapper for both `And` and `Or` operator.

        :return: the attribute name set on ``self`` — ``depend_item_list``
            (args are DependentItem) or ``depend_task_list`` (nested operators).
        :raises PyDSParamException: on mixed or unsupported argument types.
        """
        result = []
        attr = None
        for dependent in self.args:
            if isinstance(dependent, (DependentItem, DependentOperator)):
                if attr is None:
                    attr = repr(dependent)
                elif repr(dependent) != attr:
                    raise PyDSParamException(
                        "Dependent %s operator parameter only support same type.",
                        self.relation,
                    )
            else:
                raise PyDSParamException(
                    "Dependent %s operator parameter support DependentItem and "
                    "DependentOperator but got %s.",
                    (self.relation, type(dependent)),
                )
            result.append(dependent.get_define())
        # NOTE(review): with zero args, attr stays None and setattr raises
        # TypeError; callers are expected to pass at least one dependent.
        setattr(self, attr, result)
        return attr

    def get_define(self, camel_attr=True) -> Dict:
        """Overwrite Base.get_define to get task dependent specific get define."""
        attr = self.set_define_attr()
        dependent_define_attr = self._DEFINE_ATTR.union({attr})
        # Bug fix: forward the caller's camel_attr instead of hard-coding True.
        return super().get_define_custom(
            camel_attr=camel_attr, custom_attr=dependent_define_attr
        )
class And(DependentOperator):
    """AND relation for task dependent.

    Accepts :class:`DependentItem` objects or nested dependent operators and
    joins them with the AND relation. Inherits ``DependentOperator.__init__``.
    """
class Or(DependentOperator):
    """OR relation for task dependent.

    Accepts :class:`DependentItem` objects or nested dependent operators and
    joins them with the OR relation. Inherits ``DependentOperator.__init__``.
    """
class Dependent(Task):
    """Task dependent object, declare behavior for dependent task to dolphinscheduler."""

    def __init__(self, name: str, dependence: DependentOperator, *args, **kwargs):
        super().__init__(name, TaskType.DEPENDENT, *args, **kwargs)
        self.dependence = dependence

    @property
    def task_params(self, camel_attr: bool = True, custom_attr: set = None) -> Dict:
        """Extend the base task_params with the rendered `dependence` tree.

        The key is set directly rather than via `_task_custom_attr` so the
        base-class attribute of the same name is not shadowed.
        """
        base_params = super().task_params
        base_params["dependence"] = self.dependence.get_define()
        return base_params

101
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/http.py

@ -0,0 +1,101 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Task shell."""
from typing import Optional
from pydolphinscheduler.constants import TaskType
from pydolphinscheduler.core.task import Task
from pydolphinscheduler.exceptions import PyDSParamException
class HttpMethod:
    """Allowed HTTP verbs for :class:`Http` tasks.

    Constant values are the literal verb names the server expects.
    """

    GET = "GET"
    POST = "POST"
    HEAD = "HEAD"
    PUT = "PUT"
    DELETE = "DELETE"
class HttpCheckCondition:
    """Success criteria for :class:`Http` tasks.

    - STATUS_CODE_DEFAULT: response code 200 means success.
    - STATUS_CODE_CUSTOM: a user-defined response code means success.
    - BODY_CONTAINS: success when the body contains a user-defined text.
    - BODY_NOT_CONTAINS: success when the body does not contain that text.
    """

    STATUS_CODE_DEFAULT = "STATUS_CODE_DEFAULT"
    STATUS_CODE_CUSTOM = "STATUS_CODE_CUSTOM"
    BODY_CONTAINS = "BODY_CONTAINS"
    BODY_NOT_CONTAINS = "BODY_NOT_CONTAINS"
class Http(Task):
    """Task HTTP object, declare behavior for HTTP task to dolphinscheduler."""

    # Attributes rendered into task_params for the Java gateway.
    _task_custom_attr = {
        "url",
        "http_method",
        "http_params",
        "http_check_condition",
        "condition",
        "connect_timeout",
        "socket_timeout",
    }

    def __init__(
        self,
        name: str,
        url: str,
        http_method: Optional[str] = HttpMethod.GET,
        http_params: Optional[str] = None,
        http_check_condition: Optional[str] = HttpCheckCondition.STATUS_CODE_DEFAULT,
        condition: Optional[str] = None,
        connect_timeout: Optional[int] = 60000,
        socket_timeout: Optional[int] = 60000,
        *args,
        **kwargs
    ):
        """Init Http task.

        :param url: target URL the task requests.
        :param http_method: one of the :class:`HttpMethod` constants.
        :param http_params: request parameters; defaults to an empty list.
        :param http_check_condition: one of :class:`HttpCheckCondition`.
        :param condition: expected code/text; required unless
            http_check_condition is STATUS_CODE_DEFAULT.
        :param connect_timeout: connect timeout, presumably milliseconds — TODO confirm.
        :param socket_timeout: socket timeout, presumably milliseconds — TODO confirm.
        :raises PyDSParamException: on invalid method/condition combinations.
        """
        super().__init__(name, TaskType.HTTP, *args, **kwargs)
        self.url = url
        # NOTE(review): hasattr also accepts attribute names like "__doc__";
        # validation assumes callers pass real HttpMethod constant names.
        if not hasattr(HttpMethod, http_method):
            raise PyDSParamException(
                "Parameter http_method %s not support.", http_method
            )
        self.http_method = http_method
        self.http_params = http_params or []
        if not hasattr(HttpCheckCondition, http_check_condition):
            raise PyDSParamException(
                "Parameter http_check_condition %s not support.", http_check_condition
            )
        self.http_check_condition = http_check_condition
        # condition is mandatory whenever the check is not the plain 200 check.
        if (
            http_check_condition != HttpCheckCondition.STATUS_CODE_DEFAULT
            and condition is None
        ):
            raise PyDSParamException(
                "Parameter condition must provider if http_check_condition not equal to STATUS_CODE_DEFAULT"
            )
        self.condition = condition
        self.connect_timeout = connect_timeout
        self.socket_timeout = socket_timeout

60
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/procedure.py

@ -0,0 +1,60 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Task procedure."""
from typing import Dict
from pydolphinscheduler.constants import TaskType
from pydolphinscheduler.core.database import Database
from pydolphinscheduler.core.task import Task
class Procedure(Task):
    """Task Procedure object, declare behavior for Procedure task to dolphinscheduler.

    Runs a stored procedure on the database instance named by
    ``datasource_name``; supported engines include ClickHouse, DB2, HIVE,
    MySQL, Oracle, Postgresql, Presto and SQLServer.
    """

    _task_custom_attr = {"method"}

    def __init__(self, name: str, datasource_name: str, method: str, *args, **kwargs):
        super().__init__(name, TaskType.PROCEDURE, *args, **kwargs)
        self.datasource_name = datasource_name
        self.method = method

    @property
    def task_params(self, camel_attr: bool = True, custom_attr: set = None) -> Dict:
        """Merge datasource connection info into the base task_params dict."""
        params = super().task_params
        params.update(Database(self.datasource_name, "type", "datasource"))
        return params

51
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/python.py

@ -0,0 +1,51 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Task Python."""
import inspect
import types
from typing import Any
from pydolphinscheduler.constants import TaskType
from pydolphinscheduler.core.task import Task
from pydolphinscheduler.exceptions import PyDSParamException
class Python(Task):
    """Task Python object, declare behavior for Python task to dolphinscheduler."""

    _task_custom_attr = {
        "raw_script",
    }

    def __init__(self, name: str, code: Any, *args, **kwargs):
        super().__init__(name, TaskType.PYTHON, *args, **kwargs)
        self._code = code

    @property
    def raw_script(self) -> str:
        """Get python task define attribute `raw_script`.

        Accepts either a literal source string or a plain function, whose
        source is recovered with :func:`inspect.getsource`.

        :raises PyDSParamException: when ``code`` is neither a str nor a function.
        """
        if isinstance(self._code, str):
            return self._code
        elif isinstance(self._code, types.FunctionType):
            return inspect.getsource(self._code)
        else:
            # Bug fix: placeholder was a bare "%", which is not a valid
            # conversion specifier; use "%s" so the offending type renders.
            raise PyDSParamException(
                "Parameter code do not support %s for now.", type(self._code)
            )

38
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/shell.py

@ -0,0 +1,38 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Task shell."""
from pydolphinscheduler.constants import TaskType
from pydolphinscheduler.core.task import Task
class Shell(Task):
    """Task shell object, declare behavior for shell task to dolphinscheduler.

    TODO maybe we could use instance name to replace attribute `name`
    which is simplify as `task_shell = Shell(command = "echo 1")` and
    task.name assign to `task_shell`
    """

    # The script body is the only task-specific parameter.
    _task_custom_attr = {"raw_script"}

    def __init__(self, name: str, command: str, *args, **kwargs):
        super().__init__(name, TaskType.SHELL, *args, **kwargs)
        # The command string is stored verbatim as the script to execute.
        self.raw_script = command

99
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sql.py

@ -0,0 +1,99 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Task sql."""
import re
from typing import Dict, Optional
from pydolphinscheduler.constants import TaskType
from pydolphinscheduler.core.database import Database
from pydolphinscheduler.core.task import Task
class SqlType:
    """SQL type markers: SELECT for queries, NOT_SELECT for DML/DDL."""

    SELECT = 0
    NOT_SELECT = 1
class Sql(Task):
    """Task SQL object, declare behavior for SQL task to dolphinscheduler.

    It should run sql job in multiply sql lik engine, such as:
    - ClickHouse
    - DB2
    - HIVE
    - MySQL
    - Oracle
    - Postgresql
    - Presto
    - SQLServer
    You provider datasource_name contain connection information, it decisions which
    database type and database instance would run this sql.
    """

    # Attributes rendered into task_params for the Java gateway.
    _task_custom_attr = {
        "sql",
        "sql_type",
        "pre_statements",
        "post_statements",
        "display_rows",
    }

    def __init__(
        self,
        name: str,
        datasource_name: str,
        sql: str,
        pre_statements: Optional[str] = None,
        post_statements: Optional[str] = None,
        display_rows: Optional[int] = 10,
        *args,
        **kwargs
    ):
        """Init Sql task.

        :param datasource_name: named datasource holding connection info.
        :param sql: statement to execute.
        :param pre_statements: statements run before `sql`; defaults to [].
        :param post_statements: statements run after `sql`; defaults to [].
        :param display_rows: number of result rows to display.
        """
        super().__init__(name, TaskType.SQL, *args, **kwargs)
        self.sql = sql
        self.datasource_name = datasource_name
        self.pre_statements = pre_statements or []
        self.post_statements = post_statements or []
        self.display_rows = display_rows

    @property
    def sql_type(self) -> int:
        """Judgement sql type, use regexp to check which type of the sql is.

        NOTE(review): heuristic only — any statement without a leading
        insert/delete/drop/update/alter keyword counts as SELECT, so e.g.
        CREATE or TRUNCATE classify as SELECT; confirm this matches the
        server-side expectation.
        """
        pattern_select_str = (
            "^(?!(.* |)insert |(.* |)delete |(.* |)drop |(.* |)update |(.* |)alter ).*"
        )
        pattern_select = re.compile(pattern_select_str, re.IGNORECASE)
        if pattern_select.match(self.sql) is None:
            return SqlType.NOT_SELECT
        else:
            return SqlType.SELECT

    @property
    def task_params(self, camel_attr: bool = True, custom_attr: set = None) -> Dict:
        """Override Task.task_params for sql task.

        sql task have some specials attribute for task_params, and is odd if we
        directly set as python property, so we Override Task.task_params here.
        """
        params = super().task_params
        # Database is merged into params — presumably a Mapping contributing
        # "type"/"datasource" keyed connection info; confirm in core.database.
        datasource = Database(self.datasource_name, "type", "datasource")
        params.update(datasource)
        return params

55
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/sub_process.py

@ -0,0 +1,55 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Task sub_process."""
from typing import Dict
from pydolphinscheduler.constants import TaskType
from pydolphinscheduler.core.task import Task
from pydolphinscheduler.exceptions import PyDSProcessDefinitionNotAssignException
from pydolphinscheduler.java_gateway import launch_gateway
class SubProcess(Task):
    """Task SubProcess object, declare behavior for SubProcess task to dolphinscheduler."""

    _task_custom_attr = {"process_definition_code"}

    def __init__(self, name: str, process_definition_name: str, *args, **kwargs):
        super().__init__(name, TaskType.SUB_PROCESS, *args, **kwargs)
        self.process_definition_name = process_definition_name

    @property
    def process_definition_code(self) -> str:
        """Fetch the code of the referenced process definition via the gateway."""
        info = self.get_process_definition_info(self.process_definition_name)
        return info.get("code")

    def get_process_definition_info(self, process_definition_name: str) -> Dict:
        """Query the Java gateway for the definition's id, name and code.

        :raises PyDSProcessDefinitionNotAssignException: when this task is not
            attached to a ProcessDefinition yet.
        """
        if not self.process_definition:
            raise PyDSProcessDefinitionNotAssignException(
                "ProcessDefinition must be provider for task SubProcess."
            )
        gateway = launch_gateway()
        return gateway.entry_point.getProcessDefinitionInfo(
            self.process_definition.user.name,
            self.process_definition.project.name,
            process_definition_name,
        )

158
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/tasks/switch.py

@ -0,0 +1,158 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Task Switch."""
from typing import Dict, Optional
from pydolphinscheduler.constants import TaskType
from pydolphinscheduler.core.base import Base
from pydolphinscheduler.core.task import Task
from pydolphinscheduler.exceptions import PyDSParamException
class SwitchBranch(Base):
    """Base class of ConditionBranch of task switch.

    It is a parent class for :class:`Branch` and :class:`Default`.
    """

    _DEFINE_ATTR = {
        "next_node",
    }

    def __init__(self, task: Task, exp: Optional[str] = None):
        super().__init__(f"Switch.{self.__class__.__name__.upper()}")
        self.task = task
        self.exp = exp

    @property
    def next_node(self) -> str:
        """Get task switch property next_node, it return task code when init class switch."""
        return self.task.code

    @property
    def condition(self) -> Optional[str]:
        """Get task switch property condition."""
        return self.exp

    def get_define(self, camel_attr: bool = True) -> Dict:
        """Get :class:`ConditionBranch` definition attribute communicate to Java gateway server.

        Bug fix: the original mutated the class-level ``_DEFINE_ATTR`` set in
        place (``self._DEFINE_ATTR.add("condition")``), leaking ``condition``
        into every other SwitchBranch instance — including :class:`Default`
        branches that have none. Shadow it with an instance-level copy instead,
        and forward ``camel_attr`` rather than silently ignoring it.
        """
        if self.condition:
            self._DEFINE_ATTR = self._DEFINE_ATTR | {"condition"}
        return super().get_define(camel_attr)
class Branch(SwitchBranch):
    """Conditional branch for switch task.

    When its condition expression matches, the wrapped task becomes the
    downstream of the switch; if no Branch matches, the :class:`Default`
    branch's task runs instead.
    """

    def __init__(self, condition: str, task: Task):
        super().__init__(task, condition)
class Default(SwitchBranch):
    """Fallback branch for switch task.

    Runs when no :class:`Branch` condition matches; each switch condition may
    declare at most one single Default.
    """

    def __init__(self, task: Task):
        super().__init__(task)
class SwitchCondition(Base):
    """Set switch condition of given parameter."""

    _DEFINE_ATTR = {
        "depend_task_list",
    }

    def __init__(self, *args):
        super().__init__(self.__class__.__name__)
        self.args = args

    def set_define_attr(self) -> None:
        """Set attribute to function :func:`get_define`.

        It is a wrapper for both `And` and `Or` operator.

        Bug fixes vs the original:
        * ``self._DEFINE_ATTR.add(...)`` mutated the class-level set shared by
          every SwitchCondition instance; we now shadow it with an
          instance-level copy.
        * A :class:`Branch` appearing after the single :class:`Default` was
          wrongly rejected with the "exactly one default branch" error; only a
          second Default is rejected now.

        :raises PyDSParamException: on a non-SwitchBranch argument or a second
            Default branch.
        """
        result = []
        num_branch_default = 0
        for condition in self.args:
            if not isinstance(condition, SwitchBranch):
                raise PyDSParamException(
                    "Task Switch's parameter only support SwitchBranch but got %s.",
                    type(condition),
                )
            if isinstance(condition, Default):
                if num_branch_default >= 1:
                    raise PyDSParamException(
                        "Task Switch's parameter only support exactly one default branch."
                    )
                self._DEFINE_ATTR = self._DEFINE_ATTR | {"next_node"}
                setattr(self, "next_node", condition.next_node)
                num_branch_default += 1
            elif isinstance(condition, Branch):
                result.append(condition.get_define())
        # Handle switch default branch, default value is `""` if not provide.
        if num_branch_default == 0:
            self._DEFINE_ATTR = self._DEFINE_ATTR | {"next_node"}
            setattr(self, "next_node", "")
        setattr(self, "depend_task_list", result)

    def get_define(self, camel_attr=True) -> Dict:
        """Overwrite Base.get_define to get task Condition specific get define."""
        self.set_define_attr()
        # Forward camel_attr (the original hard-coded the base default).
        return super().get_define(camel_attr)
class Switch(Task):
    """Task switch object, declare behavior for switch task to dolphinscheduler.

    :param name: task name shown in dolphinscheduler.
    :param condition: a :class:`SwitchCondition` holding the branches; every
        branch's task is wired as this task's downstream on construction.
    """

    def __init__(self, name: str, condition: SwitchCondition, *args, **kwargs):
        super().__init__(name, TaskType.SWITCH, *args, **kwargs)
        self.condition = condition
        # Set condition tasks as current task downstream
        self._set_dep()

    def _set_dep(self) -> None:
        """Set downstream according to parameter `condition`."""
        downstream = [
            cond.task for cond in self.condition.args if isinstance(cond, SwitchBranch)
        ]
        self.set_downstream(downstream)

    @property
    def task_params(self) -> Dict:
        """Override Task.task_params for switch task.

        switch task have some specials attribute `switch`, and in most of the task
        this attribute is None and use empty dict `{}` as default value. We do not use class
        attribute `_task_custom_attr` due to avoid attribute cover.
        """
        # BUGFIX: the original declared extra parameters
        # ``(self, camel_attr=True, custom_attr=None)`` on a @property;
        # properties can never receive arguments, so they were dead and
        # misleading. Dropped — no caller could have passed them.
        params = super().task_params
        params["switchResult"] = self.condition.get_define()
        return params

18
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/__init__.py

@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Init utils package."""

82
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/date.py

@ -0,0 +1,82 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Date util function collections."""
from datetime import datetime
from pydolphinscheduler.constants import Delimiter, Time
# String lengths of the supported datetime-with-time formats:
# 15 -> "%Y%m%d %H%M%S", 19 -> the dash/bar separated forms below.
LEN_SUPPORT_DATETIME = (
    15,
    19,
)
# Composite datetime format strings, built from the project's Time constants.
FMT_SHORT = f"{Time.FMT_SHORT_DATE} {Time.FMT_NO_COLON_TIME}"
FMT_DASH = f"{Time.FMT_DASH_DATE} {Time.FMT_STD_TIME}"
FMT_STD = f"{Time.FMT_STD_DATE} {Time.FMT_STD_TIME}"
# Sentinel "end of time" used when no schedule end time is provided.
MAX_DATETIME = datetime(9999, 12, 31, 23, 59, 59)
def conv_to_schedule(src: datetime) -> str:
    """Render the given datetime as a schedule date string in ``FMT_STD`` format."""
    return src.strftime(FMT_STD)
def conv_from_str(src: str) -> datetime:
    """Convert given string to datetime.

    This function give an ability to convert string to datetime, and for now it could handle
    format like:

    - %Y-%m-%d
    - %Y/%m/%d
    - %Y%m%d
    - %Y-%m-%d %H:%M:%S
    - %Y/%m/%d %H:%M:%S
    - %Y%m%d %H%M%S

    If pattern not like above be given will raise NotImplementedError.

    :param src: candidate datetime string; the format is dispatched on length
        and on which delimiters appear in the string.
    :raises NotImplementedError: when ``src`` matches none of the supported formats.
    """
    len_ = len(src)
    if len_ == Time.LEN_SHORT_DATE:
        return datetime.strptime(src, Time.FMT_SHORT_DATE)
    elif len_ == Time.LEN_STD_DATE:
        # NOTE(review): Delimiter.BAR appears to select the "-" separated
        # standard format and Delimiter.DASH the "/" separated one — confirm
        # against pydolphinscheduler.constants.Delimiter.
        if Delimiter.BAR in src:
            return datetime.strptime(src, Time.FMT_STD_DATE)
        elif Delimiter.DASH in src:
            return datetime.strptime(src, Time.FMT_DASH_DATE)
        else:
            # BUGFIX: NotImplementedError("%s ...", src) kept ``src`` as a
            # bare extra arg, so the message printed as a tuple and never
            # interpolated the value. Use an f-string instead.
            raise NotImplementedError(
                f"{src} could not be convert to datetime for now."
            )
    elif len_ in LEN_SUPPORT_DATETIME:
        if Delimiter.BAR in src and Delimiter.COLON in src:
            return datetime.strptime(src, FMT_STD)
        elif Delimiter.DASH in src and Delimiter.COLON in src:
            return datetime.strptime(src, FMT_DASH)
        elif (
            Delimiter.DASH not in src
            and Delimiter.BAR not in src
            and Delimiter.COLON not in src
        ):
            return datetime.strptime(src, FMT_SHORT)
        else:
            raise NotImplementedError(
                f"{src} could not be convert to datetime for now."
            )
    else:
        raise NotImplementedError(f"{src} could not be convert to datetime for now.")

39
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/utils/string.py

@ -0,0 +1,39 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""String util function collections."""
from pydolphinscheduler.constants import Delimiter
def attr2camel(attr: str, include_private=True):
    """Convert a class attribute name to camel case.

    When ``include_private`` is true, leading underscores are stripped first so
    private attributes map onto the same camel-case name as public ones.
    """
    name = attr.lstrip(Delimiter.UNDERSCORE) if include_private else attr
    return snake2camel(name)
def snake2camel(snake: str):
    """Convert a snake_case string to camelCase."""
    head, *rest = snake.split(Delimiter.UNDERSCORE)
    # First word keeps its case; each following word is title-cased.
    return head + "".join(word.title() for word in rest)
def class_name2camel(class_name: str):
    """Convert a class name string to camelCase (lower-case first letter)."""
    stripped = class_name.lstrip(Delimiter.UNDERSCORE)
    first, remainder = stripped[0], stripped[1:]
    return first.lower() + snake2camel(remainder)

18
dolphinscheduler-python/pydolphinscheduler/tests/__init__.py

@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Init tests package."""

18
dolphinscheduler-python/pydolphinscheduler/tests/core/__init__.py

@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Init core package tests."""

54
dolphinscheduler-python/pydolphinscheduler/tests/core/test_database.py

@ -0,0 +1,54 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Test Database."""
from unittest.mock import patch
import pytest
from pydolphinscheduler.core.database import Database
# Fixture values shared by the Database tests: the datasource name looked up,
# and the two keys Database stores its resolved type/id under.
TEST_DATABASE_DATASOURCE_NAME = "test_datasource"
TEST_DATABASE_TYPE_KEY = "type"
TEST_DATABASE_KEY = "datasource"
@pytest.mark.parametrize(
    "expect",
    [
        {
            TEST_DATABASE_TYPE_KEY: "mock_type",
            TEST_DATABASE_KEY: 1,
        }
    ],
)
@patch(
    "pydolphinscheduler.core.task.Task.gen_code_and_version",
    return_value=(123, 1),
)
@patch(
    "pydolphinscheduler.core.database.Database.get_database_info",
    return_value=({"id": 1, "type": "mock_type"}),
)
def test_get_datasource_detail(mock_datasource, mock_code_version, expect):
    """Test :func:`get_database_type` and :func:`get_database_id` can return expect value."""
    # Decorators apply bottom-up, so the innermost @patch (get_database_info)
    # binds to the first mock parameter.
    database_info = Database(
        TEST_DATABASE_DATASOURCE_NAME, TEST_DATABASE_TYPE_KEY, TEST_DATABASE_KEY
    )
    # Database compares equal to the plain dict `expect` — presumably it is a
    # dict subclass; verify against pydolphinscheduler.core.database.
    assert expect == database_info

342
dolphinscheduler-python/pydolphinscheduler/tests/core/test_process_definition.py

@ -0,0 +1,342 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Test process definition."""
from datetime import datetime
from typing import Any
import pytest
from freezegun import freeze_time
from pydolphinscheduler.constants import (
ProcessDefinitionDefault,
ProcessDefinitionReleaseState,
)
from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.exceptions import PyDSParamException
from pydolphinscheduler.side import Project, Tenant, User
from pydolphinscheduler.utils.date import conv_to_schedule
from tests.testing.task import Task
# Process definition name reused by every test in this module.
TEST_PROCESS_DEFINITION_NAME = "simple-test-process-definition"
@pytest.mark.parametrize("func", ["run", "submit", "start"])
def test_process_definition_key_attr(func):
    """Test process definition have specific functions or attributes."""
    # Only presence is checked, not behavior — these entry points talk to the
    # Java gateway and are exercised elsewhere.
    with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd:
        assert hasattr(
            pd, func
        ), f"ProcessDefinition instance don't have attribute `{func}`"
@pytest.mark.parametrize(
    "name,value",
    [
        ("timezone", ProcessDefinitionDefault.TIME_ZONE),
        ("project", Project(ProcessDefinitionDefault.PROJECT)),
        ("tenant", Tenant(ProcessDefinitionDefault.TENANT)),
        (
            "user",
            User(
                ProcessDefinitionDefault.USER,
                ProcessDefinitionDefault.USER_PWD,
                ProcessDefinitionDefault.USER_EMAIL,
                ProcessDefinitionDefault.USER_PHONE,
                ProcessDefinitionDefault.TENANT,
                ProcessDefinitionDefault.QUEUE,
                ProcessDefinitionDefault.USER_STATE,
            ),
        ),
        ("worker_group", ProcessDefinitionDefault.WORKER_GROUP),
        ("release_state", ProcessDefinitionReleaseState.ONLINE),
    ],
)
def test_process_definition_default_value(name, value):
    """Test process definition default attributes."""
    # Each attribute must equal the project-wide default when the caller
    # passes nothing but the name.
    with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd:
        assert getattr(pd, name) == value, (
            f"ProcessDefinition instance attribute `{name}` not with "
            f"except default value `{getattr(pd, name)}`"
        )
@pytest.mark.parametrize(
    "name,cls,expect",
    [
        ("name", str, "name"),
        ("description", str, "description"),
        ("schedule", str, "schedule"),
        ("timezone", str, "timezone"),
        ("worker_group", str, "worker_group"),
        ("timeout", int, 1),
        ("release_state", str, "OFFLINE"),
        ("param", dict, {"key": "value"}),
    ],
)
def test_set_attr(name, cls, expect):
    """Test process definition set attributes which get with same type."""
    # Round-trip check: attributes set with a plain value must read back
    # unchanged (no conversion for these attribute names).
    with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd:
        setattr(pd, name, expect)
        assert (
            getattr(pd, name) == expect
        ), f"ProcessDefinition set attribute `{name}` do not work expect"
@pytest.mark.parametrize(
    "set_attr,set_val,get_attr,get_val",
    [
        ("_project", "project", "project", Project("project")),
        ("_tenant", "tenant", "tenant", Tenant("tenant")),
        ("_start_time", "2021-01-01", "start_time", datetime(2021, 1, 1)),
        ("_end_time", "2021-01-01", "end_time", datetime(2021, 1, 1)),
    ],
)
def test_set_attr_return_special_object(set_attr, set_val, get_attr, get_val):
    """Test process definition set attributes which get with different type."""
    # Setting the private backing field (e.g. `_tenant` as str) must surface
    # through the public property as the converted object (e.g. Tenant).
    with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd:
        setattr(pd, set_attr, set_val)
        assert get_val == getattr(
            pd, get_attr
        ), f"Set attribute {set_attr} can not get back with {get_val}."
@pytest.mark.parametrize(
    "val,expect",
    [
        (datetime(2021, 1, 1), datetime(2021, 1, 1)),
        (None, None),
        ("2021-01-01", datetime(2021, 1, 1)),
        ("2021-01-01 01:01:01", datetime(2021, 1, 1, 1, 1, 1)),
    ],
)
def test__parse_datetime(val, expect):
    """Test process definition function _parse_datetime.

    Only two datetime test cases here because we have more test cases in tests/utils/test_date.py file.
    """
    # datetime passes through, None passes through, strings are parsed.
    with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd:
        assert expect == pd._parse_datetime(
            val
        ), f"Function _parse_datetime with unexpect value by {val}."
@pytest.mark.parametrize(
    "val",
    [
        20210101,
        (2021, 1, 1),
        {"year": "2021", "month": "1", "day": 1},
    ],
)
def test__parse_datetime_not_support_type(val: Any):
    """Test process definition function _parse_datetime not support type error."""
    # int / tuple / dict are all rejected with PyDSParamException.
    with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd:
        with pytest.raises(PyDSParamException, match="Do not support value type.*?"):
            pd._parse_datetime(val)
def test_process_definition_get_define_without_task():
    """Test process definition function get_define without task."""
    # Pin the exact payload sent to the Java gateway for an empty workflow.
    # Keys here are part of the gateway contract — do not rename casually.
    expect = {
        "name": TEST_PROCESS_DEFINITION_NAME,
        "description": None,
        "project": ProcessDefinitionDefault.PROJECT,
        "tenant": ProcessDefinitionDefault.TENANT,
        "workerGroup": ProcessDefinitionDefault.WORKER_GROUP,
        "timeout": 0,
        "releaseState": ProcessDefinitionReleaseState.ONLINE,
        "param": None,
        "tasks": {},
        "taskDefinitionJson": [{}],
        "taskRelationJson": [{}],
    }
    with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd:
        assert pd.get_define() == expect
def test_process_definition_simple_context_manager():
    """Test simple create workflow in process definition context manager mode."""
    expect_tasks_num = 5
    with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME) as pd:
        # Build a linear chain task-0 -> task-1 -> ... -> task-4; tasks created
        # inside the `with` block auto-register on `pd`.
        for i in range(expect_tasks_num):
            curr_task = Task(name=f"task-{i}", task_type=f"type-{i}")
            # Set deps task i as i-1 parent
            if i > 0:
                pre_task = pd.get_one_task_by_name(f"task-{i - 1}")
                curr_task.set_upstream(pre_task)
        assert len(pd.tasks) == expect_tasks_num
        # Test if task process_definition same as origin one
        task: Task = pd.get_one_task_by_name("task-0")
        assert pd is task.process_definition
        # Test if all tasks with expect deps
        for i in range(expect_tasks_num):
            task: Task = pd.get_one_task_by_name(f"task-{i}")
            if i == 0:
                # Head of the chain: no upstream, one downstream.
                assert task._upstream_task_codes == set()
                assert task._downstream_task_codes == {
                    pd.get_one_task_by_name("task-1").code
                }
            elif i == expect_tasks_num - 1:
                # Tail of the chain: one upstream, no downstream.
                assert task._upstream_task_codes == {
                    pd.get_one_task_by_name(f"task-{i - 1}").code
                }
                assert task._downstream_task_codes == set()
            else:
                # Middle tasks: exactly one upstream and one downstream.
                assert task._upstream_task_codes == {
                    pd.get_one_task_by_name(f"task-{i - 1}").code
                }
                assert task._downstream_task_codes == {
                    pd.get_one_task_by_name(f"task-{i + 1}").code
                }
def test_process_definition_simple_separate():
    """Test process definition simple create workflow in separate mode.

    This test just test basic information, cause most of test case is duplicate to
    test_process_definition_simple_context_manager.
    """
    expect_tasks_num = 5
    pd = ProcessDefinition(TEST_PROCESS_DEFINITION_NAME)
    for i in range(expect_tasks_num):
        # Separate mode: the owning process definition is passed explicitly
        # instead of being picked up from a `with` context.
        curr_task = Task(
            name=f"task-{i}",
            task_type=f"type-{i}",
            process_definition=pd,
        )
        # Set deps task i as i-1 parent
        if i > 0:
            pre_task = pd.get_one_task_by_name(f"task-{i - 1}")
            curr_task.set_upstream(pre_task)
    assert len(pd.tasks) == expect_tasks_num
    assert all(["task-" in task.name for task in pd.task_list])
@pytest.mark.parametrize(
    "user_attrs",
    [
        {"tenant": "tenant_specific"},
        {"queue": "queue_specific"},
        {"tenant": "tenant_specific", "queue": "queue_specific"},
    ],
)
def test_set_process_definition_user_attr(user_attrs):
    """Test user with correct attributes if we specific assigned to process definition object."""
    default_value = {
        "tenant": ProcessDefinitionDefault.TENANT,
        "queue": ProcessDefinitionDefault.QUEUE,
    }
    with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME, **user_attrs) as pd:
        user = pd.user
        for attr in default_value:
            # Get assigned attribute if we specific, else get default value
            except_attr = (
                user_attrs[attr] if attr in user_attrs else default_value[attr]
            )
            # Get actually attribute of user object
            actual_attr = getattr(user, attr)
            assert (
                except_attr == actual_attr
            ), f"Except attribute is {except_attr} but get {actual_attr}"
def test_schedule_json_none_schedule():
    """Test function schedule_json with None as schedule."""
    # No crontab given -> no schedule payload at all.
    with ProcessDefinition(
        TEST_PROCESS_DEFINITION_NAME,
        schedule=None,
    ) as pd:
        assert pd.schedule_json is None
# We freeze time here, because we test start_time with None, and if will get datetime.datetime.now. If we do
# not freeze time, it will cause flaky test here.
@freeze_time("2021-01-01")
@pytest.mark.parametrize(
    "start_time,end_time,expect_date",
    [
        (
            "20210101",
            "20210201",
            {"start_time": "2021-01-01 00:00:00", "end_time": "2021-02-01 00:00:00"},
        ),
        (
            "2021-01-01",
            "2021-02-01",
            {"start_time": "2021-01-01 00:00:00", "end_time": "2021-02-01 00:00:00"},
        ),
        (
            "2021/01/01",
            "2021/02/01",
            {"start_time": "2021-01-01 00:00:00", "end_time": "2021-02-01 00:00:00"},
        ),
        # Test mix pattern
        (
            "2021/01/01 01:01:01",
            "2021-02-02 02:02:02",
            {"start_time": "2021-01-01 01:01:01", "end_time": "2021-02-02 02:02:02"},
        ),
        (
            "2021/01/01 01:01:01",
            "20210202 020202",
            {"start_time": "2021-01-01 01:01:01", "end_time": "2021-02-02 02:02:02"},
        ),
        (
            "20210101 010101",
            "2021-02-02 02:02:02",
            {"start_time": "2021-01-01 01:01:01", "end_time": "2021-02-02 02:02:02"},
        ),
        # Test None value
        (
            "2021/01/01 01:02:03",
            None,
            {"start_time": "2021-01-01 01:02:03", "end_time": "9999-12-31 23:59:59"},
        ),
        (
            None,
            None,
            {
                # Frozen "now" (2021-01-01) is the fallback start_time.
                "start_time": conv_to_schedule(datetime(2021, 1, 1)),
                "end_time": "9999-12-31 23:59:59",
            },
        ),
    ],
)
def test_schedule_json_start_and_end_time(start_time, end_time, expect_date):
    """Test function schedule_json about handle start_time and end_time.

    Only two datetime test cases here because we have more test cases in tests/utils/test_date.py file.
    """
    schedule = "0 0 0 * * ? *"
    expect = {
        "crontab": schedule,
        "startTime": expect_date["start_time"],
        "endTime": expect_date["end_time"],
        "timezoneId": ProcessDefinitionDefault.TIME_ZONE,
    }
    with ProcessDefinition(
        TEST_PROCESS_DEFINITION_NAME,
        schedule=schedule,
        start_time=start_time,
        end_time=end_time,
        timezone=ProcessDefinitionDefault.TIME_ZONE,
    ) as pd:
        assert pd.schedule_json == expect

224
dolphinscheduler-python/pydolphinscheduler/tests/core/test_task.py

@ -0,0 +1,224 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Test Task class function."""
from unittest.mock import patch
import pytest
from pydolphinscheduler.core.task import Task, TaskRelation
from tests.testing.task import Task as testTask
# Module-level mutable state shared across the parametrized runs of
# test_task_relation_add_to_set — it accumulates relations between cases,
# so the parametrize order there matters.
TEST_TASK_RELATION_SET = set()
TEST_TASK_RELATION_SIZE = 0
@pytest.mark.parametrize(
    "attr, expect",
    [
        (
            dict(),
            {
                "localParams": [],
                "resourceList": [],
                "dependence": {},
                "waitStartTimeout": {},
                "conditionResult": {"successNode": [""], "failedNode": [""]},
            },
        ),
        (
            {
                "local_params": ["foo", "bar"],
                "resource_list": ["foo", "bar"],
                "dependence": {"foo", "bar"},
                "wait_start_timeout": {"foo", "bar"},
                "condition_result": {"foo": ["bar"]},
            },
            {
                "localParams": ["foo", "bar"],
                "resourceList": ["foo", "bar"],
                "dependence": {"foo", "bar"},
                "waitStartTimeout": {"foo", "bar"},
                "conditionResult": {"foo": ["bar"]},
            },
        ),
    ],
)
def test_property_task_params(attr, expect):
    """Test class task property."""
    # Case 1: defaults with no kwargs; case 2: snake_case kwargs must be
    # exposed camelCased in task_params.
    task = testTask(
        "test-property-task-params",
        "test-task",
        **attr,
    )
    assert expect == task.task_params
@pytest.mark.parametrize(
    "pre_code, post_code, expect",
    [
        (123, 456, hash("123 -> 456")),
        (12345678, 987654321, hash("12345678 -> 987654321")),
    ],
)
def test_task_relation_hash_func(pre_code, post_code, expect):
    """Test TaskRelation magic function :func:`__hash__`."""
    # Hash is defined over the "pre -> post" string form of the codes.
    task_param = TaskRelation(pre_task_code=pre_code, post_task_code=post_code)
    assert hash(task_param) == expect
@pytest.mark.parametrize(
    "pre_code, post_code, size_add",
    [
        (123, 456, 1),
        (123, 456, 0),  # duplicate of the first case — set size must not grow
        (456, 456, 1),
        (123, 123, 1),
        (456, 123, 1),
        (0, 456, 1),
        (123, 0, 1),
    ],
)
def test_task_relation_add_to_set(pre_code, post_code, size_add):
    """Test TaskRelation with different pre_code and post_code add to set behavior.

    Here we use global variable to keep set of :class:`TaskRelation` instance and the number we expect
    of the size when we add a new task relation to exists set.
    """
    task_relation = TaskRelation(pre_task_code=pre_code, post_task_code=post_code)
    TEST_TASK_RELATION_SET.add(task_relation)
    # hint python interpreter use global variable instead of local's
    global TEST_TASK_RELATION_SIZE
    TEST_TASK_RELATION_SIZE += size_add
    assert len(TEST_TASK_RELATION_SET) == TEST_TASK_RELATION_SIZE
def test_task_relation_to_dict():
    """Test TaskRelation object function to_dict."""
    pre_task_code = 123
    post_task_code = 456
    # Exact gateway payload for one relation edge; camelCase keys are part of
    # the Java gateway contract.
    expect = {
        "name": "",
        "preTaskCode": pre_task_code,
        "postTaskCode": post_task_code,
        "preTaskVersion": 1,
        "postTaskVersion": 1,
        "conditionType": 0,
        "conditionParams": {},
    }
    task_relation = TaskRelation(
        pre_task_code=pre_task_code, post_task_code=post_task_code
    )
    assert task_relation.get_define() == expect
def test_task_get_define():
    """Test Task object function get_define."""
    code = 123
    version = 1
    name = "test_task_get_define"
    task_type = "test_task_get_define_type"
    # Full expected task definition payload, with code/version pinned by the
    # patched gen_code_and_version below (avoids a gateway round-trip).
    expect = {
        "code": code,
        "name": name,
        "version": version,
        "description": None,
        "delayTime": 0,
        "taskType": task_type,
        "taskParams": {
            "resourceList": [],
            "localParams": [],
            "dependence": {},
            "conditionResult": {"successNode": [""], "failedNode": [""]},
            "waitStartTimeout": {},
        },
        "flag": "YES",
        "taskPriority": "MEDIUM",
        "workerGroup": "default",
        "failRetryTimes": 0,
        "failRetryInterval": 1,
        "timeoutFlag": "CLOSE",
        "timeoutNotifyStrategy": None,
        "timeout": 0,
    }
    with patch(
        "pydolphinscheduler.core.task.Task.gen_code_and_version",
        return_value=(code, version),
    ):
        task = Task(name=name, task_type=task_type)
        assert task.get_define() == expect
@pytest.mark.parametrize("shift", ["<<", ">>"])
def test_two_tasks_shift(shift: str):
    """Test bit operator between tasks.

    Here we test both `>>` and `<<` bit operator.
    """
    upstream = testTask(name="upstream", task_type=shift)
    downstream = testTask(name="downstream", task_type=shift)
    # `a >> b` and `b << a` must both mean "a is upstream of b".
    if shift == "<<":
        downstream << upstream
    elif shift == ">>":
        upstream >> downstream
    else:
        assert False, f"Unexpect bit operator type {shift}."
    assert (
        1 == len(upstream._downstream_task_codes)
        and downstream.code in upstream._downstream_task_codes
    ), "Task downstream task attributes error, downstream codes size or specific code failed."
    assert (
        1 == len(downstream._upstream_task_codes)
        and upstream.code in downstream._upstream_task_codes
    ), "Task upstream task attributes error, upstream codes size or upstream code failed."
@pytest.mark.parametrize(
    "dep_expr, flag",
    [
        ("task << tasks", "upstream"),
        ("tasks << task", "downstream"),
        ("task >> tasks", "downstream"),
        ("tasks >> task", "upstream"),
    ],
)
def test_tasks_list_shift(dep_expr: str, flag: str):
    """Test bit operator between task and sequence of tasks.

    Here we test both `>>` and `<<` bit operator.
    """
    reverse_dict = {
        "upstream": "downstream",
        "downstream": "upstream",
    }
    task_type = "dep_task_and_tasks"
    task = testTask(name="upstream", task_type=task_type)
    tasks = [
        testTask(name="downstream1", task_type=task_type),
        testTask(name="downstream2", task_type=task_type),
    ]
    # Use build-in function eval to simply test case and reduce duplicate code
    # (safe here: the expressions are the fixed literals parametrized above).
    eval(dep_expr)
    direction_attr = f"_{flag}_task_codes"
    reverse_direction_attr = f"_{reverse_dict[flag]}_task_codes"
    # The single task gained both list members; each list member gained the task.
    assert 2 == len(getattr(task, direction_attr))
    assert [t.code in getattr(task, direction_attr) for t in tasks]
    assert all([1 == len(getattr(t, reverse_direction_attr)) for t in tasks])
    assert all([task.code in getattr(t, reverse_direction_attr) for t in tasks])

18
dolphinscheduler-python/pydolphinscheduler/tests/tasks/__init__.py

@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Init tasks package tests."""

439
dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_condition.py

@ -0,0 +1,439 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Test Task dependent."""
from typing import List, Tuple
from unittest.mock import patch
import pytest
from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.exceptions import PyDSParamException
from pydolphinscheduler.tasks.condition import (
FAILURE,
SUCCESS,
And,
ConditionOperator,
Conditions,
Or,
Status,
)
from tests.testing.task import Task
# Shared fixture values for the condition-task tests below.
TEST_NAME = "test-name"
TEST_PROJECT = "test-project"
TEST_PROCESS_DEFINITION = "test-process-definition"
TEST_TYPE = "test-type"
TEST_PROJECT_CODE, TEST_DEFINITION_CODE, TEST_TASK_CODE = 12345, 123456, 1234567
TEST_OPERATOR_LIST = ("AND", "OR")
@pytest.mark.parametrize(
    "obj, expect",
    [
        (Status, "STATUS"),
        (SUCCESS, "SUCCESS"),
        (FAILURE, "FAILURE"),
    ],
)
def test_class_status_status_name(obj: Status, expect: str):
    """Test class status and sub class property status_name."""
    # status_name derives from the class name, upper-cased.
    assert obj.status_name() == expect
@pytest.mark.parametrize(
    "obj, tasks",
    [
        (Status, (1, 2, 3)),
        (SUCCESS, (1.1, 2.2, 3.3)),
        (FAILURE, (ConditionOperator(1), ConditionOperator(2), ConditionOperator(3))),
    ],
)
def test_class_status_depend_item_list_no_expect_type(obj: Status, tasks: Tuple):
    """Test class status and sub class raise error when assign not support type."""
    # Status only accepts Task (or subclass) arguments — ints, floats and
    # ConditionOperator must all be rejected.
    with pytest.raises(
        PyDSParamException, match=".*?only accept class Task or sub class Task, but get"
    ):
        obj(*tasks).get_define()
@pytest.mark.parametrize(
    "obj, tasks",
    [
        (Status, [Task(str(i), TEST_TYPE) for i in range(1)]),
        (Status, [Task(str(i), TEST_TYPE) for i in range(2)]),
        (Status, [Task(str(i), TEST_TYPE) for i in range(3)]),
        (SUCCESS, [Task(str(i), TEST_TYPE) for i in range(1)]),
        (SUCCESS, [Task(str(i), TEST_TYPE) for i in range(2)]),
        (SUCCESS, [Task(str(i), TEST_TYPE) for i in range(3)]),
        (FAILURE, [Task(str(i), TEST_TYPE) for i in range(1)]),
        (FAILURE, [Task(str(i), TEST_TYPE) for i in range(2)]),
        (FAILURE, [Task(str(i), TEST_TYPE) for i in range(3)]),
    ],
)
def test_class_status_depend_item_list(obj: Status, tasks: Tuple):
    """Test class status and sub class function :func:`depend_item_list`."""
    # Each wrapped task becomes one {depTaskCode, status} entry, in order.
    status = obj.status_name()
    expect = [
        {
            "depTaskCode": i.code,
            "status": status,
        }
        for i in tasks
    ]
    assert obj(*tasks).get_define() == expect
@pytest.mark.parametrize(
    "obj, expect",
    [
        (ConditionOperator, "CONDITIONOPERATOR"),
        (And, "AND"),
        (Or, "OR"),
    ],
)
def test_condition_operator_operator_name(obj: ConditionOperator, expect: str):
    """Test class ConditionOperator and sub class class function :func:`operator_name`."""
    # operator_name derives from the class name, upper-cased.
    assert obj.operator_name() == expect
@pytest.mark.parametrize(
    "obj, expect",
    [
        (ConditionOperator, "CONDITIONOPERATOR"),
        (And, "AND"),
        (Or, "OR"),
    ],
)
def test_condition_operator_relation(obj: ConditionOperator, expect: str):
    """Test class ConditionOperator and sub class class property `relation`."""
    # `relation` on an instance mirrors operator_name(); the constructor
    # argument (1) is irrelevant for this property.
    assert obj(1).relation == expect
@pytest.mark.parametrize(
    "obj, status_or_operator, match",
    [
        # A Status mixed with a plain int is rejected.
        (
            ConditionOperator,
            [Status(Task("1", TEST_TYPE)), 1],
            ".*?operator parameter support ConditionTask and ConditionOperator.*?",
        ),
        # A Status mixed with a float is rejected.
        (
            ConditionOperator,
            [
                Status(Task("1", TEST_TYPE)),
                1.0,
            ],
            ".*?operator parameter support ConditionTask and ConditionOperator.*?",
        ),
        # Status followed by an operator: both are valid types but may not be mixed.
        (
            ConditionOperator,
            [
                Status(Task("1", TEST_TYPE)),
                ConditionOperator(And(Status(Task("1", TEST_TYPE)))),
            ],
            ".*?operator parameter only support same type.",
        ),
        # Operator followed by a status: argument order does not matter, still rejected.
        (
            ConditionOperator,
            [
                ConditionOperator(And(Status(Task("1", TEST_TYPE)))),
                Status(Task("1", TEST_TYPE)),
            ],
            ".*?operator parameter only support same type.",
        ),
    ],
)
def test_condition_operator_set_define_attr_not_support_type(
    obj, status_or_operator, match
):
    """Test class ConditionOperator parameter error, including parameter not same or type not support."""
    # NOTE(review): both construction and set_define_attr run inside the raises
    # block, so either may be the raising step — confirm which one validates.
    with pytest.raises(PyDSParamException, match=match):
        op = obj(*status_or_operator)
        op.set_define_attr()
@pytest.mark.parametrize(
    "obj, task_num",
    [
        # Every operator class crossed with 1..3 tasks — same nine cases as
        # writing them out one per line.
        (operator_cls, number)
        for operator_cls in (ConditionOperator, And, Or)
        for number in (1, 2, 3)
    ],
)
def test_condition_operator_set_define_attr_status(
    obj: ConditionOperator, task_num: int
):
    """Test :func:`set_define_attr` with one or more class status."""
    member_tasks = [Task(str(idx), TEST_TYPE) for idx in range(task_num)]
    status = Status(*member_tasks)
    expect = [
        {"depTaskCode": each.code, "status": status.status_name()}
        for each in member_tasks
    ]
    operator = obj(status)
    operator.set_define_attr()
    assert expect == getattr(operator, "depend_item_list")
@pytest.mark.parametrize(
    "obj, status",
    [
        # Every operator class crossed with every ordered pair of statuses.
        (operator_cls, pair)
        for operator_cls in (ConditionOperator, And, Or)
        for pair in (
            (SUCCESS, SUCCESS),
            (FAILURE, FAILURE),
            (SUCCESS, FAILURE),
            (FAILURE, SUCCESS),
        )
    ],
)
def test_condition_operator_set_define_attr_mix_status(
    obj: ConditionOperator, status: List[Status]
):
    """Test :func:`set_define_attr` with one or more mixed status."""
    task = Task("test-operator", TEST_TYPE)
    status_list = [sta(task) for sta in status]
    expect = [
        {"depTaskCode": task.code, "status": sta.status_name()} for sta in status
    ]
    operator = obj(*status_list)
    operator.set_define_attr()
    assert expect == getattr(operator, "depend_item_list")
@pytest.mark.parametrize(
    "obj, task_num",
    [
        # Every operator class crossed with 1..3 nested operators.
        (operator_cls, number)
        for operator_cls in (ConditionOperator, And, Or)
        for number in (1, 2, 3)
    ],
)
def test_condition_operator_set_define_attr_operator(
    obj: ConditionOperator, task_num: int
):
    """Test :func:`set_define_attr` with one or more class condition operator."""
    task = Task("test-operator", TEST_TYPE)
    status = Status(task)
    # Each nested operator serializes to the same entry, repeated task_num times.
    entry = {
        "relation": obj.operator_name(),
        "dependItemList": [
            {"depTaskCode": task.code, "status": status.status_name()}
        ],
    }
    expect = [entry for _ in range(task_num)]
    operator = obj(*[obj(status) for _ in range(task_num)])
    operator.set_define_attr()
    assert expect == getattr(operator, "depend_task_list")
@pytest.mark.parametrize(
    "cond, sub_cond",
    [
        (ConditionOperator, (And, Or)),
        (ConditionOperator, (Or, And)),
        (And, (And, Or)),
        (And, (Or, And)),
        (Or, (And, Or)),
        (Or, (Or, And)),
    ],
)
def test_condition_operator_set_define_attr_mix_operator(
    cond: ConditionOperator, sub_cond: Tuple[ConditionOperator]
):
    """Test :func:`set_define_attr` with one or more class mix condition operator.

    BUGFIX: the original loop variable was also named ``cond``, shadowing the
    parametrized parent operator — ``co`` was then built from the *last child*
    operator, so the parent class under test was never exercised. The loop
    variable is renamed so the parametrized parent is actually instantiated.
    """
    attr = "depend_task_list"
    task = Task("test-operator", TEST_TYPE)
    expect = []
    sub_condition = []
    for sub in sub_cond:
        status = Status(task)
        sub_condition.append(sub(status))
        expect.append(
            {
                "relation": sub.operator_name(),
                "dependItemList": [
                    {
                        "depTaskCode": task.code,
                        "status": status.status_name(),
                    }
                ],
            }
        )
    co = cond(*sub_condition)
    co.set_define_attr()
    assert getattr(co, attr) == expect
@patch(
    "pydolphinscheduler.core.task.Task.gen_code_and_version",
    return_value=(12345, 1),
)
@patch(
    "pydolphinscheduler.tasks.condition.Conditions.gen_code_and_version",
    return_value=(123, 1),
)
def test_dependent_get_define(mock_condition_code_version, mock_task_code_version):
    """Test task condition :func:`get_define`.

    NOTE(review): despite the name, this test exercises :class:`Conditions`
    (this is test_condition.py) — consider renaming to
    ``test_condition_get_define`` to match the sibling modules.

    Both code generators are patched (decorators apply bottom-up): the
    Conditions task gets code 123, every plain Task gets code 12345.
    """
    common_task = Task(name="common_task", task_type="test_task_condition")
    # Nested operator tree: AND(AND(SUCCESS, FAILURE), OR(SUCCESS, FAILURE)),
    # each leaf referencing the same task twice.
    cond_operator = And(
        And(
            SUCCESS(common_task, common_task),
            FAILURE(common_task, common_task),
        ),
        Or(
            SUCCESS(common_task, common_task),
            FAILURE(common_task, common_task),
        ),
    )
    name = "test_condition_get_define"
    expect = {
        "code": 123,
        "name": name,
        "version": 1,
        "description": None,
        "delayTime": 0,
        "taskType": "CONDITIONS",
        "taskParams": {
            "resourceList": [],
            "localParams": [],
            "dependence": {
                "relation": "AND",
                "dependTaskList": [
                    {
                        "relation": "AND",
                        "dependItemList": [
                            {"depTaskCode": common_task.code, "status": "SUCCESS"},
                            {"depTaskCode": common_task.code, "status": "SUCCESS"},
                            {"depTaskCode": common_task.code, "status": "FAILURE"},
                            {"depTaskCode": common_task.code, "status": "FAILURE"},
                        ],
                    },
                    {
                        "relation": "OR",
                        "dependItemList": [
                            {"depTaskCode": common_task.code, "status": "SUCCESS"},
                            {"depTaskCode": common_task.code, "status": "SUCCESS"},
                            {"depTaskCode": common_task.code, "status": "FAILURE"},
                            {"depTaskCode": common_task.code, "status": "FAILURE"},
                        ],
                    },
                ],
            },
            "conditionResult": {"successNode": [""], "failedNode": [""]},
            "waitStartTimeout": {},
        },
        "flag": "YES",
        "taskPriority": "MEDIUM",
        "workerGroup": "default",
        "failRetryTimes": 0,
        "failRetryInterval": 1,
        "timeoutFlag": "CLOSE",
        "timeoutNotifyStrategy": None,
        "timeout": 0,
    }
    task = Conditions(name, condition=cond_operator)
    assert task.get_define() == expect
@patch(
    "pydolphinscheduler.core.task.Task.gen_code_and_version",
    return_value=(123, 1),
)
def test_condition_set_dep_workflow(mock_task_code_version):
    """Test task condition set dependence in workflow level."""
    with ProcessDefinition(name="test-condition-set-dep-workflow") as pd:
        parent = Task(name="parent", task_type=TEST_TYPE)
        condition_success_1 = Task(name="condition_success_1", task_type=TEST_TYPE)
        condition_success_2 = Task(name="condition_success_2", task_type=TEST_TYPE)
        condition_fail = Task(name="condition_fail", task_type=TEST_TYPE)
        cond_operator = And(
            And(
                SUCCESS(condition_success_1, condition_success_2),
                FAILURE(condition_fail),
            ),
        )
        condition = Conditions(name=TEST_NAME, condition=cond_operator)
        parent >> condition
        # General tasks test
        assert len(pd.tasks) == 5
        assert sorted(pd.task_list, key=lambda t: t.name) == sorted(
            [
                parent,
                condition,
                condition_success_1,
                condition_success_2,
                condition_fail,
            ],
            key=lambda t: t.name,
        )
        # Task dep test
        assert parent._downstream_task_codes == {condition.code}
        assert condition._upstream_task_codes == {parent.code}
        # Condition task dep after ProcessDefinition function get_define called
        # NOTE(review): presumably the Conditions task wires its members as
        # downstream tasks when it is added to the workflow — confirm against
        # Conditions' implementation.
        assert condition._downstream_task_codes == {
            condition_success_1.code,
            condition_success_2.code,
            condition_fail.code,
        }
        assert all(
            [
                child._upstream_task_codes == {condition.code}
                for child in [condition_success_1, condition_success_2, condition_fail]
            ]
        )

124
dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_datax.py

@ -0,0 +1,124 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Test Task DataX."""
from unittest.mock import patch
import pytest
from pydolphinscheduler.tasks.datax import CustomDataX, DataX
@patch(
    "pydolphinscheduler.core.database.Database.get_database_info",
    return_value=({"id": 1, "type": "MYSQL"}),
)
def test_datax_get_define(mock_datasource):
    """Test task datax function get_define.

    The database lookup is mocked so both source and target resolve to the
    same MYSQL datasource with id 1.
    """
    code = 123
    version = 1
    name = "test_datax_get_define"
    command = "select name from test_source_table_name"
    datasource_name = "test_datasource"
    datatarget_name = "test_datatarget"
    target_table = "test_target_table_name"
    expect = {
        "code": code,
        "name": name,
        "version": 1,
        "description": None,
        "delayTime": 0,
        "taskType": "DATAX",
        "taskParams": {
            # customConfig 0 = structured (non-JSON-template) mode.
            "customConfig": 0,
            "dsType": "MYSQL",
            "dataSource": 1,
            "dtType": "MYSQL",
            "dataTarget": 1,
            "sql": command,
            "targetTable": target_table,
            "jobSpeedByte": 0,
            "jobSpeedRecord": 1000,
            "xms": 1,
            "xmx": 1,
            "preStatements": [],
            "postStatements": [],
            "localParams": [],
            "resourceList": [],
            "dependence": {},
            "conditionResult": {"successNode": [""], "failedNode": [""]},
            "waitStartTimeout": {},
        },
        "flag": "YES",
        "taskPriority": "MEDIUM",
        "workerGroup": "default",
        "failRetryTimes": 0,
        "failRetryInterval": 1,
        "timeoutFlag": "CLOSE",
        "timeoutNotifyStrategy": None,
        "timeout": 0,
    }
    with patch(
        "pydolphinscheduler.core.task.Task.gen_code_and_version",
        return_value=(code, version),
    ):
        task = DataX(name, datasource_name, datatarget_name, command, target_table)
        assert task.get_define() == expect
@pytest.mark.parametrize("json_template", ["json_template"])
def test_custom_datax_get_define(json_template):
    """Test task custom datax function get_define.

    The JSON template is opaque to CustomDataX, so a placeholder string is a
    sufficient fixture.
    """
    code = 123
    version = 1
    name = "test_custom_datax_get_define"
    expect = {
        "code": code,
        "name": name,
        "version": 1,
        "description": None,
        "delayTime": 0,
        "taskType": "DATAX",
        "taskParams": {
            # customConfig 1 = raw JSON template mode.
            "customConfig": 1,
            "json": json_template,
            "xms": 1,
            "xmx": 1,
            "localParams": [],
            "resourceList": [],
            "dependence": {},
            "conditionResult": {"successNode": [""], "failedNode": [""]},
            "waitStartTimeout": {},
        },
        "flag": "YES",
        "taskPriority": "MEDIUM",
        "workerGroup": "default",
        "failRetryTimes": 0,
        "failRetryInterval": 1,
        "timeoutFlag": "CLOSE",
        "timeoutNotifyStrategy": None,
        "timeout": 0,
    }
    with patch(
        "pydolphinscheduler.core.task.Task.gen_code_and_version",
        return_value=(code, version),
    ):
        task = CustomDataX(name, json_template)
        # Leftover debug prints removed; pytest's assertion diff already
        # shows both dicts on failure.
        assert task.get_define() == expect

793
dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_dependent.py

@ -0,0 +1,793 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Test Task dependent."""
import itertools
from typing import Dict, List, Optional, Tuple, Union
from unittest.mock import patch
import pytest
from pydolphinscheduler.exceptions import PyDSParamException
from pydolphinscheduler.tasks.dependent import (
And,
Dependent,
DependentDate,
DependentItem,
DependentOperator,
Or,
)
# Fixture names shared by every dependent-task test in this module.
TEST_PROJECT = "test-project"
TEST_PROCESS_DEFINITION = "test-process-definition"
TEST_TASK = "test-task"
# Codes the mocked gateway returns for (project, definition, task).
TEST_PROJECT_CODE, TEST_DEFINITION_CODE, TEST_TASK_CODE = 12345, 123456, 1234567
# Relation names in the order the tests pair them with the (And, Or) classes.
TEST_OPERATOR_LIST = ("AND", "OR")
@pytest.mark.parametrize(
    "dep_date, dep_cycle",
    [
        # hour
        (DependentDate.CURRENT_HOUR, "hour"),
        (DependentDate.LAST_ONE_HOUR, "hour"),
        (DependentDate.LAST_TWO_HOURS, "hour"),
        (DependentDate.LAST_THREE_HOURS, "hour"),
        (DependentDate.LAST_TWENTY_FOUR_HOURS, "hour"),
        # day
        (DependentDate.TODAY, "day"),
        (DependentDate.LAST_ONE_DAYS, "day"),
        (DependentDate.LAST_TWO_DAYS, "day"),
        (DependentDate.LAST_THREE_DAYS, "day"),
        (DependentDate.LAST_SEVEN_DAYS, "day"),
        # week
        (DependentDate.THIS_WEEK, "week"),
        (DependentDate.LAST_WEEK, "week"),
        (DependentDate.LAST_MONDAY, "week"),
        (DependentDate.LAST_TUESDAY, "week"),
        (DependentDate.LAST_WEDNESDAY, "week"),
        (DependentDate.LAST_THURSDAY, "week"),
        (DependentDate.LAST_FRIDAY, "week"),
        (DependentDate.LAST_SATURDAY, "week"),
        (DependentDate.LAST_SUNDAY, "week"),
        # month
        (DependentDate.THIS_MONTH, "month"),
        (DependentDate.LAST_MONTH, "month"),
        (DependentDate.LAST_MONTH_BEGIN, "month"),
        (DependentDate.LAST_MONTH_END, "month"),
    ],
)
@patch(
    "pydolphinscheduler.tasks.dependent.DependentItem.get_code_from_gateway",
    return_value={
        "projectCode": TEST_PROJECT_CODE,
        "processDefinitionCode": TEST_DEFINITION_CODE,
        "taskDefinitionCode": TEST_TASK_CODE,
    },
)
def test_dependent_item_get_define(mock_task_info, dep_date, dep_cycle):
    """Test dependent.DependentItem get define.

    Every DependentDate value must map to its cycle bucket (hour/day/week/
    month) while the raw date value is passed through unchanged.

    Here we have test some cases as below.
    ```py
    {
        "projectCode": "project code",
        "definitionCode": "definition code",
        "depTaskCode": "dep task code",
        "cycle": "day",
        "dateValue": "today"
    }
    ```
    """
    # Keyword arguments for DependentItem; the gateway lookup is mocked above,
    # so the names never leave the process.
    attr = {
        "project_name": TEST_PROJECT,
        "process_definition_name": TEST_PROCESS_DEFINITION,
        "dependent_task_name": TEST_TASK,
        "dependent_date": dep_date,
    }
    expect = {
        "projectCode": TEST_PROJECT_CODE,
        "definitionCode": TEST_DEFINITION_CODE,
        "depTaskCode": TEST_TASK_CODE,
        "cycle": dep_cycle,
        "dateValue": dep_date,
    }
    task = DependentItem(**attr)
    assert expect == task.get_define()
def test_dependent_item_date_error():
    """Creating a DependentItem with ``dependent_date=None`` must raise."""
    kwargs = {
        "project_name": TEST_PROJECT,
        "process_definition_name": TEST_PROCESS_DEFINITION,
        "dependent_date": None,
    }
    with pytest.raises(
        PyDSParamException, match="Parameter dependent_date must provider.*?"
    ):
        DependentItem(**kwargs)
@pytest.mark.parametrize(
    "task_name, result",
    [
        ({"dependent_task_name": TEST_TASK}, TEST_TASK),
        ({}, None),
    ],
)
def test_dependent_item_code_parameter(task_name: dict, result: Optional[str]):
    """``code_parameter`` packs (project, definition, task-name-or-None)."""
    item = DependentItem(
        project_name=TEST_PROJECT,
        process_definition_name=TEST_PROCESS_DEFINITION,
        **task_name,
    )
    assert (TEST_PROJECT, TEST_PROCESS_DEFINITION, result) == item.code_parameter
@pytest.mark.parametrize(
    "arg_list",
    [
        # Two plain ints: neither is a supported member type.
        [1, 2],
        # A DependentItem mixed with an int.
        [
            DependentItem(
                project_name=TEST_PROJECT,
                process_definition_name=TEST_PROCESS_DEFINITION,
            ),
            1,
        ],
        # An operator mixed with an int.
        [
            And(
                DependentItem(
                    project_name=TEST_PROJECT,
                    process_definition_name=TEST_PROCESS_DEFINITION,
                )
            ),
            1,
        ],
        # Item and operator are both valid types but may not be mixed.
        [
            DependentItem(
                project_name=TEST_PROJECT,
                process_definition_name=TEST_PROCESS_DEFINITION,
            ),
            And(
                DependentItem(
                    project_name=TEST_PROJECT,
                    process_definition_name=TEST_PROCESS_DEFINITION,
                )
            ),
        ],
    ],
)
@patch(
    "pydolphinscheduler.tasks.dependent.DependentItem.get_code_from_gateway",
    return_value={
        "projectCode": TEST_PROJECT_CODE,
        "processDefinitionCode": TEST_DEFINITION_CODE,
        "taskDefinitionCode": TEST_TASK_CODE,
    },
)
def test_dependent_operator_set_define_error(mock_code, arg_list):
    """Test dependent operator function :func:`set_define` with not support type."""
    # Construction succeeds for any argument list; the type validation only
    # fires in set_define_attr.
    dep_op = DependentOperator(*arg_list)
    with pytest.raises(PyDSParamException, match="Dependent .*? operator.*?"):
        dep_op.set_define_attr()
@pytest.mark.parametrize(
    # Test dependent operator, Test dependent item parameters, expect operator define
    "operators, kwargs, expect",
    [
        # Test dependent operator (And | Or) with single dependent item
        (
            (And, Or),
            (
                {
                    "project_name": TEST_PROJECT,
                    "process_definition_name": TEST_PROCESS_DEFINITION,
                    "dependent_task_name": TEST_TASK,
                    "dependent_date": DependentDate.LAST_MONTH_END,
                },
            ),
            [
                {
                    "relation": op,
                    "dependItemList": [
                        {
                            "projectCode": TEST_PROJECT_CODE,
                            "definitionCode": TEST_DEFINITION_CODE,
                            "depTaskCode": TEST_TASK_CODE,
                            "cycle": "month",
                            "dateValue": DependentDate.LAST_MONTH_END,
                        },
                    ],
                }
                # TEST_OPERATOR_LIST ("AND", "OR") is index-aligned with the
                # operator classes (And, Or) above.
                for op in TEST_OPERATOR_LIST
            ],
        ),
        # Test dependent operator (And | Or) with two dependent item
        (
            (And, Or),
            (
                {
                    "project_name": TEST_PROJECT,
                    "process_definition_name": TEST_PROCESS_DEFINITION,
                    "dependent_task_name": TEST_TASK,
                    "dependent_date": DependentDate.LAST_MONTH_END,
                },
                {
                    "project_name": TEST_PROJECT,
                    "process_definition_name": TEST_PROCESS_DEFINITION,
                    "dependent_task_name": TEST_TASK,
                    "dependent_date": DependentDate.LAST_WEEK,
                },
            ),
            [
                {
                    "relation": op,
                    "dependItemList": [
                        {
                            "projectCode": TEST_PROJECT_CODE,
                            "definitionCode": TEST_DEFINITION_CODE,
                            "depTaskCode": TEST_TASK_CODE,
                            "cycle": "month",
                            "dateValue": DependentDate.LAST_MONTH_END,
                        },
                        {
                            "projectCode": TEST_PROJECT_CODE,
                            "definitionCode": TEST_DEFINITION_CODE,
                            "depTaskCode": TEST_TASK_CODE,
                            "cycle": "week",
                            "dateValue": DependentDate.LAST_WEEK,
                        },
                    ],
                }
                for op in TEST_OPERATOR_LIST
            ],
        ),
        # Test dependent operator (And | Or) with multiply dependent item
        (
            (And, Or),
            (
                {
                    "project_name": TEST_PROJECT,
                    "process_definition_name": TEST_PROCESS_DEFINITION,
                    "dependent_task_name": TEST_TASK,
                    "dependent_date": DependentDate.LAST_MONTH_END,
                },
                {
                    "project_name": TEST_PROJECT,
                    "process_definition_name": TEST_PROCESS_DEFINITION,
                    "dependent_task_name": TEST_TASK,
                    "dependent_date": DependentDate.LAST_WEEK,
                },
                {
                    "project_name": TEST_PROJECT,
                    "process_definition_name": TEST_PROCESS_DEFINITION,
                    "dependent_task_name": TEST_TASK,
                    "dependent_date": DependentDate.LAST_ONE_DAYS,
                },
            ),
            [
                {
                    "relation": op,
                    "dependItemList": [
                        {
                            "projectCode": TEST_PROJECT_CODE,
                            "definitionCode": TEST_DEFINITION_CODE,
                            "depTaskCode": TEST_TASK_CODE,
                            "cycle": "month",
                            "dateValue": DependentDate.LAST_MONTH_END,
                        },
                        {
                            "projectCode": TEST_PROJECT_CODE,
                            "definitionCode": TEST_DEFINITION_CODE,
                            "depTaskCode": TEST_TASK_CODE,
                            "cycle": "week",
                            "dateValue": DependentDate.LAST_WEEK,
                        },
                        {
                            "projectCode": TEST_PROJECT_CODE,
                            "definitionCode": TEST_DEFINITION_CODE,
                            "depTaskCode": TEST_TASK_CODE,
                            "cycle": "day",
                            "dateValue": DependentDate.LAST_ONE_DAYS,
                        },
                    ],
                }
                for op in TEST_OPERATOR_LIST
            ],
        ),
    ],
)
@patch(
    "pydolphinscheduler.tasks.dependent.DependentItem.get_code_from_gateway",
    return_value={
        "projectCode": TEST_PROJECT_CODE,
        "processDefinitionCode": TEST_DEFINITION_CODE,
        "taskDefinitionCode": TEST_TASK_CODE,
    },
)
def test_operator_dependent_item(
    mock_code_info,
    operators: Tuple[DependentOperator],
    kwargs: Tuple[dict],
    expect: List[Dict],
):
    """Test DependentOperator(DependentItem) function get_define.

    Here we have test some cases as below, including single dependentItem and multiply dependentItem.
    ```py
    {
        "relation": "AND",
        "dependItemList": [
            {
                "projectCode": "project code",
                "definitionCode": "definition code",
                "depTaskCode": "dep task code",
                "cycle": "day",
                "dateValue": "today"
            },
            ...
        ]
    }
    ```
    """
    # ``expect`` is index-aligned with ``operators``, so a simple enumerate
    # pairs each operator class with its expected define.
    for idx, operator in enumerate(operators):
        # Use variable to keep one or more dependent item to test dependent operator behavior
        dependent_item_list = []
        for kwarg in kwargs:
            dependent_item = DependentItem(**kwarg)
            dependent_item_list.append(dependent_item)
        op = operator(*dependent_item_list)
        assert expect[idx] == op.get_define()
@pytest.mark.parametrize(
    # Test dependent operator, Test dependent item parameters, expect operator define
    "operators, args, expect",
    [
        # Test dependent operator (And | Or) with single dependent task list
        (
            (And, Or),
            (
                (And, Or),
                (
                    {
                        "project_name": TEST_PROJECT,
                        "process_definition_name": TEST_PROCESS_DEFINITION,
                        "dependent_task_name": TEST_TASK,
                        "dependent_date": DependentDate.LAST_MONTH_END,
                    },
                ),
            ),
            [
                {
                    "relation": par_op,
                    "dependTaskList": [
                        {
                            "relation": chr_op,
                            "dependItemList": [
                                {
                                    "projectCode": TEST_PROJECT_CODE,
                                    "definitionCode": TEST_DEFINITION_CODE,
                                    "depTaskCode": TEST_TASK_CODE,
                                    "cycle": "month",
                                    "dateValue": DependentDate.LAST_MONTH_END,
                                },
                            ],
                        }
                    ],
                }
                for (par_op, chr_op) in itertools.product(
                    TEST_OPERATOR_LIST, TEST_OPERATOR_LIST
                )
            ],
        ),
        # Test dependent operator (And | Or) with two dependent task list
        (
            (And, Or),
            (
                (And, Or),
                (
                    {
                        "project_name": TEST_PROJECT,
                        "process_definition_name": TEST_PROCESS_DEFINITION,
                        "dependent_task_name": TEST_TASK,
                        "dependent_date": DependentDate.LAST_MONTH_END,
                    },
                    {
                        "project_name": TEST_PROJECT,
                        "process_definition_name": TEST_PROCESS_DEFINITION,
                        "dependent_task_name": TEST_TASK,
                        "dependent_date": DependentDate.LAST_WEEK,
                    },
                ),
            ),
            [
                {
                    "relation": par_op,
                    "dependTaskList": [
                        {
                            "relation": chr_op,
                            "dependItemList": [
                                {
                                    "projectCode": TEST_PROJECT_CODE,
                                    "definitionCode": TEST_DEFINITION_CODE,
                                    "depTaskCode": TEST_TASK_CODE,
                                    "cycle": "month",
                                    "dateValue": DependentDate.LAST_MONTH_END,
                                },
                                {
                                    "projectCode": TEST_PROJECT_CODE,
                                    "definitionCode": TEST_DEFINITION_CODE,
                                    "depTaskCode": TEST_TASK_CODE,
                                    "cycle": "week",
                                    "dateValue": DependentDate.LAST_WEEK,
                                },
                            ],
                        }
                    ],
                }
                for (par_op, chr_op) in itertools.product(
                    TEST_OPERATOR_LIST, TEST_OPERATOR_LIST
                )
            ],
        ),
        # Test dependent operator (And | Or) with multiply dependent task list
        (
            (And, Or),
            (
                (And, Or),
                (
                    {
                        "project_name": TEST_PROJECT,
                        "process_definition_name": TEST_PROCESS_DEFINITION,
                        "dependent_task_name": TEST_TASK,
                        "dependent_date": DependentDate.LAST_MONTH_END,
                    },
                    {
                        "project_name": TEST_PROJECT,
                        "process_definition_name": TEST_PROCESS_DEFINITION,
                        "dependent_task_name": TEST_TASK,
                        "dependent_date": DependentDate.LAST_WEEK,
                    },
                    {
                        "project_name": TEST_PROJECT,
                        "process_definition_name": TEST_PROCESS_DEFINITION,
                        "dependent_task_name": TEST_TASK,
                        "dependent_date": DependentDate.LAST_ONE_DAYS,
                    },
                ),
            ),
            [
                {
                    "relation": par_op,
                    "dependTaskList": [
                        {
                            "relation": chr_op,
                            "dependItemList": [
                                {
                                    "projectCode": TEST_PROJECT_CODE,
                                    "definitionCode": TEST_DEFINITION_CODE,
                                    "depTaskCode": TEST_TASK_CODE,
                                    "cycle": "month",
                                    "dateValue": DependentDate.LAST_MONTH_END,
                                },
                                {
                                    "projectCode": TEST_PROJECT_CODE,
                                    "definitionCode": TEST_DEFINITION_CODE,
                                    "depTaskCode": TEST_TASK_CODE,
                                    "cycle": "week",
                                    "dateValue": DependentDate.LAST_WEEK,
                                },
                                {
                                    "projectCode": TEST_PROJECT_CODE,
                                    "definitionCode": TEST_DEFINITION_CODE,
                                    "depTaskCode": TEST_TASK_CODE,
                                    "cycle": "day",
                                    "dateValue": DependentDate.LAST_ONE_DAYS,
                                },
                            ],
                        }
                    ],
                }
                for (par_op, chr_op) in itertools.product(
                    TEST_OPERATOR_LIST, TEST_OPERATOR_LIST
                )
            ],
        ),
    ],
)
@patch(
    "pydolphinscheduler.tasks.dependent.DependentItem.get_code_from_gateway",
    return_value={
        "projectCode": TEST_PROJECT_CODE,
        "processDefinitionCode": TEST_DEFINITION_CODE,
        "taskDefinitionCode": TEST_TASK_CODE,
    },
)
def test_operator_dependent_task_list_multi_dependent_item(
    mock_code_info,
    operators: Tuple[DependentOperator],
    args: Tuple[Union[Tuple, dict]],
    expect: List[Dict],
):
    """Test DependentOperator(DependentOperator(DependentItem)) single operator function get_define.

    Here we have test some cases as below. This test case only test single DependTaskList with one or
    multiply dependItemList.
    ```py
    {
        "relation": "OR",
        "dependTaskList": [
            {
                "relation": "AND",
                "dependItemList": [
                    {
                        "projectCode": "project code",
                        "definitionCode": "definition code",
                        "depTaskCode": "dep task code",
                        "cycle": "day",
                        "dateValue": "today"
                    },
                    ...
                ]
            },
        ]
    }
    ```
    """
    # ``expect`` is built with itertools.product(parent, child), i.e. in
    # parent-major order — the nested loops below walk it with one running
    # index. (The original used ``enumerate`` here but never read the indices,
    # and re-unpacked ``args`` on every outer iteration.)
    dependent_operator, dependent_item_kwargs = args
    expect_idx = 0
    for operator in operators:
        for dpt_op in dependent_operator:
            dependent_item_list = [
                DependentItem(**dpt_kwargs) for dpt_kwargs in dependent_item_kwargs
            ]
            child_dep_op = dpt_op(*dependent_item_list)
            op = operator(child_dep_op)
            assert expect[expect_idx] == op.get_define()
            expect_idx += 1
def get_dep_task_list(*operator):
    """Return dependent task list from given operators list."""
    # Every entry shares the same single dependItem fixture; only the
    # relation varies with the operator class.
    base_item = {
        "projectCode": TEST_PROJECT_CODE,
        "definitionCode": TEST_DEFINITION_CODE,
        "depTaskCode": TEST_TASK_CODE,
        "cycle": "month",
        "dateValue": DependentDate.LAST_MONTH_END,
    }
    return [
        {"relation": op.operator_name(), "dependItemList": [dict(base_item)]}
        for op in operator
    ]
@pytest.mark.parametrize(
    # Test dependent operator, Test dependent item parameters, expect operator define
    "operators, args, expect",
    [
        # Test dependent operator (And | Or) with two dependent task list
        (
            (And, Or),
            (
                ((And, And), (And, Or), (Or, And), (Or, Or)),
                {
                    "project_name": TEST_PROJECT,
                    "process_definition_name": TEST_PROCESS_DEFINITION,
                    "dependent_task_name": TEST_TASK,
                    "dependent_date": DependentDate.LAST_MONTH_END,
                },
            ),
            [
                {
                    "relation": parent_op.operator_name(),
                    "dependTaskList": get_dep_task_list(*child_ops),
                }
                for parent_op in (And, Or)
                for child_ops in ((And, And), (And, Or), (Or, And), (Or, Or))
            ],
        ),
        # Test dependent operator (And | Or) with multiple dependent task list
        (
            (And, Or),
            (
                ((And, And, And), (And, And, And, And), (And, And, And, And, And)),
                {
                    "project_name": TEST_PROJECT,
                    "process_definition_name": TEST_PROCESS_DEFINITION,
                    "dependent_task_name": TEST_TASK,
                    "dependent_date": DependentDate.LAST_MONTH_END,
                },
            ),
            [
                {
                    "relation": parent_op.operator_name(),
                    "dependTaskList": get_dep_task_list(*child_ops),
                }
                for parent_op in (And, Or)
                for child_ops in (
                    (And, And, And),
                    (And, And, And, And),
                    (And, And, And, And, And),
                )
            ],
        ),
    ],
)
@patch(
    "pydolphinscheduler.tasks.dependent.DependentItem.get_code_from_gateway",
    return_value={
        "projectCode": TEST_PROJECT_CODE,
        "processDefinitionCode": TEST_DEFINITION_CODE,
        "taskDefinitionCode": TEST_TASK_CODE,
    },
)
def test_operator_dependent_task_list_multi_dependent_list(
    mock_code_info,
    operators: Tuple[DependentOperator],
    args: Tuple[Union[Tuple, dict]],
    expect: List[Dict],
):
    """Test DependentOperator(DependentOperator(DependentItem)) multiply operator function get_define.

    Here we have test some cases as below. This test case only test single DependTaskList with one or
    multiply dependTaskList.
    ```py
    {
        "relation": "OR",
        "dependTaskList": [
            {
                "relation": "AND",
                "dependItemList": [
                    {
                        "projectCode": "project code",
                        "definitionCode": "definition code",
                        "depTaskCode": "dep task code",
                        "cycle": "day",
                        "dateValue": "today"
                    }
                ]
            },
            ...
        ]
    }
    ```
    """
    # ``expect`` is ordered parent-operator-major, matching the nested loops
    # below, so a single running index walks it. (The original used
    # ``enumerate`` but never read the indices, and re-unpacked ``args``
    # inside the outer loop.)
    dependent_operator, dependent_item_kwargs = args
    expect_idx = 0
    for operator in operators:
        for dpt_ops in dependent_operator:
            dependent_task_list = [
                dpt_op(DependentItem(**dependent_item_kwargs)) for dpt_op in dpt_ops
            ]
            op = operator(*dependent_task_list)
            assert (
                expect[expect_idx] == op.get_define()
            ), f"Failed with operator syntax {operator}.{dpt_ops}"
            expect_idx += 1
@patch(
    "pydolphinscheduler.tasks.dependent.DependentItem.get_code_from_gateway",
    return_value={
        "projectCode": TEST_PROJECT_CODE,
        "processDefinitionCode": TEST_DEFINITION_CODE,
        "taskDefinitionCode": TEST_TASK_CODE,
    },
)
@patch(
    "pydolphinscheduler.core.task.Task.gen_code_and_version",
    return_value=(123, 1),
)
def test_dependent_get_define(mock_code_version, mock_dep_code):
    """Test task dependent function get_define.

    Decorators apply bottom-up: the first mock argument patches the task's
    code/version, the second the gateway code lookup.
    """
    project_name = "test-dep-project"
    process_definition_name = "test-dep-definition"
    dependent_task_name = "test-dep-task"
    dep_operator = And(
        Or(
            # test dependence with add tasks
            DependentItem(
                project_name=project_name,
                process_definition_name=process_definition_name,
            )
        ),
        And(
            # test dependence with specific task
            DependentItem(
                project_name=project_name,
                process_definition_name=process_definition_name,
                dependent_task_name=dependent_task_name,
            )
        ),
    )
    name = "test_dependent_get_define"
    expect = {
        "code": 123,
        "name": name,
        "version": 1,
        "description": None,
        "delayTime": 0,
        "taskType": "DEPENDENT",
        "taskParams": {
            "resourceList": [],
            "localParams": [],
            "dependence": {
                "relation": "AND",
                "dependTaskList": [
                    {
                        "relation": "OR",
                        "dependItemList": [
                            {
                                "projectCode": TEST_PROJECT_CODE,
                                "definitionCode": TEST_DEFINITION_CODE,
                                # NOTE(review): an item without a task name
                                # serializes depTaskCode as the string "0" —
                                # presumably "all tasks"; confirm against
                                # DependentItem's implementation.
                                "depTaskCode": "0",
                                "cycle": "day",
                                "dateValue": "today",
                            }
                        ],
                    },
                    {
                        "relation": "AND",
                        "dependItemList": [
                            {
                                "projectCode": TEST_PROJECT_CODE,
                                "definitionCode": TEST_DEFINITION_CODE,
                                "depTaskCode": TEST_TASK_CODE,
                                "cycle": "day",
                                "dateValue": "today",
                            }
                        ],
                    },
                ],
            },
            "conditionResult": {"successNode": [""], "failedNode": [""]},
            "waitStartTimeout": {},
        },
        "flag": "YES",
        "taskPriority": "MEDIUM",
        "workerGroup": "default",
        "failRetryTimes": 0,
        "failRetryInterval": 1,
        "timeoutFlag": "CLOSE",
        "timeoutNotifyStrategy": None,
        "timeout": 0,
    }
    task = Dependent(name, dependence=dep_operator)
    assert task.get_define() == expect

144
dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_http.py

@ -0,0 +1,144 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Test Task HTTP."""
from unittest.mock import patch
import pytest
from pydolphinscheduler.exceptions import PyDSParamException
from pydolphinscheduler.tasks.http import Http, HttpCheckCondition, HttpMethod
@pytest.mark.parametrize(
    "class_name, attrs",
    [
        (HttpMethod, ("GET", "POST", "HEAD", "PUT", "DELETE")),
        (
            HttpCheckCondition,
            (
                "STATUS_CODE_DEFAULT",
                "STATUS_CODE_CUSTOM",
                "BODY_CONTAINS",
                "BODY_NOT_CONTAINS",
            ),
        ),
    ],
)
def test_attr_exists(class_name, attrs):
    """Constant holders HttpMethod/HttpCheckCondition expose every expected name."""
    for attr in attrs:
        assert hasattr(class_name, attr)
@pytest.mark.parametrize(
    "attr, expect",
    [
        (
            {"url": "https://www.apache.org"},
            {
                "url": "https://www.apache.org",
                "httpMethod": "GET",
                "httpParams": [],
                "httpCheckCondition": "STATUS_CODE_DEFAULT",
                "condition": None,
                # 60000 — presumably milliseconds; confirm against Http task docs.
                "connectTimeout": 60000,
                "socketTimeout": 60000,
                "localParams": [],
                "resourceList": [],
                "dependence": {},
                "waitStartTimeout": {},
                "conditionResult": {"successNode": [""], "failedNode": [""]},
            },
        )
    ],
)
@patch(
    "pydolphinscheduler.core.task.Task.gen_code_and_version",
    return_value=(123, 1),
)
def test_property_task_params(mock_code_version, attr, expect):
    """Test task http property.

    Only ``url`` is supplied, so every other task_params key must come from
    the Http task's defaults.
    """
    task = Http("test-http-task-params", **attr)
    assert expect == task.task_params
@pytest.mark.parametrize(
    "param",
    [
        {"http_method": "http_method"},
        {"http_check_condition": "http_check_condition"},
        {"http_check_condition": HttpCheckCondition.STATUS_CODE_CUSTOM},
        {
            "http_check_condition": HttpCheckCondition.STATUS_CODE_CUSTOM,
            "condition": None,
        },
    ],
)
@patch(
    "pydolphinscheduler.core.task.Task.gen_code_and_version",
    return_value=(123, 1),
)
def test_http_task_param_not_support_param(mock_code, param):
    """Invalid method/check-condition combinations must raise PyDSParamException."""
    with pytest.raises(PyDSParamException, match="Parameter .*?"):
        Http("test-no-supprot-param", "https://www.apache.org", **param)
def test_http_get_define():
    """Test task HTTP function get_define."""
    code = 123
    version = 1
    name = "test_http_get_define"
    url = "https://www.apache.org"
    expect = {
        "code": code,
        "name": name,
        "version": 1,
        "description": None,
        "delayTime": 0,
        "taskType": "HTTP",
        "taskParams": {
            "localParams": [],
            "httpParams": [],
            "url": url,
            "httpMethod": "GET",
            "httpCheckCondition": "STATUS_CODE_DEFAULT",
            "condition": None,
            "connectTimeout": 60000,
            "socketTimeout": 60000,
            "dependence": {},
            "resourceList": [],
            "conditionResult": {"successNode": [""], "failedNode": [""]},
            "waitStartTimeout": {},
        },
        "flag": "YES",
        "taskPriority": "MEDIUM",
        "workerGroup": "default",
        "failRetryTimes": 0,
        "failRetryInterval": 1,
        "timeoutFlag": "CLOSE",
        "timeoutNotifyStrategy": None,
        "timeout": 0,
    }
    # Patch code/version generation so the define output is deterministic and
    # no gateway round-trip happens.
    with patch(
        "pydolphinscheduler.core.task.Task.gen_code_and_version",
        return_value=(code, version),
    ):
        http = Http(name, url)
        assert http.get_define() == expect

106
dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_procedure.py

@ -0,0 +1,106 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Test Task Procedure."""
from unittest.mock import patch
import pytest
from pydolphinscheduler.tasks.procedure import Procedure
# Sample stored-procedure SQL used as fixture data by the tests below.
# Fixed typo: "selece" -> "select" so the sample is valid SQL.
TEST_PROCEDURE_SQL = (
    'create procedure HelloWorld() select "hello world"; call HelloWorld();'
)
# Name of the (mocked) datasource the Procedure task resolves.
TEST_PROCEDURE_DATASOURCE_NAME = "test_datasource"
@pytest.mark.parametrize(
    "attr, expect",
    [
        (
            {
                "name": "test-procedure-task-params",
                "datasource_name": TEST_PROCEDURE_DATASOURCE_NAME,
                "method": TEST_PROCEDURE_SQL,
            },
            {
                "method": TEST_PROCEDURE_SQL,
                "type": "MYSQL",
                "datasource": 1,
                "localParams": [],
                "resourceList": [],
                "dependence": {},
                "waitStartTimeout": {},
                "conditionResult": {"successNode": [""], "failedNode": [""]},
            },
        )
    ],
)
@patch(
    "pydolphinscheduler.core.task.Task.gen_code_and_version",
    return_value=(123, 1),
)
@patch(
    "pydolphinscheduler.core.database.Database.get_database_info",
    return_value=({"id": 1, "type": "MYSQL"}),
)
def test_property_task_params(mock_datasource, mock_code_version, attr, expect):
    """Procedure task should serialize its constructor args into ``task_params``."""
    procedure = Procedure(**attr)
    assert procedure.task_params == expect
@patch(
    "pydolphinscheduler.core.task.Task.gen_code_and_version",
    return_value=(123, 1),
)
@patch(
    "pydolphinscheduler.core.database.Database.get_database_info",
    return_value=({"id": 1, "type": "MYSQL"}),
)
def test_sql_get_define(mock_datasource, mock_code_version):
    """Procedure task ``get_define`` should return the full task definition dict."""
    # NOTE(review): the function name says "sql" but this exercises the
    # Procedure task — looks like a copy-paste leftover from test_sql.py.
    name = "test_procedure_get_define"
    task_params = {
        "type": "MYSQL",
        "datasource": 1,
        "method": TEST_PROCEDURE_SQL,
        "localParams": [],
        "resourceList": [],
        "dependence": {},
        "conditionResult": {"successNode": [""], "failedNode": [""]},
        "waitStartTimeout": {},
    }
    expect = {
        "code": 123,
        "name": name,
        "version": 1,
        "description": None,
        "delayTime": 0,
        "taskType": "PROCEDURE",
        "taskParams": task_params,
        "flag": "YES",
        "taskPriority": "MEDIUM",
        "workerGroup": "default",
        "failRetryTimes": 0,
        "failRetryInterval": 1,
        "timeoutFlag": "CLOSE",
        "timeoutNotifyStrategy": None,
        "timeout": 0,
    }
    procedure = Procedure(name, TEST_PROCEDURE_DATASOURCE_NAME, TEST_PROCEDURE_SQL)
    assert procedure.get_define() == expect

122
dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_python.py

@ -0,0 +1,122 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Test Task python."""
from unittest.mock import patch
import pytest
from pydolphinscheduler.exceptions import PyDSParamException
from pydolphinscheduler.tasks.python import Python
@pytest.mark.parametrize(
    "attr, expect",
    [
        (
            {"code": "print(1)"},
            {
                "rawScript": "print(1)",
                "localParams": [],
                "resourceList": [],
                "dependence": {},
                "waitStartTimeout": {},
                "conditionResult": {"successNode": [""], "failedNode": [""]},
            },
        )
    ],
)
@patch(
    "pydolphinscheduler.core.task.Task.gen_code_and_version",
    return_value=(123, 1),
)
def test_property_task_params(mock_code_version, attr, expect):
    """Python task should expose its script through ``task_params``."""
    python_task = Python("test-python-task-params", **attr)
    assert python_task.task_params == expect
@pytest.mark.parametrize(
    "script_code",
    [
        123,
        ("print", "hello world"),
    ],
)
@patch(
    "pydolphinscheduler.core.task.Task.gen_code_and_version",
    return_value=(123, 1),
)
def test_python_task_not_support_code(mock_code, script_code):
    """``code`` values that are neither str nor callable must raise on access."""
    with pytest.raises(PyDSParamException, match="Parameter code do not support .*?"):
        # The exception fires when raw_script is resolved, not at construction.
        task = Python("not_support_code_type", script_code)
        task.raw_script
# NOTE(review): test_python_get_define below asserts the *exact source text* of
# this function (the Python task serializes it via its source); do not reformat
# or annotate the function itself. A leading comment is safe — presumably the
# source is taken from the `def` line onward (inspect.getsource semantics) —
# TODO confirm against pydolphinscheduler.tasks.python.
def foo(): # noqa: D103
    print("hello world.")
@pytest.mark.parametrize(
    "name, script_code, raw",
    [
        ("string_define", 'print("hello world.")', 'print("hello world.")'),
        (
            "function_define",
            foo,
            'def foo(): # noqa: D103\n print("hello world.")\n',
        ),
    ],
)
def test_python_get_define(name, script_code, raw):
    """Test task python function get_define.

    Exercises both forms of ``code``: a literal script string and a callable
    whose source text is serialized into ``rawScript``.
    """
    code = 123
    version = 1
    expect = {
        "code": code,
        "name": name,
        # Fixed: use the ``version`` variable instead of a stale literal ``1``.
        "version": version,
        "description": None,
        "delayTime": 0,
        "taskType": "PYTHON",
        "taskParams": {
            "resourceList": [],
            "localParams": [],
            "rawScript": raw,
            "dependence": {},
            "conditionResult": {"successNode": [""], "failedNode": [""]},
            "waitStartTimeout": {},
        },
        "flag": "YES",
        "taskPriority": "MEDIUM",
        "workerGroup": "default",
        "failRetryTimes": 0,
        "failRetryInterval": 1,
        "timeoutFlag": "CLOSE",
        "timeoutNotifyStrategy": None,
        "timeout": 0,
    }
    with patch(
        "pydolphinscheduler.core.task.Task.gen_code_and_version",
        return_value=(code, version),
    ):
        shell = Python(name, script_code)
        assert shell.get_define() == expect

89
dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_shell.py

@ -0,0 +1,89 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Test Task shell."""
from unittest.mock import patch
import pytest
from pydolphinscheduler.tasks.shell import Shell
@pytest.mark.parametrize(
    "attr, expect",
    [
        (
            {"command": "test script"},
            {
                "rawScript": "test script",
                "localParams": [],
                "resourceList": [],
                "dependence": {},
                "waitStartTimeout": {},
                "conditionResult": {"successNode": [""], "failedNode": [""]},
            },
        )
    ],
)
@patch(
    "pydolphinscheduler.core.task.Task.gen_code_and_version",
    return_value=(123, 1),
)
def test_property_task_params(mock_code_version, attr, expect):
    """Shell task should expose its command through ``task_params``."""
    shell_task = Shell("test-shell-task-params", **attr)
    assert shell_task.task_params == expect
def test_shell_get_define():
    """Test task shell function get_define.

    Patches ``gen_code_and_version`` for a deterministic (code, version) pair
    and compares the full serialized task definition.
    """
    code = 123
    version = 1
    name = "test_shell_get_define"
    command = "echo test shell"
    expect = {
        "code": code,
        "name": name,
        # Fixed: use the ``version`` variable instead of a stale literal ``1``.
        "version": version,
        "description": None,
        "delayTime": 0,
        "taskType": "SHELL",
        "taskParams": {
            "resourceList": [],
            "localParams": [],
            "rawScript": command,
            "dependence": {},
            "conditionResult": {"successNode": [""], "failedNode": [""]},
            "waitStartTimeout": {},
        },
        "flag": "YES",
        "taskPriority": "MEDIUM",
        "workerGroup": "default",
        "failRetryTimes": 0,
        "failRetryInterval": 1,
        "timeoutFlag": "CLOSE",
        "timeoutNotifyStrategy": None,
        "timeout": 0,
    }
    with patch(
        "pydolphinscheduler.core.task.Task.gen_code_and_version",
        return_value=(code, version),
    ):
        shell = Shell(name, command)
        assert shell.get_define() == expect

149
dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sql.py

@ -0,0 +1,149 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Test Task Sql."""
from unittest.mock import patch
import pytest
from pydolphinscheduler.tasks.sql import Sql, SqlType
@pytest.mark.parametrize(
    "sql, sql_type",
    [
        ("select 1", SqlType.SELECT),
        (" select 1", SqlType.SELECT),
        (" select 1 ", SqlType.SELECT),
        (" select 'insert' ", SqlType.SELECT),
        (" select 'insert ' ", SqlType.SELECT),
        ("with tmp as (select 1) select * from tmp ", SqlType.SELECT),
        ("insert into table_name(col1, col2) value (val1, val2)", SqlType.NOT_SELECT),
        (
            "insert into table_name(select, col2) value ('select', val2)",
            SqlType.NOT_SELECT,
        ),
        ("update table_name SET col1=val1 where col1=val2", SqlType.NOT_SELECT),
        ("update table_name SET col1='select' where col1=val2", SqlType.NOT_SELECT),
        # Fixed: the "delete from" case was accidentally listed twice.
        ("delete from table_name where id < 10", SqlType.NOT_SELECT),
        ("alter table table_name add column col1 int", SqlType.NOT_SELECT),
    ],
)
@patch(
    "pydolphinscheduler.core.task.Task.gen_code_and_version",
    return_value=(123, 1),
)
@patch(
    "pydolphinscheduler.core.database.Database.get_database_info",
    return_value=({"id": 1, "type": "mock_type"}),
)
def test_get_sql_type(mock_datasource, mock_code_version, sql, sql_type):
    """Test property sql_type could return correct type.

    SELECT-like statements (including CTEs and quoted "insert"/"select"
    literals inside them) must classify as SELECT; DML/DDL as NOT_SELECT.
    """
    name = "test_get_sql_type"
    datasource_name = "test_datasource"
    task = Sql(name, datasource_name, sql)
    assert (
        sql_type == task.sql_type
    ), f"Sql {sql} expect sql type is {sql_type} but got {task.sql_type}"
@pytest.mark.parametrize(
    "attr, expect",
    [
        (
            {"datasource_name": "datasource_name", "sql": "select 1"},
            {
                "sql": "select 1",
                "type": "MYSQL",
                "datasource": 1,
                "sqlType": SqlType.SELECT,
                "preStatements": [],
                "postStatements": [],
                "displayRows": 10,
                "localParams": [],
                "resourceList": [],
                "dependence": {},
                "waitStartTimeout": {},
                "conditionResult": {"successNode": [""], "failedNode": [""]},
            },
        )
    ],
)
@patch(
    "pydolphinscheduler.core.task.Task.gen_code_and_version",
    return_value=(123, 1),
)
@patch(
    "pydolphinscheduler.core.database.Database.get_database_info",
    return_value=({"id": 1, "type": "MYSQL"}),
)
def test_property_task_params(mock_datasource, mock_code_version, attr, expect):
    """Sql task should serialize its constructor args into ``task_params``."""
    sql_task = Sql("test-sql-task-params", **attr)
    assert sql_task.task_params == expect
@patch(
    "pydolphinscheduler.core.database.Database.get_database_info",
    return_value=({"id": 1, "type": "MYSQL"}),
)
def test_sql_get_define(mock_datasource):
    """Test task sql function get_define.

    The datasource lookup is patched by decorator; ``gen_code_and_version``
    is patched via context manager so ``code``/``version`` stay in one place.
    """
    code = 123
    version = 1
    name = "test_sql_get_define"
    command = "select 1"
    datasource_name = "test_datasource"
    expect = {
        "code": code,
        "name": name,
        # Fixed: use the ``version`` variable instead of a stale literal ``1``.
        "version": version,
        "description": None,
        "delayTime": 0,
        "taskType": "SQL",
        "taskParams": {
            "type": "MYSQL",
            "datasource": 1,
            "sql": command,
            "sqlType": SqlType.SELECT,
            "displayRows": 10,
            "preStatements": [],
            "postStatements": [],
            "localParams": [],
            "resourceList": [],
            "dependence": {},
            "conditionResult": {"successNode": [""], "failedNode": [""]},
            "waitStartTimeout": {},
        },
        "flag": "YES",
        "taskPriority": "MEDIUM",
        "workerGroup": "default",
        "failRetryTimes": 0,
        "failRetryInterval": 1,
        "timeoutFlag": "CLOSE",
        "timeoutNotifyStrategy": None,
        "timeout": 0,
    }
    with patch(
        "pydolphinscheduler.core.task.Task.gen_code_and_version",
        return_value=(code, version),
    ):
        task = Sql(name, datasource_name, command)
        assert task.get_define() == expect

114
dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_sub_process.py

@ -0,0 +1,114 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Test Task sub_process."""
from unittest.mock import patch
import pytest
from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.tasks.sub_process import SubProcess
TEST_SUB_PROCESS_DEFINITION_NAME = "sub-test-process-definition"
TEST_SUB_PROCESS_DEFINITION_CODE = "3643589832320"
TEST_PROCESS_DEFINITION_NAME = "simple-test-process-definition"
@pytest.mark.parametrize(
    "attr, expect",
    [
        (
            {"process_definition_name": TEST_SUB_PROCESS_DEFINITION_NAME},
            {
                "processDefinitionCode": TEST_SUB_PROCESS_DEFINITION_CODE,
                "localParams": [],
                "resourceList": [],
                "dependence": {},
                "waitStartTimeout": {},
                "conditionResult": {"successNode": [""], "failedNode": [""]},
            },
        )
    ],
)
@patch(
    "pydolphinscheduler.tasks.sub_process.SubProcess.get_process_definition_info",
    return_value=(
        {
            "id": 1,
            "name": TEST_SUB_PROCESS_DEFINITION_NAME,
            "code": TEST_SUB_PROCESS_DEFINITION_CODE,
        }
    ),
)
@patch(
    "pydolphinscheduler.core.task.Task.gen_code_and_version",
    return_value=(123, 1),
)
def test_property_task_params(mock_code_version, mock_pd_info, attr, expect):
    """SubProcess task should resolve the target workflow into ``task_params``."""
    sub_process_task = SubProcess("test-sub-process-task-params", **attr)
    assert sub_process_task.task_params == expect
@patch(
    "pydolphinscheduler.tasks.sub_process.SubProcess.get_process_definition_info",
    return_value=(
        {
            "id": 1,
            "name": TEST_SUB_PROCESS_DEFINITION_NAME,
            "code": TEST_SUB_PROCESS_DEFINITION_CODE,
        }
    ),
)
def test_sub_process_get_define(mock_process_definition):
    """Test task sub_process function get_define.

    The sub-process lookup is mocked; the task must be created inside a
    ProcessDefinition context because SubProcess requires a workflow.
    """
    code = 123
    version = 1
    name = "test_sub_process_get_define"
    expect = {
        "code": code,
        "name": name,
        # Fixed: use the ``version`` variable instead of a stale literal ``1``.
        "version": version,
        "description": None,
        "delayTime": 0,
        "taskType": "SUB_PROCESS",
        "taskParams": {
            "resourceList": [],
            "localParams": [],
            "processDefinitionCode": TEST_SUB_PROCESS_DEFINITION_CODE,
            "dependence": {},
            "conditionResult": {"successNode": [""], "failedNode": [""]},
            "waitStartTimeout": {},
        },
        "flag": "YES",
        "taskPriority": "MEDIUM",
        "workerGroup": "default",
        "failRetryTimes": 0,
        "failRetryInterval": 1,
        "timeoutFlag": "CLOSE",
        "timeoutNotifyStrategy": None,
        "timeout": 0,
    }
    with patch(
        "pydolphinscheduler.core.task.Task.gen_code_and_version",
        return_value=(code, version),
    ):
        with ProcessDefinition(TEST_PROCESS_DEFINITION_NAME):
            sub_process = SubProcess(name, TEST_SUB_PROCESS_DEFINITION_NAME)
            assert sub_process.get_define() == expect

300
dolphinscheduler-python/pydolphinscheduler/tests/tasks/test_switch.py

@ -0,0 +1,300 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Test Task switch."""
from typing import Optional, Tuple
from unittest.mock import patch
import pytest
from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.exceptions import PyDSParamException
from pydolphinscheduler.tasks.switch import (
Branch,
Default,
Switch,
SwitchBranch,
SwitchCondition,
)
from tests.testing.task import Task
TEST_NAME = "test-task"
TEST_TYPE = "test-type"
def task_switch_arg_wrapper(obj, task: Task, exp: Optional[str] = None) -> SwitchBranch:
    """Instantiate a switch-branch class with the argument order it expects.

    ``Default`` takes only a task, ``Branch`` takes (condition, task), and the
    base ``SwitchBranch`` takes (task, condition).
    """
    if obj is Branch:
        return obj(exp, task)
    if obj is Default:
        return obj(task)
    return obj(task, exp)
@pytest.mark.parametrize("obj", [SwitchBranch, Branch, Default])
def test_switch_branch_attr_next_node(obj: SwitchBranch):
    """Every switch-branch flavor should report its task's code as ``next_node``."""
    target = Task(name=TEST_NAME, task_type=TEST_TYPE)
    branch = task_switch_arg_wrapper(obj, task=target, exp="unittest")
    assert branch.next_node == target.code
@pytest.mark.parametrize("obj", [SwitchBranch, Default])
def test_switch_branch_get_define_without_condition(obj: SwitchBranch):
    """Without a condition, ``get_define`` should contain only ``nextNode``."""
    target = Task(name=TEST_NAME, task_type=TEST_TYPE)
    branch = task_switch_arg_wrapper(obj, task=target)
    assert branch.get_define() == {"nextNode": target.code}
@pytest.mark.parametrize("obj", [SwitchBranch, Branch])
def test_switch_branch_get_define_condition(obj: SwitchBranch):
    """With a condition set, ``get_define`` should carry it next to ``nextNode``."""
    target = Task(name=TEST_NAME, task_type=TEST_TYPE)
    exp = "${var} == 1"
    branch = task_switch_arg_wrapper(obj, task=target, exp=exp)
    assert branch.get_define() == {
        "nextNode": target.code,
        "condition": exp,
    }
@pytest.mark.parametrize(
    "args, msg",
    [
        (
            (1,),
            ".*?parameter only support SwitchBranch but got.*?",
        ),
        (
            (Default(Task(TEST_NAME, TEST_TYPE)), 2),
            ".*?parameter only support SwitchBranch but got.*?",
        ),
        (
            (Default(Task(TEST_NAME, TEST_TYPE)), Default(Task(TEST_NAME, TEST_TYPE))),
            ".*?parameter only support exactly one default branch",
        ),
        (
            (
                Branch(condition="unittest", task=Task(TEST_NAME, TEST_TYPE)),
                Default(Task(TEST_NAME, TEST_TYPE)),
                Default(Task(TEST_NAME, TEST_TYPE)),
            ),
            ".*?parameter only support exactly one default branch",
        ),
    ],
)
def test_switch_condition_set_define_attr_error(args: Tuple, msg: str):
    """Non-branch arguments or multiple defaults must raise ``PyDSParamException``."""
    condition = SwitchCondition(*args)
    with pytest.raises(PyDSParamException, match=msg):
        condition.set_define_attr()
def test_switch_condition_set_define_attr_default():
    """A lone :class:`Default` should set ``next_node`` and an empty branch list."""
    target = Task(TEST_NAME, TEST_TYPE)
    condition = SwitchCondition(Default(target))
    condition.set_define_attr()
    assert condition.next_node == target.code
    assert condition.depend_task_list == []
def test_switch_condition_set_define_attr_branch():
    """Only :class:`Branch` members: branch list populated, ``next_node`` empty."""
    target = Task(TEST_NAME, TEST_TYPE)
    condition = SwitchCondition(Branch("unittest1", target), Branch("unittest2", target))
    condition.set_define_attr()
    assert condition.next_node == ""
    assert condition.depend_task_list == [
        {"condition": "unittest1", "nextNode": target.code},
        {"condition": "unittest2", "nextNode": target.code},
    ]
def test_switch_condition_set_define_attr_mix_branch_and_default():
    """Test set both :class:`Branch` and :class:`Default` attributes on :class:`SwitchCondition`."""
    target = Task(TEST_NAME, TEST_TYPE)
    condition = SwitchCondition(
        Branch("unittest1", target), Branch("unittest2", target), Default(target)
    )
    condition.set_define_attr()
    # Default sets next_node; branches fill depend_task_list.
    assert condition.next_node == target.code
    assert condition.depend_task_list == [
        {"condition": "unittest1", "nextNode": target.code},
        {"condition": "unittest2", "nextNode": target.code},
    ]
def test_switch_condition_get_define_default():
    """``get_define`` with only a :class:`Default` yields an empty branch list."""
    target = Task(TEST_NAME, TEST_TYPE)
    condition = SwitchCondition(Default(target))
    assert condition.get_define() == {
        "dependTaskList": [],
        "nextNode": target.code,
    }
def test_switch_condition_get_define_branch():
    """``get_define`` with only :class:`Branch` members leaves ``nextNode`` empty."""
    target = Task(TEST_NAME, TEST_TYPE)
    condition = SwitchCondition(Branch("unittest1", target), Branch("unittest2", target))
    assert condition.get_define() == {
        "dependTaskList": [
            {"condition": "unittest1", "nextNode": target.code},
            {"condition": "unittest2", "nextNode": target.code},
        ],
        "nextNode": "",
    }
def test_switch_condition_get_define_mix_branch_and_default():
    """``get_define`` with branches plus a default fills both fields."""
    target = Task(TEST_NAME, TEST_TYPE)
    condition = SwitchCondition(
        Branch("unittest1", target), Branch("unittest2", target), Default(target)
    )
    assert condition.get_define() == {
        "dependTaskList": [
            {"condition": "unittest1", "nextNode": target.code},
            {"condition": "unittest2", "nextNode": target.code},
        ],
        "nextNode": target.code,
    }
@patch(
    "pydolphinscheduler.core.task.Task.gen_code_and_version",
    return_value=(123, 1),
)
def test_switch_get_define(mock_task_code_version):
    """Test task switch :func:`get_define`.

    The Switch itself gets the patched (123, 1) code/version; the branch
    target keeps its own mock-generated code, referenced via ``task.code``.
    """
    task = Task(name=TEST_NAME, task_type=TEST_TYPE)
    switch_condition = SwitchCondition(
        Branch(condition="${var1} > 1", task=task),
        Branch(condition="${var1} <= 1", task=task),
        Default(task),
    )
    name = "test_switch_get_define"
    expect = {
        "code": 123,
        "name": name,
        "version": 1,
        "description": None,
        "delayTime": 0,
        "taskType": "SWITCH",
        "taskParams": {
            "resourceList": [],
            "localParams": [],
            "dependence": {},
            "conditionResult": {"successNode": [""], "failedNode": [""]},
            "waitStartTimeout": {},
            "switchResult": {
                "dependTaskList": [
                    {"condition": "${var1} > 1", "nextNode": task.code},
                    {"condition": "${var1} <= 1", "nextNode": task.code},
                ],
                "nextNode": task.code,
            },
        },
        "flag": "YES",
        "taskPriority": "MEDIUM",
        "workerGroup": "default",
        "failRetryTimes": 0,
        "failRetryInterval": 1,
        "timeoutFlag": "CLOSE",
        "timeoutNotifyStrategy": None,
        "timeout": 0,
    }
    # Fixed: renamed from ``task`` — the original rebound the branch-target
    # variable, shadowing the task referenced by ``expect``.
    switch_task = Switch(name, condition=switch_condition)
    assert switch_task.get_define() == expect
@patch(
    "pydolphinscheduler.core.task.Task.gen_code_and_version",
    return_value=(123, 1),
)
def test_switch_set_dep_workflow(mock_task_code_version):
    """Switch tasks should wire upstream/downstream dependencies in a workflow."""
    with ProcessDefinition(name="test-switch-set-dep-workflow") as pd:
        parent = Task(name="parent", task_type=TEST_TYPE)
        switch_child_1 = Task(name="switch_child_1", task_type=TEST_TYPE)
        switch_child_2 = Task(name="switch_child_2", task_type=TEST_TYPE)
        switch_condition = SwitchCondition(
            Branch(condition="${var} > 1", task=switch_child_1),
            Default(task=switch_child_2),
        )
        switch = Switch(name=TEST_NAME, condition=switch_condition)
        parent >> switch
        # All four tasks must be registered on the workflow.
        assert len(pd.tasks) == 4
        assert sorted(pd.task_list, key=lambda t: t.name) == sorted(
            [parent, switch, switch_child_1, switch_child_2], key=lambda t: t.name
        )
        # Explicit dependency declared via ``>>``.
        assert parent._downstream_task_codes == {switch.code}
        assert switch._upstream_task_codes == {parent.code}
        # Branch targets become the switch's downstream tasks
        # (set up when the workflow definition is built).
        assert switch._downstream_task_codes == {
            switch_child_1.code,
            switch_child_2.code,
        }
        assert all(
            child._upstream_task_codes == {switch.code}
            for child in (switch_child_1, switch_child_2)
        )

52
dolphinscheduler-python/pydolphinscheduler/tests/test_java_gateway.py

@ -0,0 +1,52 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Test pydolphinscheduler java gateway."""
from py4j.java_gateway import JavaGateway, java_import
def test_gateway_connect():
    """Test whether the client can connect to the Java gateway.

    NOTE(review): presumably requires a running DolphinScheduler Python
    gateway server on the default py4j port — this is an integration test,
    not a pure unit test.
    """
    gateway = JavaGateway()
    app = gateway.entry_point
    assert app.ping() == "PONG"
def test_jvm_simple():
    """Read built-in JVM constants through the gateway and compare them."""
    jvm = JavaGateway().jvm
    min_int = jvm.java.lang.Integer.MIN_VALUE
    max_int = jvm.java.lang.Integer.MAX_VALUE
    assert max_int > min_int
def test_python_client_java_import_single():
    """A single class imported via ``java_import`` should appear on the JVM view."""
    jvm = JavaGateway().jvm
    java_import(jvm, "org.apache.dolphinscheduler.common.utils.FileUtils")
    assert hasattr(jvm, "FileUtils")
def test_python_client_java_import_package():
    """A wildcard ``java_import`` should expose every class in the package."""
    jvm = JavaGateway().jvm
    java_import(jvm, "org.apache.dolphinscheduler.common.utils.*")
    # Spot-check a few well-known utility classes from the package.
    assert all(hasattr(jvm, util) for util in ("FileUtils", "OSUtils", "DateUtils"))

18
dolphinscheduler-python/pydolphinscheduler/tests/testing/__init__.py

@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Init testing package, it provider easy way for pydolphinscheduler test."""

32
dolphinscheduler-python/pydolphinscheduler/tests/testing/task.py

@ -0,0 +1,32 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Mock class Task for other test."""
import uuid
from pydolphinscheduler.core.task import Task as SourceTask
class Task(SourceTask):
    """Test double for :class:`pydolphinscheduler.core.task.Task`.

    Overrides the gateway round-trip so tests can create tasks without a
    running DolphinScheduler Java gateway.
    """

    # Version reported for every mocked task.
    DEFAULT_VERSION = 1

    def gen_code_and_version(self):
        """Return a locally generated ``(code, version)`` pair instead of calling the gateway."""
        code = uuid.uuid1().time
        return code, self.DEFAULT_VERSION

18
dolphinscheduler-python/pydolphinscheduler/tests/utils/__init__.py

@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Init tests for utils package."""

78
dolphinscheduler-python/pydolphinscheduler/tests/utils/test_date.py

@ -0,0 +1,78 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Test utils.date module."""
from datetime import datetime
import pytest
from pydolphinscheduler.utils.date import FMT_STD, conv_from_str, conv_to_schedule
# Shared "now" fixture, evaluated once at import time so parametrize cases
# and assertions see the same instant.
curr_date = datetime.now()
@pytest.mark.parametrize(
    "src,expect",
    [
        (curr_date, curr_date.strftime(FMT_STD)),
        (datetime(2021, 1, 1), "2021-01-01 00:00:00"),
        (datetime(2021, 1, 1, 1), "2021-01-01 01:00:00"),
        (datetime(2021, 1, 1, 1, 1), "2021-01-01 01:01:00"),
        (datetime(2021, 1, 1, 1, 1, 1), "2021-01-01 01:01:01"),
        (datetime(2021, 1, 1, 1, 1, 1, 1), "2021-01-01 01:01:01"),
    ],
)
def test_conv_to_schedule(src: datetime, expect: str) -> None:
    """Datetimes should format to the standard schedule string (sub-second dropped)."""
    assert conv_to_schedule(src) == expect
@pytest.mark.parametrize(
    "src,expect",
    [
        ("2021-01-01", datetime(2021, 1, 1)),
        ("2021/01/01", datetime(2021, 1, 1)),
        ("20210101", datetime(2021, 1, 1)),
        ("2021-01-01 01:01:01", datetime(2021, 1, 1, 1, 1, 1)),
        ("2021/01/01 01:01:01", datetime(2021, 1, 1, 1, 1, 1)),
        ("20210101 010101", datetime(2021, 1, 1, 1, 1, 1)),
    ],
)
def test_conv_from_str_success(src: str, expect: datetime) -> None:
    """Every supported date/datetime format should parse to the expected value."""
    result = conv_from_str(src)
    assert result == expect, f"Function conv_from_str convert {src} not expect to {expect}."
@pytest.mark.parametrize(
    "src",
    [
        "2021-01-01 010101",
        "2021:01:01",
        "202111",
        "20210101010101",
        "2021:01:01 01:01:01",
    ],
)
def test_conv_from_str_not_impl(src: str) -> None:
    """Unsupported date formats should raise ``NotImplementedError``."""
    err_pattern = ".*? could not be convert to datetime for now."
    with pytest.raises(NotImplementedError, match=err_pattern):
        conv_from_str(src)

87
dolphinscheduler-python/pydolphinscheduler/tests/utils/test_string.py

@ -0,0 +1,87 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Test utils.string module."""
import pytest
from pydolphinscheduler.utils.string import attr2camel, class_name2camel, snake2camel
@pytest.mark.parametrize(
    "snake, expect",
    [
        ("snake_case", "snakeCase"),
        ("snake_123case", "snake123Case"),
        ("snake_c_a_s_e", "snakeCASE"),
        ("snake__case", "snakeCase"),
        ("snake_case_case", "snakeCaseCase"),
        ("_snake_case", "SnakeCase"),
        ("__snake_case", "SnakeCase"),
        ("Snake_case", "SnakeCase"),
    ],
)
def test_snake2camel(snake: str, expect: str):
    """Test snake2camel, the basic converter the other utils.string helpers build on."""
    converted = snake2camel(snake)
    assert (
        converted == expect
    ), f"Test case {snake} do no return expect result {expect}."
@pytest.mark.parametrize(
    "attr, expects",
    [
        # source attribute, (true expect, false expect),
        ("snake_case", ("snakeCase", "snakeCase")),
        ("snake_123case", ("snake123Case", "snake123Case")),
        ("snake_c_a_s_e", ("snakeCASE", "snakeCASE")),
        ("snake__case", ("snakeCase", "snakeCase")),
        ("snake_case_case", ("snakeCaseCase", "snakeCaseCase")),
        ("_snake_case", ("snakeCase", "SnakeCase")),
        ("__snake_case", ("snakeCase", "SnakeCase")),
        ("Snake_case", ("SnakeCase", "SnakeCase")),
    ],
)
def test_attr2camel(attr: str, expects: tuple):
    """Test attr2camel under both values of the include_private switch."""
    # expects is ordered (result with include_private=True, with False).
    for include_private, expect in zip((True, False), expects):
        converted = attr2camel(attr, include_private)
        assert (
            converted == expect
        ), f"Test case {attr} do no return expect result {expect} when include_private is {include_private}."
@pytest.mark.parametrize(
    "class_name, expect",
    [
        ("snake_case", "snakeCase"),
        ("snake_123case", "snake123Case"),
        ("snake_c_a_s_e", "snakeCASE"),
        ("snake__case", "snakeCase"),
        ("snake_case_case", "snakeCaseCase"),
        ("_snake_case", "snakeCase"),
        ("_Snake_case", "snakeCase"),
        ("__snake_case", "snakeCase"),
        ("__Snake_case", "snakeCase"),
        ("Snake_case", "snakeCase"),
    ],
)
def test_class_name2camel(class_name: str, expect: str):
    """Test class_name2camel: leading underscores and capitals both lower-camel."""
    converted = class_name2camel(class_name)
    assert (
        converted == expect
    ), f"Test case {class_name} do no return expect result {expect}."

161
dolphinscheduler-python/src/main/java/org/apache/dolphinscheduler/server/PythonGatewayServer.java

@ -30,13 +30,13 @@ import org.apache.dolphinscheduler.api.utils.Result;
import org.apache.dolphinscheduler.common.Constants;
import org.apache.dolphinscheduler.common.enums.FailureStrategy;
import org.apache.dolphinscheduler.common.enums.Priority;
import org.apache.dolphinscheduler.common.enums.ProcessExecutionTypeEnum;
import org.apache.dolphinscheduler.common.enums.ReleaseState;
import org.apache.dolphinscheduler.common.enums.RunMode;
import org.apache.dolphinscheduler.common.enums.TaskDependType;
import org.apache.dolphinscheduler.common.enums.UserType;
import org.apache.dolphinscheduler.common.enums.WarningType;
import org.apache.dolphinscheduler.common.utils.CodeGenerateUtils;
import org.apache.dolphinscheduler.dao.entity.DataSource;
import org.apache.dolphinscheduler.dao.entity.ProcessDefinition;
import org.apache.dolphinscheduler.dao.entity.Project;
import org.apache.dolphinscheduler.dao.entity.Queue;
@ -44,6 +44,7 @@ import org.apache.dolphinscheduler.dao.entity.Schedule;
import org.apache.dolphinscheduler.dao.entity.TaskDefinition;
import org.apache.dolphinscheduler.dao.entity.Tenant;
import org.apache.dolphinscheduler.dao.entity.User;
import org.apache.dolphinscheduler.dao.mapper.DataSourceMapper;
import org.apache.dolphinscheduler.dao.mapper.ProcessDefinitionMapper;
import org.apache.dolphinscheduler.dao.mapper.ProjectMapper;
import org.apache.dolphinscheduler.dao.mapper.ScheduleMapper;
@ -60,24 +61,17 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.web.servlet.support.SpringBootServletInitializer;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.FilterType;
import py4j.GatewayServer;
@ComponentScan(value = "org.apache.dolphinscheduler", excludeFilters = {
@ComponentScan.Filter(type = FilterType.REGEX, pattern = {
"org.apache.dolphinscheduler.server.master.*",
"org.apache.dolphinscheduler.server.worker.*",
"org.apache.dolphinscheduler.server.monitor.*",
"org.apache.dolphinscheduler.server.log.*",
"org.apache.dolphinscheduler.alert.*"
})
})
@SpringBootApplication
@ComponentScan(value = "org.apache.dolphinscheduler")
public class PythonGatewayServer extends SpringBootServletInitializer {
private static final Logger LOGGER = LoggerFactory.getLogger(PythonGatewayServer.class);
private static final WarningType DEFAULT_WARNING_TYPE = WarningType.NONE;
private static final int DEFAULT_WARNING_GROUP_ID = 0;
private static final FailureStrategy DEFAULT_FAILURE_STRATEGY = FailureStrategy.CONTINUE;
@ -124,6 +118,9 @@ public class PythonGatewayServer extends SpringBootServletInitializer {
@Autowired
private ScheduleMapper scheduleMapper;
@Autowired
private DataSourceMapper dataSourceMapper;
// TODO replace this user to build in admin user if we make sure build in one could not be change
private final User dummyAdminUser = new User() {
{
@ -202,34 +199,26 @@ public class PythonGatewayServer extends SpringBootServletInitializer {
String workerGroup,
String tenantCode,
String taskRelationJson,
String taskDefinitionJson,
ProcessExecutionTypeEnum executionType) {
String taskDefinitionJson) {
User user = usersService.queryUser(userName);
Project project = (Project) projectService.queryByName(user, projectName).get(Constants.DATA_LIST);
long projectCode = project.getCode();
Map<String, Object> verifyProcessDefinitionExists = processDefinitionService.verifyProcessDefinitionName(user, projectCode, name);
Status verifyStatus = (Status) verifyProcessDefinitionExists.get(Constants.STATUS);
ProcessDefinition processDefinition = getProcessDefinition(user, projectCode, name);
long processDefinitionCode;
// create or update process definition
if (verifyStatus == Status.PROCESS_DEFINITION_NAME_EXIST) {
ProcessDefinition processDefinition = processDefinitionMapper.queryByDefineName(projectCode, name);
if (processDefinition != null) {
processDefinitionCode = processDefinition.getCode();
// make sure process definition offline which could edit
processDefinitionService.releaseProcessDefinition(user, projectCode, processDefinitionCode, ReleaseState.OFFLINE);
Map<String, Object> result = processDefinitionService.updateProcessDefinition(user, projectCode, name, processDefinitionCode, description, globalParams,
locations, timeout, tenantCode, taskRelationJson, taskDefinitionJson,executionType);
} else if (verifyStatus == Status.SUCCESS) {
locations, timeout, tenantCode, taskRelationJson, taskDefinitionJson);
} else {
Map<String, Object> result = processDefinitionService.createProcessDefinition(user, projectCode, name, description, globalParams,
locations, timeout, tenantCode, taskRelationJson, taskDefinitionJson,executionType);
ProcessDefinition processDefinition = (ProcessDefinition) result.get(Constants.DATA_LIST);
locations, timeout, tenantCode, taskRelationJson, taskDefinitionJson);
processDefinition = (ProcessDefinition) result.get(Constants.DATA_LIST);
processDefinitionCode = processDefinition.getCode();
} else {
String msg = "Verify process definition exists status is invalid, neither SUCCESS or PROCESS_DEFINITION_NAME_EXIST.";
LOGGER.error(msg);
throw new RuntimeException(msg);
}
// Fresh process definition schedule
if (schedule != null) {
createOrUpdateSchedule(user, projectCode, processDefinitionCode, schedule, workerGroup);
@ -238,6 +227,28 @@ public class PythonGatewayServer extends SpringBootServletInitializer {
return processDefinitionCode;
}
/**
* get process definition
* @param user user who create or update schedule
* @param projectCode project which process definition belongs to
* @param processDefinitionName process definition name
*/
private ProcessDefinition getProcessDefinition(User user, long projectCode, String processDefinitionName) {
Map<String, Object> verifyProcessDefinitionExists = processDefinitionService.verifyProcessDefinitionName(user, projectCode, processDefinitionName);
Status verifyStatus = (Status) verifyProcessDefinitionExists.get(Constants.STATUS);
ProcessDefinition processDefinition = null;
if (verifyStatus == Status.PROCESS_DEFINITION_NAME_EXIST) {
processDefinition = processDefinitionMapper.queryByDefineName(projectCode, processDefinitionName);
} else if (verifyStatus != Status.SUCCESS) {
String msg = "Verify process definition exists status is invalid, neither SUCCESS or PROCESS_DEFINITION_NAME_EXIST.";
LOGGER.error(msg);
throw new RuntimeException(msg);
}
return processDefinition;
}
/**
* create or update process definition schedule.
* It would always use latest schedule define in workflow-as-code, and set schedule online when
@ -360,11 +371,103 @@ public class PythonGatewayServer extends SpringBootServletInitializer {
}
}
/**
* Get datasource by given datasource name. It return map contain datasource id, type, name.
* Useful in Python API create sql task which need datasource information.
*
* @param datasourceName name of the datasource to look up
*/
public Map<String, Object> getDatasourceInfo(String datasourceName) {
Map<String, Object> result = new HashMap<>();
List<DataSource> dataSourceList = dataSourceMapper.queryDataSourceByName(datasourceName);
if (dataSourceList.size() > 1) {
String msg = String.format("Get more than one datasource by name %s", datasourceName);
logger.error(msg);
throw new IllegalArgumentException(msg);
} else if (dataSourceList.size() == 0) {
String msg = String.format("Can not find any datasource by name %s", datasourceName);
logger.error(msg);
throw new IllegalArgumentException(msg);
} else {
DataSource dataSource = dataSourceList.get(0);
result.put("id", dataSource.getId());
result.put("type", dataSource.getType().name());
result.put("name", dataSource.getName());
}
return result;
}
/**
* Get processDefinition by given processDefinitionName name. It return map contain processDefinition id, name, code.
* Useful in Python API create subProcess task which need processDefinition information.
*
* @param userName name of the user querying the process definition
* @param projectName project name which process definition belongs to
* @param processDefinitionName process definition name
*/
public Map<String, Object> getProcessDefinitionInfo(String userName, String projectName, String processDefinitionName) {
Map<String, Object> result = new HashMap<>();
User user = usersService.queryUser(userName);
Project project = (Project) projectService.queryByName(user, projectName).get(Constants.DATA_LIST);
long projectCode = project.getCode();
ProcessDefinition processDefinition = getProcessDefinition(user, projectCode, processDefinitionName);
// get process definition info
if (processDefinition != null) {
// make sure process definition online
processDefinitionService.releaseProcessDefinition(user, projectCode, processDefinition.getCode(), ReleaseState.ONLINE);
result.put("id", processDefinition.getId());
result.put("name", processDefinition.getName());
result.put("code", processDefinition.getCode());
} else {
String msg = String.format("Can not find valid process definition by name %s", processDefinitionName);
logger.error(msg);
throw new IllegalArgumentException(msg);
}
return result;
}
/**
* Get project, process definition, task code.
* Useful in Python API create dependent task which need processDefinition information.
*
* @param projectName project name which process definition belongs to
* @param processDefinitionName process definition name
* @param taskName task name
*/
public Map<String, Object> getDependentInfo(String projectName, String processDefinitionName, String taskName) {
Map<String, Object> result = new HashMap<>();
Project project = projectMapper.queryByName(projectName);
if (project == null) {
String msg = String.format("Can not find valid project by name %s", projectName);
logger.error(msg);
throw new IllegalArgumentException(msg);
}
long projectCode = project.getCode();
result.put("projectCode", projectCode);
ProcessDefinition processDefinition = processDefinitionMapper.queryByDefineName(projectCode, processDefinitionName);
if (processDefinition == null) {
String msg = String.format("Can not find valid process definition by name %s", processDefinitionName);
logger.error(msg);
throw new IllegalArgumentException(msg);
}
result.put("processDefinitionCode", processDefinition.getCode());
if (taskName != null) {
TaskDefinition taskDefinition = taskDefinitionMapper.queryByName(projectCode, taskName);
result.put("taskDefinitionCode", taskDefinition.getCode());
}
return result;
}
@PostConstruct
public void run() {
GatewayServer server = new GatewayServer(this);
GatewayServer.turnLoggingOn();
// Start server to accept python client RPC
// Start server to accept python client socket
server.start();
}

5
dolphinscheduler-server/src/main/resources/config/install_config.conf

@ -48,6 +48,11 @@ alertServer="ds3"
# Example for hostname: apiServers="ds1", Example for IP: apiServers="192.168.8.1"
apiServers="ds1"
# A comma separated list of machine hostname or IP would be installed Python gateway server, it
# must be a subset of configuration `ips`.
# Example for hostname: pythonGatewayServers="ds1", Example for IP: pythonGatewayServers="192.168.8.1"
pythonGatewayServers="ds1"
# The directory to install DolphinScheduler for all machine we config above. It will automatically be created by `install.sh` script if not exists.
# Do not set this configuration same as the current path (pwd)
installPath="/data1_1T/dolphinscheduler"

4
dolphinscheduler-standalone-server/pom.xml

@ -34,6 +34,10 @@
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-python</artifactId>
</dependency>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-test</artifactId>

2
dolphinscheduler-standalone-server/src/main/java/org/apache/dolphinscheduler/server/StandaloneServer.java

@ -40,6 +40,6 @@ public class StandaloneServer {
MasterServer.class,
WorkerServer.class,
AlertServer.class
).profiles("master", "worker", "api", "alert", "h2", "standalone").run(args);
).profiles("master", "worker", "api", "alert", "python-gateway", "h2", "standalone").run(args);
}
}

1
pom.xml

@ -1142,5 +1142,6 @@
<module>dolphinscheduler-service</module>
<module>dolphinscheduler-standalone-server</module>
<module>dolphinscheduler-datasource-plugin</module>
<module>dolphinscheduler-python</module>
</modules>
</project>

5
script/dolphinscheduler-daemon.sh

@ -16,7 +16,7 @@
# limitations under the License.
#
usage="Usage: dolphinscheduler-daemon.sh (start|stop|status) <api-server|master-server|worker-server|alert-server|standalone-server> "
usage="Usage: dolphinscheduler-daemon.sh (start|stop|status) <api-server|master-server|worker-server|alert-server|standalone-server|python-gateway-server> "
# if no args specified, show usage
if [ $# -le 1 ]; then
@ -96,6 +96,9 @@ elif [ "$command" = "logger-server" ]; then
elif [ "$command" = "standalone-server" ]; then
CLASS=org.apache.dolphinscheduler.server.StandaloneServer
export SPRING_PROFILES_ACTIVE="${SPRING_PROFILES_ACTIVE},standalone,${DATABASE_TYPE}"
elif [ "$command" = "python-gateway-server" ]; then
CLASS=org.apache.dolphinscheduler.server.PythonGatewayServer
export SPRING_PROFILES_ACTIVE="${SPRING_PROFILES_ACTIVE},python-gateway,${DATABASE_TYPE}"
else
echo "Error: No command named '$command' was found."
exit 1

7
script/start-all.sh

@ -59,6 +59,13 @@ do
ssh -p $sshPort $apiServer "cd $installPath/; sh bin/dolphinscheduler-daemon.sh start api-server;"
done
# Fan out to every configured python gateway host and start its daemon.
# Intentionally unquoted so the comma-separated list word-splits into array items.
pythonGatewayServersHost=(${pythonGatewayServers//,/ })
for pythonGatewayServer in "${pythonGatewayServersHost[@]}"
do
	# Fix copy-pasted message: this loop starts the python gateway, not a worker.
	echo "$pythonGatewayServer python gateway server is starting"
	ssh -p "$sshPort" "$pythonGatewayServer" "cd $installPath/; sh bin/dolphinscheduler-daemon.sh start python-gateway-server;"
done
# query server status
echo "query server status"
cd $installPath/; sh bin/status-all.sh

8
script/status-all.sh

@ -78,3 +78,11 @@ do
apiState=`ssh -p $sshPort $apiServer "cd $installPath/; sh bin/dolphinscheduler-daemon.sh status api-server;"`
echo "$apiServer $apiState"
done
# 5.python gateway server check state
pythonGatewayServersHost=(${pythonGatewayServers//,/ })
for pythonGateway in ${pythonGatewayServersHost[@]}
do
pythonGatewayState=`ssh -p $sshPort $pythonGateway "cd $installPath/; sh bin/dolphinscheduler-daemon.sh status python-gateway-server;"`
echo "$pythonGateway $pythonGatewayState"
done

7
script/stop-all.sh

@ -58,6 +58,13 @@ do
ssh -p $sshPort $apiServer "cd $installPath/; sh bin/dolphinscheduler-daemon.sh stop api-server;"
done
pythonGatewayServersHost=(${pythonGatewayServers//,/ })
for pythonGatewayServer in ${pythonGatewayServersHost[@]}
do
echo "$pythonGatewayServer python gateway server is stopping"
ssh -p $sshPort $pythonGatewayServer "cd $installPath/; sh bin/dolphinscheduler-daemon.sh stop python-gateway-server;"
done
# query server status
echo "query server status"
cd $installPath/; sh bin/status-all.sh

1
tools/dependencies/known-dependencies.txt

@ -171,6 +171,7 @@ protostuff-core-1.7.2.jar
protostuff-runtime-1.7.2.jar
protostuff-api-1.7.2.jar
protostuff-collectionschema-1.7.2.jar
py4j-0.10.9.jar
quartz-2.3.0.jar
quartz-jobs-2.3.0.jar
reflections-0.9.12.jar

Loading…
Cancel
Save