Browse Source

[python] Migrate pythonGatewayServer into api server (#9372)

Currently the size of our distribution package is up to
800MB. This patch migrates the python gateway server into
the api server to reduce it.

The distribution package size before and after this patch is:

```sh
# before
796M   apache-dolphinscheduler-2.0.4-SNAPSHOT-bin.tar.gz

# after
647M   apache-dolphinscheduler-2.0.4-SNAPSHOT-bin.tar.gz
```
3.0.0/version-upgrade
Jiajie Zhong 3 years ago committed by GitHub
parent
commit
3457cee960
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 23
      deploy/docker/docker-compose.yml
  2. 21
      deploy/docker/docker-stack.yml
  3. 3
      deploy/kubernetes/dolphinscheduler/templates/_helpers.tpl
  4. 6
      docs/docs/en/faq.md
  5. 4
      docs/docs/en/guide/installation/pseudo-cluster.md
  6. 4
      docs/docs/en/guide/installation/standalone.md
  7. 2
      docs/docs/en/guide/upgrade.md
  8. 6
      dolphinscheduler-api/pom.xml
  9. 2
      dolphinscheduler-api/src/main/docker/Dockerfile
  10. 16
      dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/configuration/PythonGatewayConfiguration.java
  11. 54
      dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/python/PythonGateway.java
  12. 20
      dolphinscheduler-api/src/main/resources/application.yaml
  13. 5
      dolphinscheduler-dist/src/main/assembly/dolphinscheduler-bin.xml
  14. 75
      dolphinscheduler-python/pom.xml
  15. 3
      dolphinscheduler-python/pydolphinscheduler/UPDATING.md
  16. 28
      dolphinscheduler-python/pydolphinscheduler/docs/source/start.rst
  17. 2
      dolphinscheduler-python/pydolphinscheduler/docs/source/tutorial.rst
  18. 2
      dolphinscheduler-python/pydolphinscheduler/tests/integration/__init__.py
  19. 4
      dolphinscheduler-python/pydolphinscheduler/tests/integration/test_submit_examples.py
  20. 64
      dolphinscheduler-python/src/main/assembly/dolphinscheduler-python-gateway-server.xml
  21. 32
      dolphinscheduler-python/src/main/bin/start.sh
  22. 34
      dolphinscheduler-python/src/main/docker/Dockerfile
  23. 83
      dolphinscheduler-python/src/main/resources/application.yaml
  24. 57
      dolphinscheduler-python/src/main/resources/logback-spring.xml
  25. 4
      dolphinscheduler-standalone-server/src/main/assembly/dolphinscheduler-standalone-server.xml
  26. 2
      dolphinscheduler-standalone-server/src/main/dist-bin/start.sh
  27. 2
      dolphinscheduler-standalone-server/src/main/resources/application.yaml
  28. 4
      script/dolphinscheduler-daemon.sh
  29. 5
      script/env/install_env.sh
  30. 2
      script/scp-hosts.sh
  31. 7
      script/start-all.sh
  32. 8
      script/status-all.sh
  33. 7
      script/stop-all.sh

23
deploy/docker/docker-compose.yml

@ -140,29 +140,6 @@ services:
networks: networks:
- dolphinscheduler - dolphinscheduler
dolphinscheduler-python-gateway:
image: ${HUB}/dolphinscheduler-python:${TAG}
ports:
- "54321:54321"
- "25333:25333"
env_file: .env
healthcheck:
test: [ "CMD", "curl", "http://localhost:54321/actuator/health" ]
interval: 30s
timeout: 5s
retries: 3
depends_on:
dolphinscheduler-schema-initializer:
condition: service_completed_successfully
dolphinscheduler-zookeeper:
condition: service_healthy
volumes:
- dolphinscheduler-logs:/opt/dolphinscheduler/logs
- dolphinscheduler-shared-local:/opt/soft
- dolphinscheduler-resource-local:/dolphinscheduler
networks:
- dolphinscheduler
networks: networks:
dolphinscheduler: dolphinscheduler:
driver: bridge driver: bridge

21
deploy/docker/docker-stack.yml

@ -118,27 +118,6 @@ services:
mode: replicated mode: replicated
replicas: 1 replicas: 1
dolphinscheduler-python-gateway:
image: apache/dolphinscheduler-python-gateway
ports:
- 54321:54321
- 25333:25333
env_file: .env
healthcheck:
test: [ "CMD", "curl", "http://localhost:54321/actuator/health" ]
interval: 30s
timeout: 5s
retries: 3
volumes:
- dolphinscheduler-logs:/opt/dolphinscheduler/logs
- dolphinscheduler-shared-local:/opt/soft
- dolphinscheduler-resource-local:/dolphinscheduler
networks:
- dolphinscheduler
deploy:
mode: replicated
replicas: 1
networks: networks:
dolphinscheduler: dolphinscheduler:
driver: overlay driver: overlay

3
deploy/kubernetes/dolphinscheduler/templates/_helpers.tpl

@ -44,9 +44,6 @@ Create default docker images' fullname.
{{- define "dolphinscheduler.image.fullname.tools" -}} {{- define "dolphinscheduler.image.fullname.tools" -}}
{{- .Values.image.registry }}/dolphinscheduler-tools:{{ .Values.image.tag | default .Chart.AppVersion -}} {{- .Values.image.registry }}/dolphinscheduler-tools:{{ .Values.image.tag | default .Chart.AppVersion -}}
{{- end -}} {{- end -}}
{{- define "dolphinscheduler.image.fullname.python-gateway" -}}
{{- .Values.image.registry }}/dolphinscheduler-python-gateway:{{ .Values.image.tag | default .Chart.AppVersion -}}
{{- end -}}
{{/* {{/*
Create a default common labels. Create a default common labels.

6
docs/docs/en/faq.md

@ -712,6 +712,12 @@ A:The repair can be completed by executing the following SQL in the database:
update t_ds_version set version='2.0.1'; update t_ds_version set version='2.0.1';
``` ```
## Can not find python-gateway-server in distribute package
Since version 3.0.0-alpha, the Python gateway server has been integrated into the API server, and the Python gateway service
starts when you start the API server. If you want to disable the Python gateway service, you can edit the API server configuration
file `api-server/conf/application.yaml` and set the attribute `python-gateway.enabled : false`.
--- ---
## We will collect more FAQ later ## We will collect more FAQ later

4
docs/docs/en/guide/installation/pseudo-cluster.md

@ -193,7 +193,9 @@ sh ./bin/dolphinscheduler-daemon.sh start alert-server
sh ./bin/dolphinscheduler-daemon.sh stop alert-server sh ./bin/dolphinscheduler-daemon.sh stop alert-server
``` ```
> **_Note:_**: Please refer to the section of "System Architecture Design" for service usage > **_Note:_**: Please refer to the section of "System Architecture Design" for service usage. Python gateway service is
> started along with the api-server, and if you do not want to start Python gateway service please disable it by changing
> the yaml config `python-gateway.enabled : false` in api-server's configuration path `api-server/conf/application.yaml`
[jdk]: https://www.oracle.com/technetwork/java/javase/downloads/index.html [jdk]: https://www.oracle.com/technetwork/java/javase/downloads/index.html
[zookeeper]: https://zookeeper.apache.org/releases.html [zookeeper]: https://zookeeper.apache.org/releases.html

4
docs/docs/en/guide/installation/standalone.md

@ -39,4 +39,8 @@ sh ./bin/dolphinscheduler-daemon.sh start standalone-server
sh ./bin/dolphinscheduler-daemon.sh stop standalone-server sh ./bin/dolphinscheduler-daemon.sh stop standalone-server
``` ```
> Note: Python gateway service is started along with the api-server, and if you do not want to start Python gateway
> service please disable it by changing the yaml config `python-gateway.enabled : false` in api-server's configuration
> path `api-server/conf/application.yaml`
[jdk]: https://www.oracle.com/technetwork/java/javase/downloads/index.html [jdk]: https://www.oracle.com/technetwork/java/javase/downloads/index.html

2
docs/docs/en/guide/upgrade.md

@ -6,7 +6,7 @@
`sh ./script/stop-all.sh` `sh ./script/stop-all.sh`
## Download the Newest Version Installation Package ## Download the Latest Version Installation Package
- [download](/en-us/download/download.html) the latest version of the installation packages. - [download](/en-us/download/download.html) the latest version of the installation packages.
- The following upgrade operations need to be performed in the new version's directory. - The following upgrade operations need to be performed in the new version's directory.

6
dolphinscheduler-api/pom.xml

@ -275,6 +275,12 @@
</exclusions> </exclusions>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
<!-- Python -->
<dependency>
<groupId>net.sf.py4j</groupId>
<artifactId>py4j</artifactId>
</dependency>
</dependencies> </dependencies>
<build> <build>

2
dolphinscheduler-api/src/main/docker/Dockerfile

@ -29,6 +29,6 @@ WORKDIR $DOLPHINSCHEDULER_HOME
ADD ./target/api-server $DOLPHINSCHEDULER_HOME ADD ./target/api-server $DOLPHINSCHEDULER_HOME
EXPOSE 12345 EXPOSE 12345 25333
CMD [ "/bin/bash", "./bin/start.sh" ] CMD [ "/bin/bash", "./bin/start.sh" ]

16
dolphinscheduler-python/src/main/java/org/apache/dolphinscheduler/server/config/PythonGatewayConfig.java → dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/configuration/PythonGatewayConfiguration.java

@ -15,17 +15,17 @@
* limitations under the License. * limitations under the License.
*/ */
package org.apache.dolphinscheduler.server.config; package org.apache.dolphinscheduler.api.configuration;
import org.springframework.boot.context.properties.ConfigurationProperties; import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.boot.context.properties.EnableConfigurationProperties; import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Configuration;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
@Component @Component
@EnableConfigurationProperties @EnableConfigurationProperties
@ConfigurationProperties("python-gateway") @ConfigurationProperties(value = "python-gateway", ignoreUnknownFields = false)
public class PythonGatewayConfig { public class PythonGatewayConfiguration {
private boolean enabled;
private String gatewayServerAddress; private String gatewayServerAddress;
private int gatewayServerPort; private int gatewayServerPort;
private String pythonAddress; private String pythonAddress;
@ -33,6 +33,14 @@ public class PythonGatewayConfig {
private int connectTimeout; private int connectTimeout;
private int readTimeout; private int readTimeout;
public boolean getEnabled() {
return enabled;
}
public void setEnabled(boolean enabled) {
this.enabled = enabled;
}
public String getGatewayServerAddress() { public String getGatewayServerAddress() {
return gatewayServerAddress; return gatewayServerAddress;
} }

54
dolphinscheduler-python/src/main/java/org/apache/dolphinscheduler/server/PythonGatewayServer.java → dolphinscheduler-api/src/main/java/org/apache/dolphinscheduler/api/python/PythonGateway.java

@ -15,7 +15,7 @@
* limitations under the License. * limitations under the License.
*/ */
package org.apache.dolphinscheduler.server; package org.apache.dolphinscheduler.api.python;
import org.apache.dolphinscheduler.api.dto.resources.ResourceComponent; import org.apache.dolphinscheduler.api.dto.resources.ResourceComponent;
import org.apache.dolphinscheduler.api.enums.Status; import org.apache.dolphinscheduler.api.enums.Status;
@ -56,7 +56,7 @@ import org.apache.dolphinscheduler.dao.mapper.ProjectMapper;
import org.apache.dolphinscheduler.dao.mapper.ProjectUserMapper; import org.apache.dolphinscheduler.dao.mapper.ProjectUserMapper;
import org.apache.dolphinscheduler.dao.mapper.ScheduleMapper; import org.apache.dolphinscheduler.dao.mapper.ScheduleMapper;
import org.apache.dolphinscheduler.dao.mapper.TaskDefinitionMapper; import org.apache.dolphinscheduler.dao.mapper.TaskDefinitionMapper;
import org.apache.dolphinscheduler.server.config.PythonGatewayConfig; import org.apache.dolphinscheduler.api.configuration.PythonGatewayConfiguration;
import org.apache.dolphinscheduler.spi.enums.ResourceType; import org.apache.dolphinscheduler.spi.enums.ResourceType;
import org.apache.commons.collections.CollectionUtils; import org.apache.commons.collections.CollectionUtils;
@ -75,17 +75,13 @@ import javax.annotation.PostConstruct;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.SpringApplication; import org.springframework.stereotype.Component;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.web.servlet.support.SpringBootServletInitializer;
import org.springframework.context.annotation.ComponentScan;
import py4j.GatewayServer; import py4j.GatewayServer;
@SpringBootApplication @Component
@ComponentScan(value = "org.apache.dolphinscheduler") public class PythonGateway {
public class PythonGatewayServer extends SpringBootServletInitializer { private static final Logger logger = LoggerFactory.getLogger(PythonGateway.class);
private static final Logger logger = LoggerFactory.getLogger(PythonGatewayServer.class);
private static final WarningType DEFAULT_WARNING_TYPE = WarningType.NONE; private static final WarningType DEFAULT_WARNING_TYPE = WarningType.NONE;
private static final int DEFAULT_WARNING_GROUP_ID = 0; private static final int DEFAULT_WARNING_GROUP_ID = 0;
@ -141,7 +137,7 @@ public class PythonGatewayServer extends SpringBootServletInitializer {
private DataSourceMapper dataSourceMapper; private DataSourceMapper dataSourceMapper;
@Autowired @Autowired
private PythonGatewayConfig pythonGatewayConfig; private PythonGatewayConfiguration pythonGatewayConfiguration;
@Autowired @Autowired
private ProjectUserMapper projectUserMapper; private ProjectUserMapper projectUserMapper;
@ -546,30 +542,32 @@ public class PythonGatewayServer extends SpringBootServletInitializer {
} }
@PostConstruct @PostConstruct
public void run() { public void init() {
if (pythonGatewayConfiguration.getEnabled()) {
this.start();
}
}
private void start() {
GatewayServer server; GatewayServer server;
try { try {
InetAddress gatewayHost = InetAddress.getByName(pythonGatewayConfig.getGatewayServerAddress()); InetAddress gatewayHost = InetAddress.getByName(pythonGatewayConfiguration.getGatewayServerAddress());
InetAddress pythonHost = InetAddress.getByName(pythonGatewayConfig.getPythonAddress()); InetAddress pythonHost = InetAddress.getByName(pythonGatewayConfiguration.getPythonAddress());
server = new GatewayServer( server = new GatewayServer(
this, this,
pythonGatewayConfig.getGatewayServerPort(), pythonGatewayConfiguration.getGatewayServerPort(),
pythonGatewayConfig.getPythonPort(), pythonGatewayConfiguration.getPythonPort(),
gatewayHost, gatewayHost,
pythonHost, pythonHost,
pythonGatewayConfig.getConnectTimeout(), pythonGatewayConfiguration.getConnectTimeout(),
pythonGatewayConfig.getReadTimeout(), pythonGatewayConfiguration.getReadTimeout(),
null null
); );
GatewayServer.turnLoggingOn(); GatewayServer.turnLoggingOn();
logger.info("PythonGatewayServer started on: " + gatewayHost.toString()); logger.info("PythonGatewayService started on: " + gatewayHost.toString());
server.start(); server.start();
} catch (UnknownHostException e) { } catch (UnknownHostException e) {
logger.error("exception occurred while constructing PythonGatewayServer().", e); logger.error("exception occurred while constructing PythonGatewayService().", e);
} }
} }
public static void main(String[] args) {
SpringApplication.run(PythonGatewayServer.class, args);
}
} }

20
dolphinscheduler-api/src/main/resources/application.yaml

@ -108,6 +108,26 @@ audit:
metrics: metrics:
enabled: true enabled: true
python-gateway:
# Whether to enable the Python gateway server or not. The default value is true.
enabled: true
# The address the Python gateway server starts on. Set its value to `0.0.0.0` if your Python API runs on a different
# host than the Python gateway server. It could also be a specific address like `127.0.0.1` or `localhost`
gateway-server-address: 0.0.0.0
# The port of Python gateway server start. Define which port you could connect to Python gateway server from
# Python API side.
gateway-server-port: 25333
# The address of Python callback client.
python-address: 127.0.0.1
# The port of Python callback client.
python-port: 25334
# Close connection of socket server if no other request accept after x milliseconds. Define value is (0 = infinite),
# and socket server would never close even though no requests accept
connect-timeout: 0
# Close each active connection of socket server if python program not active after x milliseconds. Define value is
# (0 = infinite), and socket server would never close even though no requests accept
read-timeout: 0
# Override by profile # Override by profile
--- ---

5
dolphinscheduler-dist/src/main/assembly/dolphinscheduler-bin.xml vendored

@ -50,11 +50,6 @@
<outputDirectory>logger-server</outputDirectory> <outputDirectory>logger-server</outputDirectory>
</fileSet> </fileSet>
<fileSet>
<directory>${basedir}/../dolphinscheduler-python/target/python-gateway-server</directory>
<outputDirectory>python-gateway-server</outputDirectory>
</fileSet>
<fileSet> <fileSet>
<directory>${basedir}/../dolphinscheduler-standalone-server/target/standalone-server</directory> <directory>${basedir}/../dolphinscheduler-standalone-server/target/standalone-server</directory>
<outputDirectory>standalone-server</outputDirectory> <outputDirectory>standalone-server</outputDirectory>

75
dolphinscheduler-python/pom.xml

@ -28,82 +28,7 @@
<name>${project.artifactId}</name> <name>${project.artifactId}</name>
<packaging>jar</packaging> <packaging>jar</packaging>
<dependencies>
<!-- dolphinscheduler -->
<dependency>
<groupId>org.apache.dolphinscheduler</groupId>
<artifactId>dolphinscheduler-api</artifactId>
</dependency>
<!--springboot-->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
<exclusions>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-tomcat</artifactId>
</exclusion>
<exclusion>
<artifactId>log4j-to-slf4j</artifactId>
<groupId>org.apache.logging.log4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>net.sf.py4j</groupId>
<artifactId>py4j</artifactId>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<excludes>
<exclude>*.yaml</exclude>
<exclude>*.xml</exclude>
</excludes>
</configuration>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<executions>
<execution>
<id>dolphinscheduler-python-gateway-server</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
<configuration>
<finalName>python-gateway-server</finalName>
<descriptors>
<descriptor>src/main/assembly/dolphinscheduler-python-gateway-server.xml</descriptor>
</descriptors>
<appendAssemblyId>false</appendAssemblyId>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<profiles> <profiles>
<profile>
<id>docker</id>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</profile>
<profile> <profile>
<id>python</id> <id>python</id>
<build> <build>

3
dolphinscheduler-python/pydolphinscheduler/UPDATING.md

@ -24,6 +24,9 @@ It started after version 2.0.5 released
## dev ## dev
* Integrate Python gateway server into Dolphinscheduler API server, and you could start Python gateway service by command
`./bin/dolphinscheduler-daemon.sh start api-server` instead of independent command
`./bin/dolphinscheduler-daemon.sh start python-gateway-server`.
* Remove parameter `queue` from class `ProcessDefinition` to avoid confuse user when it change but not work * Remove parameter `queue` from class `ProcessDefinition` to avoid confuse user when it change but not work
* Change `yaml_parser.py` method `to_string` to magic method `__str__` make it more pythonic. * Change `yaml_parser.py` method `to_string` to magic method `__str__` make it more pythonic.
* Use package ``ruamel.yaml`` replace ``pyyaml`` for write yaml file with comment. * Use package ``ruamel.yaml`` replace ``pyyaml`` for write yaml file with comment.

28
dolphinscheduler-python/pydolphinscheduler/docs/source/start.rst

@ -55,7 +55,7 @@ After Python is already installed on your machine following section
$ pip install apache-dolphinscheduler $ pip install apache-dolphinscheduler
The latest version of *PyDolphinScheduler* would be installed after you run above The latest version of *PyDolphinScheduler* would be installed after you run above
command in your terminal. You could go and `start Python Gateway Server`_ to finish command in your terminal. You could go and `start Python Gateway Service`_ to finish
the prepare, and then go to :doc:`tutorial` to make your hand dirty. But if you the prepare, and then go to :doc:`tutorial` to make your hand dirty. But if you
want to install the unreleased version of *PyDolphinScheduler*, you could go and see want to install the unreleased version of *PyDolphinScheduler*, you could go and see
section `installing PyDolphinScheduler in dev`_ for more detail. section `installing PyDolphinScheduler in dev`_ for more detail.
@ -74,33 +74,39 @@ which we hold in GitHub
# Install PyDolphinScheduler in develop mode # Install PyDolphinScheduler in develop mode
$ cd dolphinscheduler-python/pydolphinscheduler && pip install -e . $ cd dolphinscheduler-python/pydolphinscheduler && pip install -e .
After you installed *PyDolphinScheduler*, please remember `start Python Gateway Server`_ After you installed *PyDolphinScheduler*, please remember `start Python Gateway Service`_
which waiting for *PyDolphinScheduler*'s workflow definition require. which waiting for *PyDolphinScheduler*'s workflow definition require.
Start Python Gateway Server Start Python Gateway Service
--------------------------- ----------------------------
Since **PyDolphinScheduler** is Python API for `Apache DolphinScheduler`_, it Since **PyDolphinScheduler** is Python API for `Apache DolphinScheduler`_, it
could define workflow and tasks structure, but could not run it unless you could define workflow and tasks structure, but could not run it unless you
`install Apache DolphinScheduler`_ and start Python gateway server. We only `install Apache DolphinScheduler`_ and start its API server which including
and some key steps here and you could go `install Apache DolphinScheduler`_ Python gateway service in it. We only and some key steps here and you could
for more detail go `install Apache DolphinScheduler`_ for more detail
.. code-block:: bash .. code-block:: bash
# Start pythonGatewayServer # Start DolphinScheduler api-server which including python gateway service
$ ./bin/dolphinscheduler-daemon.sh start pythonGatewayServer $ ./bin/dolphinscheduler-daemon.sh start api-server
To check whether the server is alive or not, you could run :code:`jps`. And To check whether the server is alive or not, you could run :code:`jps`. And
the server is health if keyword `PythonGatewayServer` in the console. the server is health if keyword `ApiApplicationServer` in the console.
.. code-block:: bash .. code-block:: bash
$ jps $ jps
.... ....
201472 PythonGatewayServer 201472 ApiApplicationServer
.... ....
.. note::
Please make sure the Python gateway service is enabled so that it is started along with `api-server`. It is controlled by the
yaml config `python-gateway.enabled : true` in api-server's configuration file `api-server/conf/application.yaml`.
The default value is true, so the Python gateway service starts whenever the api server is started.
What's More What's More
----------- -----------

2
dolphinscheduler-python/pydolphinscheduler/docs/source/tutorial.rst

@ -130,7 +130,7 @@ Now, we could run the Python code like other Python script, for the basic usage
:end-before: [end submit_or_run] :end-before: [end submit_or_run]
If you not start your Apache DolphinScheduler server, you could find the way in If you not start your Apache DolphinScheduler server, you could find the way in
:ref:`start:start Python gateway server` and it would have more detail about related server :ref:`start:start Python gateway service` and it would have more detail about related server
start. Beside attribute `run`, we have attribute `submit` for object `ProcessDefinition` start. Beside attribute `run`, we have attribute `submit` for object `ProcessDefinition`
and it just submit workflow to the daemon but not setting the schedule information. For and it just submit workflow to the daemon but not setting the schedule information. For
more detail you could see :ref:`concept:process definition`. more detail you could see :ref:`concept:process definition`.

2
dolphinscheduler-python/pydolphinscheduler/tests/integration/__init__.py

@ -15,4 +15,4 @@
# specific language governing permissions and limitations # specific language governing permissions and limitations
# under the License. # under the License.
"""Test integration between Python API and PythonGatewayServer.""" """Test integration between Python API and PythonGatewayService."""

4
dolphinscheduler-python/pydolphinscheduler/tests/integration/test_submit_examples.py

@ -15,7 +15,7 @@
# specific language governing permissions and limitations # specific language governing permissions and limitations
# under the License. # under the License.
"""Test whether success submit examples DAG to PythonGatewayServer.""" """Test whether success submit examples DAG to PythonGatewayService."""
from pathlib import Path from pathlib import Path
@ -34,7 +34,7 @@ from tests.testing.path import path_example
], ],
) )
def test_exec_white_list_example(example_path: Path): def test_exec_white_list_example(example_path: Path):
"""Test execute examples and submit DAG to PythonGatewayServer.""" """Test execute examples and submit DAG to PythonGatewayService."""
try: try:
exec(example_path.read_text()) exec(example_path.read_text())
except Exception: except Exception:

64
dolphinscheduler-python/src/main/assembly/dolphinscheduler-python-gateway-server.xml

@ -1,64 +0,0 @@
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<assembly xmlns="http://maven.apache.org/ASSEMBLY/2.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.1.0 http://maven.apache.org/xsd/assembly-2.1.0.xsd">
<id>dolphinscheduler-python-gateway-server</id>
<formats>
<format>dir</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<baseDirectory>python-gateway-server</baseDirectory>
<fileSets>
<fileSet>
<directory>${basedir}/src/main/resources</directory>
<includes>
<include>*.yaml</include>
<include>*.xml</include>
</includes>
<outputDirectory>conf</outputDirectory>
</fileSet>
<fileSet>
<directory>${basedir}/src/main/bin</directory>
<outputDirectory>bin</outputDirectory>
<fileMode>0755</fileMode>
<directoryMode>0755</directoryMode>
</fileSet>
<fileSet>
<directory>${basedir}/../script/env</directory>
<outputDirectory>bin</outputDirectory>
<includes>
<include>dolphinscheduler_env.sh</include>
</includes>
<fileMode>0755</fileMode>
<directoryMode>0755</directoryMode>
</fileSet>
<fileSet>
<directory>${basedir}/../dolphinscheduler-common/src/main/resources</directory>
<includes>
<include>**/*.properties</include>
</includes>
<outputDirectory>conf</outputDirectory>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>
<outputDirectory>libs</outputDirectory>
</dependencySet>
</dependencySets>
</assembly>

32
dolphinscheduler-python/src/main/bin/start.sh

@ -1,32 +0,0 @@
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Launches the Python gateway server JVM.
#
# Environment:
#   DOLPHINSCHEDULER_HOME - install root; defaults to the parent of this script's directory
#   JAVA_OPTS             - JVM options; a default set is used when unset or empty
#   DOCKER                - when "true", -XX:-UseContainerSupport is appended to JAVA_OPTS
#   SPRING_JACKSON_TIME_ZONE - used for -Duser.timezone in the default JAVA_OPTS
# Quote "$0" and "$BIN_DIR" so install paths containing spaces or glob characters resolve correctly.
BIN_DIR=$(dirname -- "$0")
# Use && so that a failed cd does not silently report the caller's working directory as the home.
DOLPHINSCHEDULER_HOME=${DOLPHINSCHEDULER_HOME:-$(cd -- "$BIN_DIR/.." && pwd)}
source "$BIN_DIR/dolphinscheduler_env.sh"
JAVA_OPTS=${JAVA_OPTS:-"-server -Duser.timezone=${SPRING_JACKSON_TIME_ZONE} -Xms1g -Xmx1g -Xmn512m -XX:+PrintGCDetails -Xloggc:gc.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=dump.hprof"}
if [[ "$DOCKER" == "true" ]]; then
JAVA_OPTS="${JAVA_OPTS} -XX:-UseContainerSupport"
fi
# JAVA_OPTS is intentionally unquoted: it holds several space-separated JVM flags that must be word-split.
# shellcheck disable=SC2086
java $JAVA_OPTS \
-cp "$DOLPHINSCHEDULER_HOME/conf":"$DOLPHINSCHEDULER_HOME/libs/*" \
org.apache.dolphinscheduler.server.PythonGatewayServer

34
dolphinscheduler-python/src/main/docker/Dockerfile

@ -1,34 +0,0 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Image for the Python gateway server: a JRE base plus the assembled
# python-gateway-server directory, started through ./bin/start.sh.
FROM openjdk:8-jre-slim-buster
ENV DOCKER true
ENV TZ Asia/Shanghai
ENV DOLPHINSCHEDULER_HOME /opt/dolphinscheduler
# Chain with && so a failed update/install fails the build instead of silently
# producing an image without curl/sudo; apt-get is the stable CLI for scripts.
RUN apt-get update && \
apt-get install -y curl sudo && \
rm -rf /var/lib/apt/lists/*
WORKDIR $DOLPHINSCHEDULER_HOME
ADD ./target/python-gateway-server $DOLPHINSCHEDULER_HOME
# 25333 matches python-gateway.gateway-server-port and 54321 matches server.port in the module's application.yaml.
EXPOSE 25333 54321
CMD [ "/bin/bash", "./bin/start.sh" ]

83
dolphinscheduler-python/src/main/resources/application.yaml

@ -1,83 +0,0 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
spring:
application:
name: python-gateway-server
main:
banner-mode: off
datasource:
driver-class-name: org.h2.Driver
url: jdbc:h2:mem:dolphinscheduler;MODE=MySQL;DB_CLOSE_DELAY=-1;DATABASE_TO_LOWER=true
username: sa
password: ""
jackson:
time-zone: UTC
date-format: "yyyy-MM-dd HH:mm:ss"
servlet:
multipart:
max-file-size: 1024MB
max-request-size: 1024MB
messages:
basename: i18n/messages
jpa:
hibernate:
ddl-auto: none
python-gateway:
# The address the Python gateway server starts on. Set its value to `0.0.0.0` if your Python API runs on a different
# host than the Python gateway server. It could also be a specific address like `127.0.0.1` or `localhost`
gateway-server-address: 0.0.0.0
# The port of Python gateway server start. Define which port you could connect to Python gateway server from
# Python API side.
gateway-server-port: 25333
# The address of Python callback client.
python-address: 127.0.0.1
# The port of Python callback client.
python-port: 25334
# Close connection of socket server if no other request accept after x milliseconds. Define value is (0 = infinite),
# and socket server would never close even though no requests accept
connect-timeout: 0
# Close each active connection of socket server if python program not active after x milliseconds. Define value is
# (0 = infinite), and socket server would never close even though no requests accept
read-timeout: 0
server:
port: 54321
management:
endpoints:
web:
exposure:
include: '*'
metrics:
tags:
application: ${spring.application.name}
metrics:
enabled: true
# Override by profile
---
spring:
config:
activate:
on-profile: postgresql
quartz:
properties:
org.quartz.jobStore.driverDelegateClass: org.quartz.impl.jdbcjobstore.PostgreSQLDelegate

57
dolphinscheduler-python/src/main/resources/logback-spring.xml

@ -1,57 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<configuration scan="true" scanPeriod="120 seconds">
<property name="log.base" value="logs"/>
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>
[%level] %date{yyyy-MM-dd HH:mm:ss.SSS} %logger{96}:[%line] - %msg%n
</pattern>
<charset>UTF-8</charset>
</encoder>
</appender>
<appender name="PYTHONGATEWAYLOGFILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${log.base}/dolphinscheduler-python-gateway.log</file>
<filter class="ch.qos.logback.classic.filter.ThresholdFilter">
<level>INFO</level>
</filter>
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<fileNamePattern>${log.base}/dolphinscheduler-python-gateway.%d{yyyy-MM-dd_HH}.%i.log</fileNamePattern>
<maxHistory>168</maxHistory>
<maxFileSize>64MB</maxFileSize>
</rollingPolicy>
<encoder>
<pattern>
[%level] %date{yyyy-MM-dd HH:mm:ss.SSS} %logger{96}:[%line] - %msg%n
</pattern>
<charset>UTF-8</charset>
</encoder>
</appender>
<root level="INFO">
<if condition="${DOCKER:-false}">
<then>
<appender-ref ref="STDOUT"/>
</then>
</if>
<appender-ref ref="PYTHONGATEWAYLOGFILE"/>
</root>
</configuration>

4
dolphinscheduler-standalone-server/src/main/assembly/dolphinscheduler-standalone-server.xml

@ -45,10 +45,6 @@
<directory>${basedir}/../dolphinscheduler-log-server/target/logger-server/libs</directory> <directory>${basedir}/../dolphinscheduler-log-server/target/logger-server/libs</directory>
<outputDirectory>libs/logger-server</outputDirectory> <outputDirectory>libs/logger-server</outputDirectory>
</fileSet> </fileSet>
<fileSet>
<directory>${basedir}/../dolphinscheduler-python/target/python-gateway-server/libs</directory>
<outputDirectory>libs/python-gateway</outputDirectory>
</fileSet>
<fileSet> <fileSet>
<directory>${basedir}/src/main/resources</directory> <directory>${basedir}/src/main/resources</directory>

2
dolphinscheduler-standalone-server/src/main/dist-bin/start.sh

@ -28,7 +28,7 @@ if [[ "$DOCKER" == "true" ]]; then
fi fi
CP=$DOLPHINSCHEDULER_HOME/libs/standalone-server/* CP=$DOLPHINSCHEDULER_HOME/libs/standalone-server/*
for d in alert-server api-server master-server python-gateway-server worker-server; do for d in alert-server api-server master-server worker-server; do
for f in $DOLPHINSCHEDULER_HOME/../$d/libs/*.jar; do for f in $DOLPHINSCHEDULER_HOME/../$d/libs/*.jar; do
CP=$CP:$f CP=$CP:$f
done done

2
dolphinscheduler-standalone-server/src/main/resources/application.yaml

@ -140,6 +140,8 @@ alert:
port: 50052 port: 50052
python-gateway: python-gateway:
# Whether to enable the Python gateway server or not. The default value is true.
enabled: true
# The address of Python gateway server start. Set its value to `0.0.0.0` if your Python API run in different # The address of Python gateway server start. Set its value to `0.0.0.0` if your Python API run in different
# between Python gateway server. It could be set to another specific address like `127.0.0.1` or `localhost` # between Python gateway server. It could be set to another specific address like `127.0.0.1` or `localhost`
gateway-server-address: 0.0.0.0 gateway-server-address: 0.0.0.0

4
script/dolphinscheduler-daemon.sh

@ -16,7 +16,7 @@
# limitations under the License. # limitations under the License.
# #
usage="Usage: dolphinscheduler-daemon.sh (start|stop|status) <api-server|master-server|worker-server|alert-server|python-gateway-server|standalone-server> " usage="Usage: dolphinscheduler-daemon.sh (start|stop|status) <api-server|master-server|worker-server|alert-server|standalone-server> "
# if no args specified, show usage # if no args specified, show usage
if [ $# -le 1 ]; then if [ $# -le 1 ]; then
@ -61,8 +61,6 @@ elif [ "$command" = "alert-server" ]; then
log=$DOLPHINSCHEDULER_HOME/alert-server/logs/$command-$HOSTNAME.out log=$DOLPHINSCHEDULER_HOME/alert-server/logs/$command-$HOSTNAME.out
elif [ "$command" = "standalone-server" ]; then elif [ "$command" = "standalone-server" ]; then
log=$DOLPHINSCHEDULER_HOME/standalone-server/logs/$command-$HOSTNAME.out log=$DOLPHINSCHEDULER_HOME/standalone-server/logs/$command-$HOSTNAME.out
elif [ "$command" = "python-gateway-server" ]; then
log=$DOLPHINSCHEDULER_HOME/python-gateway-server/logs/$command-$HOSTNAME.out
else else
echo "Error: No command named '$command' was found." echo "Error: No command named '$command' was found."
exit 1 exit 1

5
script/env/install_env.sh vendored

@ -48,11 +48,6 @@ alertServer=${alertServer:-"ds3"}
# Example for hostname: apiServers="ds1", Example for IP: apiServers="192.168.8.1" # Example for hostname: apiServers="ds1", Example for IP: apiServers="192.168.8.1"
apiServers=${apiServers:-"ds1"} apiServers=${apiServers:-"ds1"}
# A comma-separated list of machine hostnames or IPs on which the Python gateway server will be installed; it
# must be a subset of configuration `ips`.
# Example for hostname: pythonGatewayServers="ds1", Example for IP: pythonGatewayServers="192.168.8.1"
pythonGatewayServers=${pythonGatewayServers:-"ds1"}
# The directory to install DolphinScheduler for all machine we config above. It will automatically be created by `install.sh` script if not exists. # The directory to install DolphinScheduler for all machine we config above. It will automatically be created by `install.sh` script if not exists.
# Do not set this configuration same as the current path (pwd) # Do not set this configuration same as the current path (pwd)
installPath=${installPath:-"/tmp/dolphinscheduler"} installPath=${installPath:-"/tmp/dolphinscheduler"}

2
script/scp-hosts.sh

@ -49,7 +49,7 @@ do
echo "scp dirs to $host/$installPath starting" echo "scp dirs to $host/$installPath starting"
ssh -p $sshPort $host "cd $installPath/; rm -rf bin/ conf/ lib/ script/ sql/ ui/" ssh -p $sshPort $host "cd $installPath/; rm -rf bin/ conf/ lib/ script/ sql/ ui/"
for dsDir in bin master-server worker-server alert-server api-server ui python-gateway-server for dsDir in bin master-server worker-server alert-server api-server ui
do do
# if worker in workersGroupMap # if worker in workersGroupMap
if [[ "${workersGroupMap[${host}]}" ]]; then if [[ "${workersGroupMap[${host}]}" ]]; then

7
script/start-all.sh

@ -56,13 +56,6 @@ do
ssh -p $sshPort $apiServer "cd $installPath/; sh bin/dolphinscheduler-daemon.sh start api-server;" ssh -p $sshPort $apiServer "cd $installPath/; sh bin/dolphinscheduler-daemon.sh start api-server;"
done done
pythonGatewayHost=(${pythonGatewayServers//,/ })
for pythonGatewayServer in "${pythonGatewayHost[@]}"
do
echo "$pythonGatewayServer python gateway server is starting"
ssh -p $sshPort $pythonGatewayServer "cd $installPath/; sh bin/dolphinscheduler-daemon.sh start python-gateway-server;"
done
# query server status # query server status
echo "query server status" echo "query server status"
cd $installPath/; sh bin/status-all.sh cd $installPath/; sh bin/status-all.sh

8
script/status-all.sh

@ -74,11 +74,3 @@ do
apiState=`ssh -p $sshPort $apiServer "cd $installPath/; sh bin/dolphinscheduler-daemon.sh status api-server;"` apiState=`ssh -p $sshPort $apiServer "cd $installPath/; sh bin/dolphinscheduler-daemon.sh status api-server;"`
echo "$apiServer $apiState" echo "$apiServer $apiState"
done done
# python gateway server check state
pythonGatewayHost=(${pythonGatewayServers//,/ })
for pythonGatewayServer in "${pythonGatewayHost[@]}"
do
pythonGatewayState=`ssh -p $sshPort $pythonGatewayServer "cd $installPath/; sh bin/dolphinscheduler-daemon.sh status python-gateway-server;"`
echo "$pythonGatewayServer $pythonGatewayState"
done

7
script/stop-all.sh

@ -54,10 +54,3 @@ do
echo "$apiServer api server is stopping" echo "$apiServer api server is stopping"
ssh -p $sshPort $apiServer "cd $installPath/; sh bin/dolphinscheduler-daemon.sh stop api-server;" ssh -p $sshPort $apiServer "cd $installPath/; sh bin/dolphinscheduler-daemon.sh stop api-server;"
done done
pythonGatewayHost=(${pythonGatewayServers//,/ })
for pythonGatewayServer in "${pythonGatewayHost[@]}"
do
echo "$pythonGatewayServer python gateway server is stopping"
ssh -p $sshPort $pythonGatewayServer "cd $installPath/; sh bin/dolphinscheduler-daemon.sh stop python-gateway-server;"
done

Loading…
Cancel
Save