From 6911a39b491ef3ea65feffffe1fb4a6284e40475 Mon Sep 17 00:00:00 2001 From: chengshiwen Date: Thu, 25 Feb 2021 18:44:21 +0800 Subject: [PATCH] [Improvement][Docker] Add FAQ in readme --- docker/build/README.md | 147 ++++++++++++++++++++++++- docker/build/README_zh_CN.md | 145 +++++++++++++++++++++++- docker/docker-swarm/docker-compose.yml | 12 ++ docker/docker-swarm/docker-stack.yml | 12 ++ 4 files changed, 309 insertions(+), 7 deletions(-) diff --git a/docker/build/README.md b/docker/build/README.md index f1c952b4cb..a51038e8e5 100644 --- a/docker/build/README.md +++ b/docker/build/README.md @@ -1,12 +1,12 @@ -## What is Dolphin Scheduler? +## What is DolphinScheduler? -Dolphin Scheduler is a distributed and easy-to-expand visual DAG workflow scheduling system, dedicated to solving the complex dependencies in data processing, making the scheduling system out of the box for data processing. +DolphinScheduler is a distributed and easy-to-expand visual DAG workflow scheduling system, dedicated to solving the complex dependencies in data processing, making the scheduling system out of the box for data processing. GitHub URL: https://github.com/apache/incubator-dolphinscheduler Official Website: https://dolphinscheduler.apache.org -![Dolphin Scheduler](https://dolphinscheduler.apache.org/img/hlogo_colorful.svg) +![DolphinScheduler](https://dolphinscheduler.apache.org/img/hlogo_colorful.svg) [![EN doc](https://img.shields.io/badge/document-English-blue.svg)](README.md) [![CN doc](https://img.shields.io/badge/文档-中文版-blue.svg)](README_zh_CN.md) @@ -118,7 +118,7 @@ Please read `./docker/build/hooks/build` `./docker/build/hooks/build.bat` script ## Environment Variables -The Dolphin Scheduler image uses several environment variables which are easy to miss. While none of the variables are required, they may significantly aid you in using the image. +The DolphinScheduler image uses several environment variables which are easy to miss. 
While none of the variables are required, they may significantly aid you in using the image. **`DATABASE_TYPE`** @@ -308,3 +308,142 @@ EOF " > ${DOLPHINSCHEDULER_HOME}/conf/${line%.*} done ``` + +## FAQ + +### How to stop dolphinscheduler by docker-compose? + +Stop containers: + +``` +docker-compose stop +``` + +Stop containers and remove containers, networks and volumes: + +``` +docker-compose down -v +``` + +### How to deploy dolphinscheduler on Docker Swarm? + +Assuming that the Docker Swarm cluster has been created (if there is no Docker Swarm cluster, please refer to [https://docs.docker.com/engine/swarm/swarm-tutorial/create-swarm/](https://docs.docker.com/engine/swarm/swarm-tutorial/create-swarm/)) + +Start a stack named dolphinscheduler + +``` +docker stack deploy -c docker-stack.yml dolphinscheduler +``` + +Stop and remove the stack named dolphinscheduler + +``` +docker stack rm dolphinscheduler +``` + +### How to use MySQL as the DolphinScheduler's database instead of PostgreSQL? + +> Because of the commercial license, we cannot directly use the driver and client of MySQL. +> +> If you want to use MySQL, you can build a new image based on the `apache/dolphinscheduler` image as follows. + +1. Download the MySQL driver [mysql-connector-java-5.1.49.jar](https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.49/mysql-connector-java-5.1.49.jar) (requires `>=5.1.47`) + +2. Create a new `Dockerfile` to add MySQL driver and client: + +``` +FROM apache/dolphinscheduler:latest +COPY mysql-connector-java-5.1.49.jar /opt/dolphinscheduler/lib +RUN apk add --update --no-cache mysql-client +``` + +3. Build a new docker image including MySQL driver and client: + +``` +docker build -t apache/dolphinscheduler:mysql . +``` + +4. Modify all `image` fields to `apache/dolphinscheduler:mysql` in `docker-compose.yml` + +> If you want to deploy dolphinscheduler on Docker Swarm, you need to modify `docker-stack.yml` + +5. 
Comment out the `dolphinscheduler-postgresql` block in `docker-compose.yml` + +6. Add `dolphinscheduler-mysql` service in `docker-compose.yml` (**Optional**, you can directly use an external MySQL database) + +7. Modify all DATABASE environment variables in `docker-compose.yml` + +``` +DATABASE_TYPE: mysql +DATABASE_DRIVER: com.mysql.jdbc.Driver +DATABASE_HOST: dolphinscheduler-mysql +DATABASE_PORT: 3306 +DATABASE_USERNAME: root +DATABASE_PASSWORD: root +DATABASE_DATABASE: dolphinscheduler +DATABASE_PARAMS: useUnicode=true&characterEncoding=UTF-8 +``` + +> If you have added `dolphinscheduler-mysql` service in `docker-compose.yml`, just set `DATABASE_HOST` to `dolphinscheduler-mysql` + +8. Run dolphinscheduler (See **How to use this docker image**) + +### How to support MySQL datasource in `Datasource manage`? + +> Because of the commercial license, we cannot directly use the driver of MySQL. +> +> If you want to add a MySQL datasource, you can build a new image based on the `apache/dolphinscheduler` image as follows. + +1. Download the MySQL driver [mysql-connector-java-5.1.49.jar](https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.49/mysql-connector-java-5.1.49.jar) (requires `>=5.1.47`) + +2. Create a new `Dockerfile` to add MySQL driver: + +``` +FROM apache/dolphinscheduler:latest +COPY mysql-connector-java-5.1.49.jar /opt/dolphinscheduler/lib +``` + +3. Build a new docker image including MySQL driver: + +``` +docker build -t apache/dolphinscheduler:mysql-driver . +``` + +4. Modify all `image` fields to `apache/dolphinscheduler:mysql-driver` in `docker-compose.yml` + +> If you want to deploy dolphinscheduler on Docker Swarm, you need to modify `docker-stack.yml` + +5. Run dolphinscheduler (See **How to use this docker image**) + +6. Add a MySQL datasource in `Datasource manage` + +### How to support Oracle datasource in `Datasource manage`? + +> Because of the commercial license, we cannot directly use the driver of Oracle. 
+> +> If you want to add an Oracle datasource, you can build a new image based on the `apache/dolphinscheduler` image as follows. + +1. Download the Oracle driver [ojdbc8.jar](https://repo1.maven.org/maven2/com/oracle/database/jdbc/ojdbc8/) (such as `ojdbc8-19.9.0.0.jar`) + +2. Create a new `Dockerfile` to add Oracle driver: + +``` +FROM apache/dolphinscheduler:latest +COPY ojdbc8-19.9.0.0.jar /opt/dolphinscheduler/lib +``` + +3. Build a new docker image including Oracle driver: + +``` +docker build -t apache/dolphinscheduler:oracle-driver . +``` + +4. Modify all `image` fields to `apache/dolphinscheduler:oracle-driver` in `docker-compose.yml` + +> If you want to deploy dolphinscheduler on Docker Swarm, you need to modify `docker-stack.yml` + +5. Run dolphinscheduler (See **How to use this docker image**) + +6. Add an Oracle datasource in `Datasource manage` + +For more information please refer to the [incubator-dolphinscheduler](https://github.com/apache/incubator-dolphinscheduler.git) documentation. diff --git a/docker/build/README_zh_CN.md b/docker/build/README_zh_CN.md index 993a27435e..4e7cf58677 100644 --- a/docker/build/README_zh_CN.md +++ b/docker/build/README_zh_CN.md @@ -1,4 +1,4 @@ -## Dolphin Scheduler是什么? +## DolphinScheduler是什么? 
一个分布式易扩展的可视化DAG工作流任务调度系统。致力于解决数据处理流程中错综复杂的依赖关系,使调度系统在数据处理流程中`开箱即用`。 @@ -6,7 +6,7 @@ GitHub URL: https://github.com/apache/incubator-dolphinscheduler Official Website: https://dolphinscheduler.apache.org -![Dolphin Scheduler](https://dolphinscheduler.apache.org/img/hlogo_colorful.svg) +![DolphinScheduler](https://dolphinscheduler.apache.org/img/hlogo_colorful.svg) [![EN doc](https://img.shields.io/badge/document-English-blue.svg)](README.md) [![CN doc](https://img.shields.io/badge/文档-中文版-blue.svg)](README_zh_CN.md) @@ -115,7 +115,7 @@ C:\incubator-dolphinscheduler>.\docker\build\hooks\build.bat ## 环境变量 -Dolphin Scheduler映像使用了几个容易遗漏的环境变量。虽然这些变量不是必须的,但是可以帮助你更容易配置镜像并根据你的需求定义相应的服务配置。 +DolphinScheduler镜像使用了几个容易遗漏的环境变量。虽然这些变量不是必须的,但是可以帮助你更容易配置镜像并根据你的需求定义相应的服务配置。 **`DATABASE_TYPE`** @@ -305,3 +305,142 @@ EOF " > ${DOLPHINSCHEDULER_HOME}/conf/${line%.*} done ``` + +## FAQ + +### 如何通过 docker-compose 停止 dolphinscheduler? + +停止所有容器: + +``` +docker-compose stop +``` + +停止所有容器并移除所有容器,网络和存储卷: + +``` +docker-compose down -v +``` + +### 如何在 Docker Swarm 上部署 dolphinscheduler? + +假设 Docker Swarm 集群已经部署(如果还没有创建 Docker Swarm 集群,请参考 [https://docs.docker.com/engine/swarm/swarm-tutorial/create-swarm/](https://docs.docker.com/engine/swarm/swarm-tutorial/create-swarm/)) + +启动名为 dolphinscheduler 的 stack + +``` +docker stack deploy -c docker-stack.yml dolphinscheduler +``` + +停止并移除名为 dolphinscheduler 的 stack + +``` +docker stack rm dolphinscheduler +``` + +### 如何用 MySQL 替代 PostgreSQL 作为 DolphinScheduler 的数据库? + +> 由于商业许可证的原因,我们不能直接使用 MySQL 的驱动包和客户端. +> +> 如果你要使用 MySQL, 你可以基于官方镜像 `apache/dolphinscheduler` 进行构建. + +1. 下载 MySQL 驱动包 [mysql-connector-java-5.1.49.jar](https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.49/mysql-connector-java-5.1.49.jar) (要求 `>=5.1.47`) + +2. 创建一个新的 `Dockerfile`,用于添加 MySQL 的驱动包和客户端: + +``` +FROM apache/dolphinscheduler:latest +COPY mysql-connector-java-5.1.49.jar /opt/dolphinscheduler/lib +RUN apk add --update --no-cache mysql-client +``` + +3. 
构建一个包含 MySQL 的驱动包和客户端的新镜像: + +``` +docker build -t apache/dolphinscheduler:mysql . +``` + +4. 修改 `docker-compose.yml` 文件中的所有 image 字段为 `apache/dolphinscheduler:mysql` + +> 如果你想在 Docker Swarm 上部署 dolphinscheduler,你需要修改 `docker-stack.yml` + +5. 注释 `docker-compose.yml` 文件中的 `dolphinscheduler-postgresql` 块 + +6. 在 `docker-compose.yml` 文件中添加 `dolphinscheduler-mysql` 服务(**可选**,你可以直接使用一个外部的 MySQL 数据库) + +7. 修改 `docker-compose.yml` 文件中的所有 DATABASE 环境变量 + +``` +DATABASE_TYPE: mysql +DATABASE_DRIVER: com.mysql.jdbc.Driver +DATABASE_HOST: dolphinscheduler-mysql +DATABASE_PORT: 3306 +DATABASE_USERNAME: root +DATABASE_PASSWORD: root +DATABASE_DATABASE: dolphinscheduler +DATABASE_PARAMS: useUnicode=true&characterEncoding=UTF-8 +``` + +> 如果你已经添加了 `dolphinscheduler-mysql` 服务,设置 `DATABASE_HOST` 为 `dolphinscheduler-mysql` 即可 + +8. 运行 dolphinscheduler (详见**如何使用docker镜像**) + +### 如何在数据源中心支持 MySQL 数据源? + +> 由于商业许可证的原因,我们不能直接使用 MySQL 的驱动包. +> +> 如果你要添加 MySQL 数据源, 你可以基于官方镜像 `apache/dolphinscheduler` 进行构建. + +1. 下载 MySQL 驱动包 [mysql-connector-java-5.1.49.jar](https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.49/mysql-connector-java-5.1.49.jar) (要求 `>=5.1.47`) + +2. 创建一个新的 `Dockerfile`,用于添加 MySQL 驱动包: + +``` +FROM apache/dolphinscheduler:latest +COPY mysql-connector-java-5.1.49.jar /opt/dolphinscheduler/lib +``` + +3. 构建一个包含 MySQL 驱动包的新镜像: + +``` +docker build -t apache/dolphinscheduler:mysql-driver . +``` + +4. 将 `docker-compose.yml` 文件中的所有 image 字段修改为 `apache/dolphinscheduler:mysql-driver` + +> 如果你想在 Docker Swarm 上部署 dolphinscheduler,你需要修改 `docker-stack.yml` + +5. 运行 dolphinscheduler (详见**如何使用docker镜像**) + +6. 在数据源中心添加一个 MySQL 数据源 + +### 如何在数据源中心支持 Oracle 数据源? + +> 由于商业许可证的原因,我们不能直接使用 Oracle 的驱动包. +> +> 如果你要添加 Oracle 数据源, 你可以基于官方镜像 `apache/dolphinscheduler` 进行构建. + +1. 下载 Oracle 驱动包 [ojdbc8.jar](https://repo1.maven.org/maven2/com/oracle/database/jdbc/ojdbc8/) (例如 `ojdbc8-19.9.0.0.jar`) + +2. 
创建一个新的 `Dockerfile`,用于添加 Oracle 驱动包: + +``` +FROM apache/dolphinscheduler:latest +COPY ojdbc8-19.9.0.0.jar /opt/dolphinscheduler/lib +``` + +3. 构建一个包含 Oracle 驱动包的新镜像: + +``` +docker build -t apache/dolphinscheduler:oracle-driver . +``` + +4. 将 `docker-compose.yml` 文件中的所有 image 字段 修改为 `apache/dolphinscheduler:oracle-driver` + +> 如果你想在 Docker Swarm 上部署 dolphinscheduler,你需要修改 `docker-stack.yml` + +5. 运行 dolphinscheduler (详见**如何使用docker镜像**) + +6. 在数据源中心添加一个 Oracle 数据源 + +更多信息请查看 [incubator-dolphinscheduler](https://github.com/apache/incubator-dolphinscheduler.git) 文档. diff --git a/docker/docker-swarm/docker-compose.yml b/docker/docker-swarm/docker-compose.yml index a4a221c56e..95818fbbf5 100644 --- a/docker/docker-swarm/docker-compose.yml +++ b/docker/docker-swarm/docker-compose.yml @@ -58,11 +58,14 @@ services: - 12345:12345 environment: TZ: Asia/Shanghai + DATABASE_TYPE: postgresql + DATABASE_DRIVER: org.postgresql.Driver DATABASE_HOST: dolphinscheduler-postgresql DATABASE_PORT: 5432 DATABASE_USERNAME: root DATABASE_PASSWORD: root DATABASE_DATABASE: dolphinscheduler + DATABASE_PARAMS: characterEncoding=utf8 ZOOKEEPER_QUORUM: dolphinscheduler-zookeeper:2181 RESOURCE_STORAGE_TYPE: HDFS RESOURCE_UPLOAD_PATH: /dolphinscheduler @@ -92,11 +95,14 @@ services: environment: TZ: Asia/Shanghai ALERT_PLUGIN_DIR: lib/plugin/alert + DATABASE_TYPE: postgresql + DATABASE_DRIVER: org.postgresql.Driver DATABASE_HOST: dolphinscheduler-postgresql DATABASE_PORT: 5432 DATABASE_USERNAME: root DATABASE_PASSWORD: root DATABASE_DATABASE: dolphinscheduler + DATABASE_PARAMS: characterEncoding=utf8 healthcheck: test: ["CMD", "/root/checkpoint.sh", "AlertServer"] interval: 30s @@ -126,11 +132,14 @@ services: MASTER_TASK_COMMIT_INTERVAL: "1000" MASTER_MAX_CPULOAD_AVG: "100" MASTER_RESERVED_MEMORY: "0.1" + DATABASE_TYPE: postgresql + DATABASE_DRIVER: org.postgresql.Driver DATABASE_HOST: dolphinscheduler-postgresql DATABASE_PORT: 5432 DATABASE_USERNAME: root DATABASE_PASSWORD: root 
DATABASE_DATABASE: dolphinscheduler + DATABASE_PARAMS: characterEncoding=utf8 ZOOKEEPER_QUORUM: dolphinscheduler-zookeeper:2181 healthcheck: test: ["CMD", "/root/checkpoint.sh", "MasterServer"] @@ -173,11 +182,14 @@ services: DATAX_HOME: "/opt/soft/datax/bin/datax.py" DOLPHINSCHEDULER_DATA_BASEDIR_PATH: /tmp/dolphinscheduler ALERT_LISTEN_HOST: dolphinscheduler-alert + DATABASE_TYPE: postgresql + DATABASE_DRIVER: org.postgresql.Driver DATABASE_HOST: dolphinscheduler-postgresql DATABASE_PORT: 5432 DATABASE_USERNAME: root DATABASE_PASSWORD: root DATABASE_DATABASE: dolphinscheduler + DATABASE_PARAMS: characterEncoding=utf8 ZOOKEEPER_QUORUM: dolphinscheduler-zookeeper:2181 RESOURCE_STORAGE_TYPE: HDFS RESOURCE_UPLOAD_PATH: /dolphinscheduler diff --git a/docker/docker-swarm/docker-stack.yml b/docker/docker-swarm/docker-stack.yml index 7206e4e678..093059c30e 100644 --- a/docker/docker-swarm/docker-stack.yml +++ b/docker/docker-swarm/docker-stack.yml @@ -58,11 +58,14 @@ services: - 12345:12345 environment: TZ: Asia/Shanghai + DATABASE_TYPE: postgresql + DATABASE_DRIVER: org.postgresql.Driver DATABASE_HOST: dolphinscheduler-postgresql DATABASE_PORT: 5432 DATABASE_USERNAME: root DATABASE_PASSWORD: root DATABASE_DATABASE: dolphinscheduler + DATABASE_PARAMS: characterEncoding=utf8 ZOOKEEPER_QUORUM: dolphinscheduler-zookeeper:2181 RESOURCE_STORAGE_TYPE: HDFS RESOURCE_UPLOAD_PATH: /dolphinscheduler @@ -89,11 +92,14 @@ services: environment: TZ: Asia/Shanghai ALERT_PLUGIN_DIR: lib/plugin/alert + DATABASE_TYPE: postgresql + DATABASE_DRIVER: org.postgresql.Driver DATABASE_HOST: dolphinscheduler-postgresql DATABASE_PORT: 5432 DATABASE_USERNAME: root DATABASE_PASSWORD: root DATABASE_DATABASE: dolphinscheduler + DATABASE_PARAMS: characterEncoding=utf8 healthcheck: test: ["CMD", "/root/checkpoint.sh", "AlertServer"] interval: 30s @@ -122,11 +128,14 @@ services: MASTER_TASK_COMMIT_INTERVAL: "1000" MASTER_MAX_CPULOAD_AVG: "100" MASTER_RESERVED_MEMORY: "0.1" + DATABASE_TYPE: postgresql + 
DATABASE_DRIVER: org.postgresql.Driver DATABASE_HOST: dolphinscheduler-postgresql DATABASE_PORT: 5432 DATABASE_USERNAME: root DATABASE_PASSWORD: root DATABASE_DATABASE: dolphinscheduler + DATABASE_PARAMS: characterEncoding=utf8 ZOOKEEPER_QUORUM: dolphinscheduler-zookeeper:2181 healthcheck: test: ["CMD", "/root/checkpoint.sh", "MasterServer"] @@ -167,11 +176,14 @@ services: DATAX_HOME: "/opt/soft/datax/bin/datax.py" DOLPHINSCHEDULER_DATA_BASEDIR_PATH: /tmp/dolphinscheduler ALERT_LISTEN_HOST: dolphinscheduler-alert + DATABASE_TYPE: postgresql + DATABASE_DRIVER: org.postgresql.Driver DATABASE_HOST: dolphinscheduler-postgresql DATABASE_PORT: 5432 DATABASE_USERNAME: root DATABASE_PASSWORD: root DATABASE_DATABASE: dolphinscheduler + DATABASE_PARAMS: characterEncoding=utf8 ZOOKEEPER_QUORUM: dolphinscheduler-zookeeper:2181 RESOURCE_STORAGE_TYPE: HDFS RESOURCE_UPLOAD_PATH: /dolphinscheduler